/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "sched-int.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
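/* (Illustrative note, not in the original source: the TLS size selects the
   instruction used for the thread-pointer-relative offset -- a 14-bit
   immediate fits "adds", a 22-bit immediate fits "addl", and a full 64-bit
   offset needs "movl" -- so the default of 22 assumes the TLS data stays
   within a 22-bit signed offset of the thread pointer.)  */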
/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune;

/* String used with the -tune= option.  */
const char *ia64_tune_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;        /* size of the stack frame, not including
                                      the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;     /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;        /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;               /* mask of saved registers.  */
  unsigned int gr_used_mask;       /* mask of registers in use as gr spill
                                      registers or long-term scratches.  */
  int n_spilled;                   /* number of spilled registers.  */
  int reg_fp;                      /* register for fp.  */
  int reg_save_b0;                 /* save register for b0.  */
  int reg_save_pr;                 /* save register for prs.  */
  int reg_save_ar_pfs;             /* save register for ar.pfs.  */
  int reg_save_ar_unat;            /* save register for ar.unat.  */
  int reg_save_ar_lc;              /* save register for ar.lc.  */
  int reg_save_gp;                 /* save register for gp.  */
  int n_input_regs;                /* number of input registers used.  */
  int n_local_regs;                /* number of local registers used.  */
  int n_output_regs;               /* number of output registers used.  */
  int n_rotate_regs;               /* number of rotating registers used.  */
  char need_regstk;                /* true if a .regstk directive needed.  */
  char initialized;                /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
static int ia64_use_dfa_pipeline_interface PARAMS ((void));
static int ia64_first_cycle_multipass_dfa_lookahead PARAMS ((void));
static void ia64_dependencies_evaluation_hook PARAMS ((rtx, rtx));
static void ia64_init_dfa_pre_cycle_insn PARAMS ((void));
static rtx ia64_dfa_pre_cycle_insn PARAMS ((void));
static int ia64_first_cycle_multipass_dfa_lookahead_guard PARAMS ((rtx));
static int ia64_dfa_new_cycle PARAMS ((FILE *, int, rtx, int, int, int *));
static rtx gen_tls_get_addr PARAMS ((void));
static rtx gen_thread_pointer PARAMS ((void));
static rtx ia64_expand_tls_address PARAMS ((enum tls_model, rtx, rtx));
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static bool ia64_function_ok_for_sibcall PARAMS ((tree, tree));
static bool ia64_rtx_costs PARAMS ((rtx, int, int, int *));
static void fix_range PARAMS ((const char *));
static struct machine_function * ia64_init_machine_status PARAMS ((void));
static void emit_insn_group_barriers PARAMS ((FILE *));
static void emit_all_insn_group_barriers PARAMS ((FILE *));
static void final_emit_insn_group_barriers PARAMS ((FILE *));
static void emit_predicate_relation_info PARAMS ((void));
static void ia64_reorg PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));
static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode,
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));
static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_dfa_sched_reorder PARAMS ((FILE *, int, rtx *, int *,
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

static struct bundle_state *get_free_bundle_state PARAMS ((void));
static void free_bundle_state PARAMS ((struct bundle_state *));
static void initiate_bundle_states PARAMS ((void));
static void finish_bundle_states PARAMS ((void));
static unsigned bundle_state_hash PARAMS ((const void *));
static int bundle_state_eq_p PARAMS ((const void *, const void *));
static int insert_bundle_state PARAMS ((struct bundle_state *));
static void initiate_bundle_state_table PARAMS ((void));
static void finish_bundle_state_table PARAMS ((void));
static int try_issue_nops PARAMS ((struct bundle_state *, int));
static int try_issue_insn PARAMS ((struct bundle_state *, rtx));
static void issue_nops_and_insn PARAMS ((struct bundle_state *, int,
static int get_max_pos PARAMS ((state_t));
static int get_template PARAMS ((state_t, int));
static rtx get_next_important_insn PARAMS ((rtx, rtx));
static void bundling PARAMS ((FILE *, int, rtx, rtx));

static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
                                          HOST_WIDE_INT, tree));
static void ia64_file_start PARAMS ((void));

static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
                                             unsigned HOST_WIDE_INT));
static void ia64_rwreloc_select_section PARAMS ((tree, int,
                                                 unsigned HOST_WIDE_INT))
static void ia64_rwreloc_unique_section PARAMS ((tree, int))
static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
                                                     unsigned HOST_WIDE_INT))
static unsigned int ia64_rwreloc_section_type_flags
     PARAMS ((tree, const char *, int))

static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
static void ia64_hpux_file_end PARAMS ((void))

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

struct gcc_target targetm = TARGET_INITIALIZER;
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

call_operand (op, mode)
     enum machine_mode mode;
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));

/* Return 1 if OP refers to a symbol in the sdata section.  */

sdata_symbolic_operand (op, mode)
     enum machine_mode mode ATTRIBUTE_UNUSED;
  switch (GET_CODE (op))
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (XEXP (op, 0), 0);

      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;

      return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);
small_addr_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  return SYMBOL_REF_SMALL_ADDR_P (op);

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

got_symbolic_operand (op, mode)
     enum machine_mode mode ATTRIBUTE_UNUSED;
  switch (GET_CODE (op))
      if (GET_CODE (op) != PLUS)
        return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
        return 0;
      if (GET_CODE (op) != CONST_INT)
        return 0;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
        return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;
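      /* (Illustrative note, not in the original source: under this test
         sym+0x4000 is still accepted for a GOT load while sym+0x123 is
         rejected and must be legitimized by splitting off the low bits,
         presumably so that offsets within one 16KB-aligned window can
         share a single GOT entry.)  */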
      if (SYMBOL_REF_SMALL_ADDR_P (op))
        return 0;

/* Return 1 if OP refers to a symbol.  */

symbolic_operand (op, mode)
     enum machine_mode mode ATTRIBUTE_UNUSED;
  switch (GET_CODE (op))

/* Return tls_model if OP refers to a TLS symbol.  */

tls_symbolic_operand (op, mode)
     enum machine_mode mode ATTRIBUTE_UNUSED;
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);

/* Return 1 if OP refers to a function.  */

function_operand (op, mode)
     enum machine_mode mode ATTRIBUTE_UNUSED;
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
    return 1;

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

setjmp_operand (op, mode)
     enum machine_mode mode ATTRIBUTE_UNUSED;
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[1] == '_' && name[2] == 'x')
    name += 3;
  else if (name[1] == '_')
    name += 2;

          && (! strcmp (name, "setjmp")
              || ! strcmp (name, "setjmp_syscall")))
          && ! strcmp (name, "sigsetjmp"))
          && ! strcmp (name, "savectx")));
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))

/* Return 1 if OP is a general operand, excluding tls symbolic operands.  */

move_operand (op, mode)
     enum machine_mode mode;
  return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

gr_register_operand (op, mode)
     enum machine_mode mode;
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

fr_register_operand (op, mode)
     enum machine_mode mode;
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

grfr_register_operand (op, mode)
     enum machine_mode mode;
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

gr_nonimmediate_operand (op, mode)
     enum machine_mode mode;
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

fr_nonimmediate_operand (op, mode)
     enum machine_mode mode;
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

grfr_nonimmediate_operand (op, mode)
     enum machine_mode mode;
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
/* Return 1 if OP is a GR register operand, or zero.  */

gr_reg_or_0_operand (op, mode)
     enum machine_mode mode;
  return (op == const0_rtx || gr_register_operand (op, mode));

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

gr_reg_or_5bit_operand (op, mode)
     enum machine_mode mode;
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

gr_reg_or_6bit_operand (op, mode)
     enum machine_mode mode;
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

gr_reg_or_8bit_operand (op, mode)
     enum machine_mode mode;
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

grfr_reg_or_8bit_operand (op, mode)
     enum machine_mode mode;
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || grfr_register_operand (op, mode));

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

gr_reg_or_8bit_adjusted_operand (op, mode)
     enum machine_mode mode;
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

gr_reg_or_8bit_and_adjusted_operand (op, mode)
     enum machine_mode mode;
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

gr_reg_or_14bit_operand (op, mode)
     enum machine_mode mode;
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

gr_reg_or_22bit_operand (op, mode)
     enum machine_mode mode;
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a 6 bit immediate operand.  */

shift_count_operand (op, mode)
     enum machine_mode mode ATTRIBUTE_UNUSED;
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX);

/* Return 1 if OP is a 5 bit immediate operand.  */

shift_32bit_count_operand (op, mode)
     enum machine_mode mode ATTRIBUTE_UNUSED;
  return ((GET_CODE (op) == CONST_INT
           && (INTVAL (op) >= 0 && INTVAL (op) < 32))
          || GET_CODE (op) == CONSTANT_P_RTX);

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

shladd_operand (op, mode)
     enum machine_mode mode ATTRIBUTE_UNUSED;
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
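/* (Illustrative note, not in the original source: 2, 4, 8 and 16 are the
   multipliers implemented by the "shladd" instruction, corresponding to
   its shift counts of 1 through 4.)  */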
/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

fetchadd_operand (op, mode)
     enum machine_mode mode ATTRIBUTE_UNUSED;
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
              INTVAL (op) == -4 || INTVAL (op) == -1 ||
              INTVAL (op) == 1 || INTVAL (op) == 4 ||
              INTVAL (op) == 8 || INTVAL (op) == 16));
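/* (Illustrative note, not in the original source: these values are exactly
   the increments the fetchadd4/fetchadd8 instructions accept as immediates;
   any other atomic increment presumably has to be synthesized some other
   way, e.g. with a compare-and-swap loop.)  */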
/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

fr_reg_or_fp01_operand (op, mode)
     enum machine_mode mode;
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || fr_register_operand (op, mode));

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

destination_operand (op, mode)
     enum machine_mode mode;
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;

/* Like memory_operand, but don't allow post-increments.  */

not_postinc_memory_operand (op, mode)
     enum machine_mode mode;
  return (memory_operand (op, mode)
          && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

normal_comparison_operator (op, mode)
     enum machine_mode mode;
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

adjusted_comparison_operator (op, mode)
     enum machine_mode mode;
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));

/* Return 1 if this is a signed inequality operator.  */

signed_inequality_operator (op, mode)
     enum machine_mode mode;
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == GE || code == GT
              || code == LE || code == LT));

/* Return 1 if this operator is valid for predication.  */

predicate_operator (op, mode)
     enum machine_mode mode;
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));

/* Return 1 if this operator can be used in a conditional operation.  */

condop_operator (op, mode)
     enum machine_mode mode;
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == PLUS || code == MINUS || code == AND
              || code == IOR || code == XOR));
/* Return 1 if this is the ar.lc register.  */

ar_lc_reg_operand (op, mode)
     enum machine_mode mode;
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);

/* Return 1 if this is the ar.ccv register.  */

ar_ccv_reg_operand (op, mode)
     enum machine_mode mode;
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);

/* Return 1 if this is the ar.pfs register.  */

ar_pfs_reg_operand (op, mode)
     enum machine_mode mode;
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_PFS_REGNUM);
/* Like general_operand, but don't allow (mem (addressof)).  */

general_tfmode_operand (op, mode)
     enum machine_mode mode;
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;

destination_tfmode_operand (op, mode)
     enum machine_mode mode;
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;

tfreg_or_fp01_operand (op, mode)
     enum machine_mode mode;
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

basereg_operand (op, mode)
     enum machine_mode mode;
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     in test runs.  */
  return (register_operand (op, mode) &&
          REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));

  ADDR_AREA_NORMAL,    /* normal address area */
  ADDR_AREA_SMALL      /* addressable by "addl" (-2MB < addr < 2MB) */

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;
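/* (Illustrative usage note, not in the original source: a declaration can
   ask for the small address area with, e.g.,
       extern int foo __attribute__ ((model ("small")));
   the identifiers above show that "__small__" is accepted as an alternative
   spelling of the argument.)  */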
  if (small_ident1 == 0)
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
  return ADDR_AREA_NORMAL;
ia64_handle_model_attribute (tree *node, tree name,
                             int flags ATTRIBUTE_UNUSED,
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
      addr_area = ADDR_AREA_SMALL;
      warning ("invalid argument of `%s' attribute",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;

  switch (TREE_CODE (decl))
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
           == FUNCTION_DECL)
          && !TREE_STATIC (decl))
          error ("%Ha an address area attribute cannot be specified for "
                 "local variables", &DECL_SOURCE_LOCATION (decl), decl);
          *no_add_attrs = true;
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
          error ("%Ha address area of '%s' conflicts with previous "
                 "declaration", &DECL_SOURCE_LOCATION (decl), decl);
          *no_add_attrs = true;
      error ("%Ha address area attribute cannot be specified for functions",
             &DECL_SOURCE_LOCATION (decl), decl);
      *no_add_attrs = true;
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
ia64_encode_addr_area (tree decl, rtx symbol)
  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
  SYMBOL_REF_FLAGS (symbol) = flags;

ia64_encode_section_info (tree decl, rtx rtl, int first)
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
/* Return 1 if the operands of a move are ok.  */

ia64_move_ok (dst, src)
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;

  return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);

/* Return 0 if we are doing C++ code.  This optimization fails with
   C++ because of GNAT c++/6685.  */

addp4_optimize_ok (op1, op2)
  if (!strcmp (lang_hooks.name, "GNU C++"))
    return 0;

  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */
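/* (Illustrative example, not in the original source: for ROP = 0x3fc and
   RSHIFT = 2 the mask becomes 0xff once the low zero bits are shifted out,
   and exact_log2 (0xff + 1) = 8 is returned, an 8-bit field; a mask with a
   hole in it makes OP + 1 a non-power-of-two, so the result is -1.)  */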
ia64_depz_field_mask (rop, rshift)
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
/* Expand a symbolic constant load.  */

ia64_expand_load_address (dest, src)
  if (tls_symbolic_operand (src, VOIDmode))
  if (GET_CODE (dest) != REG)

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG (Pmode, REGNO (dest));

  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
  else if (TARGET_AUTO_PIC)
      emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
      emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
      emit_insn (gen_load_gprel (dest, src));

      if (GET_CODE (src) == CONST
          && GET_CODE (XEXP (src, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
          && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
          rtx sym = XEXP (XEXP (src, 0), 0);
          HOST_WIDE_INT ofs, hi, lo;

          /* Split the offset into a sign extended 14-bit low part
             and a complementary high part.  */
          ofs = INTVAL (XEXP (XEXP (src, 0), 1));
          lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
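          /* (Illustrative note, not in the original source: the high part
             is presumably the complement HI = OFS - LO, so an offset of
             0x12345 splits into LO = -7355 and HI = 0x14000, with LO
             fitting the signed 14-bit immediate added back below.)  */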
          ia64_expand_load_address (dest, plus_constant (sym, hi));
          emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

static GTY(()) rtx gen_tls_tga;
  gen_tls_tga = init_one_libfunc ("__tls_get_addr");

static GTY(()) rtx thread_pointer_rtx;

gen_thread_pointer ()
  if (!thread_pointer_rtx)
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
  return thread_pointer_rtx;
ia64_expand_tls_address (tls_kind, op0, op1)
     enum tls_model tls_kind;
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;

    case TLS_MODEL_GLOBAL_DYNAMIC:
      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
      RTX_UNCHANGING_P (tga_op2) = 1;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      emit_libcall_block (insns, op0, tga_ret, op1);

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (register_operand (op0, Pmode))
        tga_ret = op0;
      else
        tga_ret = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_ret, op1));
      emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
      emit_insn (gen_add_dtprel (tga_ret, tmp, op1));

      return (tga_ret == op0 ? NULL_RTX : tga_ret);

    case TLS_MODEL_INITIAL_EXEC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_tprel (tmp, op1));
      tmp = gen_rtx_MEM (Pmode, tmp);
      RTX_UNCHANGING_P (tmp) = 1;
      tmp = force_reg (Pmode, tmp);

      if (register_operand (op0, Pmode))
        op1 = op0;
      else
        op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));

      return (op1 == op0 ? NULL_RTX : op1);

    case TLS_MODEL_LOCAL_EXEC:
      if (register_operand (op0, Pmode))
        tmp = op0;
      else
        tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));
      emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
      emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));

      return (tmp == op0 ? NULL_RTX : tmp);
ia64_expand_move (op0, op1)
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
        return ia64_expand_tls_address (tls_kind, op0, op1);

      if (!TARGET_NO_PIC && reload_completed)
          ia64_expand_load_address (op0, op1);

/* Split a move from OP1 to OP0 conditional on COND.  */

ia64_emit_cond_move (op0, op1, cond)
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
/* Split a post-reload TImode reference into two DImode components.  */

ia64_split_timode (out, in, scratch)
  switch (GET_CODE (in))
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);

        rtx base = XEXP (in, 0);

        switch (GET_CODE (base))
            out[0] = adjust_address (in, DImode, 0);
            base = XEXP (base, 0);
            out[0] = adjust_address (in, DImode, 0);

            /* Since we're changing the mode, we need to change to POST_MODIFY
               as well to preserve the size of the increment.  Either that or
               do the update in two steps, but we've already got this scratch
               register handy so let's use it.  */
            base = XEXP (base, 0);
            out[1]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, 16)));

            base = XEXP (base, 0);
            out[1]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, -16)));

        if (scratch == NULL_RTX)
        out[1] = change_address (in, DImode, scratch);
        return gen_adddi3 (scratch, base, GEN_INT (8));

      split_double (in, &out[0], &out[1]);
/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

spill_tfmode_operand (in, force)
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
  else if (force && GET_CODE (in) == REG)
      rtx mem = gen_mem_addressof (in, NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
  else if (GET_CODE (in) == MEM
           && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

ia64_expand_compare (code, mode)
     enum machine_mode mode;
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
      if ((code == NE || code == EQ) && op1 == const0_rtx)
        cmp = op0;
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
/* Emit the appropriate sequence for a call.  */

ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx nextarg ATTRIBUTE_UNUSED;
    addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        insn = gen_sibcall_nogp (addr);
        insn = gen_call_nogp (addr, b0);
        insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
        insn = gen_sibcall_gp (addr);
        insn = gen_call_gp (addr, b0);
        insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);

    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);

  if (current_frame_info.reg_save_gp)
      tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
                + current_frame_info.spill_size);
      if (frame_pointer_needed)
          tmp = hard_frame_pointer_rtx;
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;

      if (CONST_OK_FOR_I (offset))
        emit_insn (gen_adddi3 (pic_offset_table_rtx,
                               tmp, GEN_INT (offset)));
          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                 pic_offset_table_rtx, tmp));

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
      emit_move_insn (pic_offset_table_rtx, tmp);
ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
                 noreturn_p, sibcall_p)
     rtx retval, addr, retaddr, scratch_r, scratch_b;
     int noreturn_p, sibcall_p;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
      /* ??? We are currently constrained to *not* use peep2, because
         we can legitimately change the global lifetime of the GP
         (in the form of killing where previously live).  This is
         because a call through a descriptor doesn't use the previous
         value of the GP, while a direct call does, and we do not
         commit to either form until the split here.

         That said, this means that we lack precise life info for
         whether ADDR is dead after this call.  This is not terribly
         important, since we can fix things up essentially for free
         with the POST_DEC below, but it's nice to not use it when we
         can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
                                            REGNO (addr)))
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
         revert the change made above via the POST_INCREMENT.  */
        tmp = gen_rtx_POST_DEC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      insn = gen_sibcall_nogp (addr);
      insn = gen_call_value_nogp (retval, addr, retaddr);
      insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
/* Begin the assembly file.  */

  default_file_start ();
  emit_safe_across_calls ();

emit_safe_across_calls ()
  unsigned int rs, re;

      while (rs < 64 && call_used_regs[PR_REG (rs)])
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        fputs ("\t.pred.safe_across_calls ", asm_out_file);
        fputc (',', asm_out_file);
        fprintf (asm_out_file, "p%u", rs);
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
    fputc ('\n', asm_out_file);
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

find_gr_spill (try_locals)
  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
            current_frame_info.gr_used_mask |= 1 << regno;

      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;

  /* Failed to find a general register to spill to.  Must use stack.  */

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

next_scratch_gr_reg ()
  for (i = 0; i < 32; ++i)
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          last_scratch_gr_reg = regno;

  /* There must be _something_ available.  */

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

mark_reg_gr_used_mask (reg, data)
     void *data ATTRIBUTE_UNUSED;
  unsigned int regno = REGNO (reg);
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);
1888 /* Returns the number of bytes offset between the frame pointer and the stack
1889 pointer for the current function. SIZE is the number of bytes of space
1890 needed for local variables. */
1893 ia64_compute_frame_size (size
)
1896 HOST_WIDE_INT total_size
;
1897 HOST_WIDE_INT spill_size
= 0;
1898 HOST_WIDE_INT extra_spill_size
= 0;
1899 HOST_WIDE_INT pretend_args_size
;
1902 int spilled_gr_p
= 0;
1903 int spilled_fr_p
= 0;
1907 if (current_frame_info
.initialized
)
1910 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
1911 CLEAR_HARD_REG_SET (mask
);
1913 /* Don't allocate scratches to the return register. */
1914 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
1916 /* Don't allocate scratches to the EH scratch registers. */
1917 if (cfun
->machine
->ia64_eh_epilogue_sp
)
1918 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
1919 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
1920 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
1922 /* Find the size of the register stack frame. We have only 80 local
1923 registers, because we reserve 8 for the inputs and 8 for the
1926 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1927 since we'll be adjusting that down later. */
1928 regno
= LOC_REG (78) + ! frame_pointer_needed
;
1929 for (; regno
>= LOC_REG (0); regno
--)
1930 if (regs_ever_live
[regno
])
1932 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
1934 /* For functions marked with the syscall_linkage attribute, we must mark
1935 all eight input registers as in use, so that locals aren't visible to
1938 if (cfun
->machine
->n_varargs
> 0
1939 || lookup_attribute ("syscall_linkage",
1940 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
1941 current_frame_info
.n_input_regs
= 8;
1944 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
1945 if (regs_ever_live
[regno
])
1947 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
1950 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
1951 if (regs_ever_live
[regno
])
1953 i
= regno
- OUT_REG (0) + 1;
1955 /* When -p profiling, we need one output register for the mcount argument.
1956 Likewise for -a profiling for the bb_init_func argument. For -ax
1957 profiling, we need two output registers for the two bb_init_trace_func
1959 if (current_function_profile
)
1961 current_frame_info
.n_output_regs
= i
;
1963 /* ??? No rotating register support yet. */
1964 current_frame_info
.n_rotate_regs
= 0;
1966 /* Discover which registers need spilling, and how much room that
1967 will take. Begin with floating point and general registers,
1968 which will always wind up on the stack. */
1970 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
1971 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1973 SET_HARD_REG_BIT (mask
, regno
);
1979 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1980 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1982 SET_HARD_REG_BIT (mask
, regno
);
1988 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
1989 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1991 SET_HARD_REG_BIT (mask
, regno
);
1996 /* Now come all special registers that might get saved in other
1997 general registers. */
1999 if (frame_pointer_needed
)
2001 current_frame_info
.reg_fp
= find_gr_spill (1);
2002 /* If we did not get a register, then we take LOC79. This is guaranteed
2003 to be free, even if regs_ever_live is already set, because this is
2004 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2005 as we don't count loc79 above. */
2006 if (current_frame_info
.reg_fp
== 0)
2008 current_frame_info
.reg_fp
= LOC_REG (79);
2009 current_frame_info
.n_local_regs
++;
2013 if (! current_function_is_leaf
)
2015 /* Emit a save of BR0 if we call other functions. Do this even
2016 if this function doesn't return, as EH depends on this to be
2017 able to unwind the stack. */
2018 SET_HARD_REG_BIT (mask
, BR_REG (0));
2020 current_frame_info
.reg_save_b0
= find_gr_spill (1);
2021 if (current_frame_info
.reg_save_b0
== 0)
2027 /* Similarly for ar.pfs. */
2028 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2029 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
2030 if (current_frame_info
.reg_save_ar_pfs
== 0)
2032 extra_spill_size
+= 8;
2036 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2037 registers are clobbered, so we fall back to the stack. */
2038 current_frame_info
.reg_save_gp
2039 = (current_function_calls_setjmp
? 0 : find_gr_spill (1));
2040 if (current_frame_info
.reg_save_gp
== 0)
2042 SET_HARD_REG_BIT (mask
, GR_REG (1));
2049 if (regs_ever_live
[BR_REG (0)] && ! call_used_regs
[BR_REG (0)])
2051 SET_HARD_REG_BIT (mask
, BR_REG (0));
2056 if (regs_ever_live
[AR_PFS_REGNUM
])
2058 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2059 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
2060 if (current_frame_info
.reg_save_ar_pfs
== 0)
2062 extra_spill_size
+= 8;
2068 /* Unwind descriptor hackery: things are most efficient if we allocate
2069 consecutive GR save registers for RP, PFS, FP in that order. However,
2070 it is absolutely critical that FP get the only hard register that's
2071 guaranteed to be free, so we allocated it first. If all three did
2072 happen to be allocated hard regs, and are consecutive, rearrange them
2073 into the preferred order now. */
2074 if (current_frame_info
.reg_fp
!= 0
2075 && current_frame_info
.reg_save_b0
== current_frame_info
.reg_fp
+ 1
2076 && current_frame_info
.reg_save_ar_pfs
== current_frame_info
.reg_fp
+ 2)
2078 current_frame_info
.reg_save_b0
= current_frame_info
.reg_fp
;
2079 current_frame_info
.reg_save_ar_pfs
= current_frame_info
.reg_fp
+ 1;
2080 current_frame_info
.reg_fp
= current_frame_info
.reg_fp
+ 2;
2083 /* See if we need to store the predicate register block. */
2084 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2085 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
2087 if (regno
<= PR_REG (63))
2089 SET_HARD_REG_BIT (mask
, PR_REG (0));
2090 current_frame_info
.reg_save_pr
= find_gr_spill (1);
2091 if (current_frame_info
.reg_save_pr
== 0)
2093 extra_spill_size
+= 8;
2097 /* ??? Mark them all as used so that register renaming and such
2098 are free to use them. */
2099 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2100 regs_ever_live
[regno
] = 1;
2103 /* If we're forced to use st8.spill, we're forced to save and restore
2104 ar.unat as well. The check for existing liveness allows inline asm
2105 to touch ar.unat. */
2106 if (spilled_gr_p
|| cfun
->machine
->n_varargs
2107 || regs_ever_live
[AR_UNAT_REGNUM
])
2109 regs_ever_live
[AR_UNAT_REGNUM
] = 1;
2110 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
2111 current_frame_info
.reg_save_ar_unat
= find_gr_spill (spill_size
== 0);
2112 if (current_frame_info
.reg_save_ar_unat
== 0)
2114 extra_spill_size
+= 8;
2119 if (regs_ever_live
[AR_LC_REGNUM
])
2121 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
2122 current_frame_info
.reg_save_ar_lc
= find_gr_spill (spill_size
== 0);
2123 if (current_frame_info
.reg_save_ar_lc
== 0)
2125 extra_spill_size
+= 8;
2130 /* If we have an odd number of words of pretend arguments written to
2131    the stack, then the FR save area will be unaligned.  We round the
2132    size of this area up to keep things 16 byte aligned.  */
2134 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2136 pretend_args_size = current_function_pretend_args_size;
2138 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2139               + current_function_outgoing_args_size);
2140 total_size = IA64_STACK_ALIGN (total_size);
2142 /* We always use the 16-byte scratch area provided by the caller, but
2143    if we are a leaf function, there's no one to which we need to provide
2145 if (current_function_is_leaf)
2146 total_size = MAX (0, total_size - 16);
2148 current_frame_info.total_size = total_size;
2149 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2150 current_frame_info.spill_size = spill_size;
2151 current_frame_info.extra_spill_size = extra_spill_size;
2152 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2153 current_frame_info.n_spilled = n_spilled;
2154 current_frame_info.initialized = reload_completed;
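/* In summary, the frame size recorded here is

     total_size = IA64_STACK_ALIGN (spill_size + extra_spill_size + size
                                    + pretend_args_size
                                    + current_function_outgoing_args_size)

   reduced by the caller-provided 16-byte scratch area when the function is
   a leaf.  */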
2157 /* Compute the initial difference between the specified pair of registers.  */
2160 ia64_initial_elimination_offset (from, to)
2163 HOST_WIDE_INT offset;
2165 ia64_compute_frame_size (get_frame_size ());
2168 case FRAME_POINTER_REGNUM:
2169 if (to == HARD_FRAME_POINTER_REGNUM)
2171 if (current_function_is_leaf)
2172 offset = -current_frame_info.total_size;
2174 offset = -(current_frame_info.total_size
2175            - current_function_outgoing_args_size - 16);
2177 else if (to == STACK_POINTER_REGNUM)
2179 if (current_function_is_leaf)
2182 offset = 16 + current_function_outgoing_args_size;
2188 case ARG_POINTER_REGNUM:
2189 /* Arguments start above the 16 byte save area, unless stdarg
2190    in which case we store through the 16 byte save area.  */
2191 if (to == HARD_FRAME_POINTER_REGNUM)
2192 offset = 16 - current_function_pretend_args_size;
2193 else if (to == STACK_POINTER_REGNUM)
2194 offset = (current_frame_info.total_size
2195           + 16 - current_function_pretend_args_size);
2207 /* If there are more than a trivial number of register spills, we use
2208 two interleaved iterators so that we can get two memory references
2211 In order to simplify things in the prologue and epilogue expanders,
2212 we use helper functions to fix up the memory references after the
2213 fact with the appropriate offsets to a POST_MODIFY memory mode.
2214 The following data structure tracks the state of the two iterators
2215 while insns are being emitted. */
2217 struct spill_fill_data
2219 rtx init_after;		/* point at which to emit initializations */
2220 rtx init_reg[2];		/* initial base register */
2221 rtx iter_reg[2];		/* the iterator registers */
2222 rtx *prev_addr[2];		/* address of last memory use */
2223 rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2224 HOST_WIDE_INT prev_off[2];	/* last offset */
2225 int n_iter;			/* number of iterators in use */
2226 int next_iter;		/* next iterator to use */
2227 unsigned int save_gr_used_mask;
2230 static struct spill_fill_data spill_fill_data;
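/* Sketch of how the expanders below drive these iterators (not additional
   code, just the call order used by ia64_expand_prologue/epilogue):

     setup_spill_pointers (n_spills, base_reg, cfa_off);
     do_spill (...);  / do_restore (...);   -- repeated per saved register
     finish_spill_pointers ();
*/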
2233 setup_spill_pointers (n_spills, init_reg, cfa_off)
2236 HOST_WIDE_INT cfa_off;
2240 spill_fill_data.init_after = get_last_insn ();
2241 spill_fill_data.init_reg[0] = init_reg;
2242 spill_fill_data.init_reg[1] = init_reg;
2243 spill_fill_data.prev_addr[0] = NULL;
2244 spill_fill_data.prev_addr[1] = NULL;
2245 spill_fill_data.prev_insn[0] = NULL;
2246 spill_fill_data.prev_insn[1] = NULL;
2247 spill_fill_data.prev_off[0] = cfa_off;
2248 spill_fill_data.prev_off[1] = cfa_off;
2249 spill_fill_data.next_iter = 0;
2250 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2252 spill_fill_data.n_iter = 1 + (n_spills > 2);
2253 for (i = 0; i < spill_fill_data.n_iter; ++i)
2255 int regno = next_scratch_gr_reg ();
2256 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2257 current_frame_info.gr_used_mask |= 1 << regno;
2262 finish_spill_pointers ()
2264 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2268 spill_restore_mem (reg, cfa_off)
2270 HOST_WIDE_INT cfa_off;
2272 int iter = spill_fill_data.next_iter;
2273 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2274 rtx disp_rtx = GEN_INT (disp);
2277 if (spill_fill_data.prev_addr[iter])
2279 if (CONST_OK_FOR_N (disp))
2281 *spill_fill_data.prev_addr[iter]
2282   = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2283                          gen_rtx_PLUS (DImode,
2284                                        spill_fill_data.iter_reg[iter],
2286 REG_NOTES (spill_fill_data.prev_insn[iter])
2287   = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2288                        REG_NOTES (spill_fill_data.prev_insn[iter]));
2292 /* ??? Could use register post_modify for loads.  */
2293 if (! CONST_OK_FOR_I (disp))
2295 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2296 emit_move_insn (tmp, disp_rtx);
2299 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2300                        spill_fill_data.iter_reg[iter], disp_rtx));
2303 /* Micro-optimization: if we've created a frame pointer, it's at
2304    CFA 0, which may allow the real iterator to be initialized lower,
2305    slightly increasing parallelism.  Also, if there are few saves
2306    it may eliminate the iterator entirely.  */
2308 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2309 && frame_pointer_needed)
2311 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2312 set_mem_alias_set (mem, get_varargs_alias_set ());
2320 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2321                  spill_fill_data.init_reg[iter]);
2326 if (! CONST_OK_FOR_I (disp))
2328 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2329 emit_move_insn (tmp, disp_rtx);
2333 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2334                        spill_fill_data.init_reg[iter],
2341 /* Careful for being the first insn in a sequence.  */
2342 if (spill_fill_data.init_after)
2343 insn = emit_insn_after (seq, spill_fill_data.init_after);
2346 rtx first = get_insns ();
2348 insn = emit_insn_before (seq, first);
2350 insn = emit_insn (seq);
2352 spill_fill_data.init_after = insn;
2354 /* If DISP is 0, we may or may not have a further adjustment
2355    afterward.  If we do, then the load/store insn may be modified
2356    to be a post-modify.  If we don't, then this copy may be
2357    eliminated by copyprop_hardreg_forward, which makes this
2358    insn garbage, which runs afoul of the sanity check in
2359    propagate_one_insn.  So mark this insn as legal to delete.  */
2361 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2365 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2367 /* ??? Not all of the spills are for varargs, but some of them are.
2368    The rest of the spills belong in an alias set of their own.  But
2369    it doesn't actually hurt to include them here.  */
2370 set_mem_alias_set (mem, get_varargs_alias_set ());
2372 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2373 spill_fill_data.prev_off[iter] = cfa_off;
2375 if (++iter >= spill_fill_data.n_iter)
2377 spill_fill_data.next_iter = iter;
2383 do_spill (move_fn, reg, cfa_off, frame_reg)
2384 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2386 HOST_WIDE_INT cfa_off;
2388 int iter = spill_fill_data.next_iter;
2391 mem = spill_restore_mem (reg, cfa_off);
2392 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2393 spill_fill_data.prev_insn[iter] = insn;
2400 RTX_FRAME_RELATED_P (insn) = 1;
2402 /* Don't even pretend that the unwind code can intuit its way
2403    through a pair of interleaved post_modify iterators.  Just
2404    provide the correct answer.  */
2406 if (frame_pointer_needed)
2408 base = hard_frame_pointer_rtx;
2413 base = stack_pointer_rtx;
2414 off = current_frame_info.total_size - cfa_off;
2418 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2419                      gen_rtx_SET (VOIDmode,
2420                                   gen_rtx_MEM (GET_MODE (reg),
2421                                                plus_constant (base, off)),
2428 do_restore (move_fn, reg, cfa_off)
2429 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2431 HOST_WIDE_INT cfa_off;
2433 int iter = spill_fill_data.next_iter;
2436 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2437                               GEN_INT (cfa_off)));
2438 spill_fill_data.prev_insn[iter] = insn;
2441 /* Wrapper functions that discard the CONST_INT spill offset.  These
2442    exist so that we can give gr_spill/gr_fill the offset they need and
2443    use a consistent function interface.  */
2446 gen_movdi_x (dest, src, offset)
2448 rtx offset ATTRIBUTE_UNUSED;
2450 return gen_movdi (dest, src);
2454 gen_fr_spill_x (dest, src, offset)
2456 rtx offset ATTRIBUTE_UNUSED;
2458 return gen_fr_spill (dest, src);
2462 gen_fr_restore_x (dest, src, offset)
2464 rtx offset ATTRIBUTE_UNUSED;
2466 return gen_fr_restore (dest, src);
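/* Each wrapper above matches the rtx (*move_fn) (rtx, rtx, rtx) signature
   expected by do_spill and do_restore, so general, branch and floating
   spills can all be emitted through one code path even though only
   gr_spill/gr_fill actually consume the offset operand.  */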
2469 /* Called after register allocation to add any instructions needed for the
2470 prologue. Using a prologue insn is favored compared to putting all of the
2471 instructions in output_function_prologue(), since it allows the scheduler
2472 to intermix instructions with the saves of the caller saved registers. In
2473 some cases, it might be necessary to emit a barrier instruction as the last
2474 insn to prevent such scheduling.
2476 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2477 so that the debug info generation code can handle them properly.
2479 The register save area is laid out like so:
2481 [ varargs spill area ]
2482 [ fr register spill area ]
2483 [ br register spill area ]
2484 [ ar register spill area ]
2485 [ pr register spill area ]
2486 [ gr register spill area ] */
2488 /* ??? Get inefficient code when the frame size is larger than can fit in an
2489 adds instruction. */
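/* ia64_expand_prologue below walks that save area from the bottom up: the
   varargs GRs are spilled first, then cfa_off is set to spill_cfa_off
   + spill_size + extra_spill_size and decreases toward spill_cfa_off as the
   pr/ar/gr/br/fr saves are emitted; the consistency checks on cfa_off at the
   end of the prologue and epilogue rely on this order.  */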
2492 ia64_expand_prologue ()
2494 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
2495 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
2498 ia64_compute_frame_size (get_frame_size ());
2499 last_scratch_gr_reg
= 15;
2501 /* If there is no epilogue, then we don't need some prologue insns.
2502 We need to avoid emitting the dead prologue insns, because flow
2503 will complain about them. */
2508 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
2509 if ((e
->flags
& EDGE_FAKE
) == 0
2510 && (e
->flags
& EDGE_FALLTHRU
) != 0)
2512 epilogue_p
= (e
!= NULL
);
2517 /* Set the local, input, and output register names. We need to do this
2518 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2519 half. If we use in/loc/out register names, then we get assembler errors
2520 in crtn.S because there is no alloc insn or regstk directive in there. */
2521 if (! TARGET_REG_NAMES
)
2523 int inputs
= current_frame_info
.n_input_regs
;
2524 int locals
= current_frame_info
.n_local_regs
;
2525 int outputs
= current_frame_info
.n_output_regs
;
2527 for (i
= 0; i
< inputs
; i
++)
2528 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
2529 for (i
= 0; i
< locals
; i
++)
2530 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
2531 for (i
= 0; i
< outputs
; i
++)
2532 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
2535 /* Set the frame pointer register name. The regnum is logically loc79,
2536 but of course we'll not have allocated that many locals. Rather than
2537 worrying about renumbering the existing rtxs, we adjust the name. */
2538 /* ??? This code means that we can never use one local register when
2539 there is a frame pointer. loc79 gets wasted in this case, as it is
2540 renamed to a register that will never be used. See also the try_locals
2541 code in find_gr_spill. */
2542 if (current_frame_info
.reg_fp
)
2544 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2545 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2546 = reg_names
[current_frame_info
.reg_fp
];
2547 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2550 /* We don't need an alloc instruction if we've used no outputs or locals. */
2551 if (current_frame_info
.n_local_regs
== 0
2552 && current_frame_info
.n_output_regs
== 0
2553 && current_frame_info
.n_input_regs
<= current_function_args_info
.int_regs
2554 && !TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
2556 /* If there is no alloc, but there are input registers used, then we
2557 need a .regstk directive. */
2558 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
2559 ar_pfs_save_reg
= NULL_RTX
;
2563 current_frame_info
.need_regstk
= 0;
2565 if (current_frame_info
.reg_save_ar_pfs
)
2566 regno
= current_frame_info
.reg_save_ar_pfs
;
2568 regno
= next_scratch_gr_reg ();
2569 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
2571 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
2572 GEN_INT (current_frame_info
.n_input_regs
),
2573 GEN_INT (current_frame_info
.n_local_regs
),
2574 GEN_INT (current_frame_info
.n_output_regs
),
2575 GEN_INT (current_frame_info
.n_rotate_regs
)));
2576 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_pfs
!= 0);
2579 /* Set up frame pointer, stack pointer, and spill iterators. */
2581 n_varargs
= cfun
->machine
->n_varargs
;
2582 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
2583 stack_pointer_rtx
, 0);
2585 if (frame_pointer_needed
)
2587 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
2588 RTX_FRAME_RELATED_P (insn
) = 1;
2591 if (current_frame_info
.total_size
!= 0)
2593 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
2596 if (CONST_OK_FOR_I (- current_frame_info
.total_size
))
2597 offset
= frame_size_rtx
;
2600 regno
= next_scratch_gr_reg ();
2601 offset
= gen_rtx_REG (DImode
, regno
);
2602 emit_move_insn (offset
, frame_size_rtx
);
2605 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
2606 stack_pointer_rtx
, offset
));
2608 if (! frame_pointer_needed
)
2610 RTX_FRAME_RELATED_P (insn
) = 1;
2611 if (GET_CODE (offset
) != CONST_INT
)
2614 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2615 gen_rtx_SET (VOIDmode
,
2617 gen_rtx_PLUS (DImode
,
2624 /* ??? At this point we must generate a magic insn that appears to
2625 modify the stack pointer, the frame pointer, and all spill
2626 iterators. This would allow the most scheduling freedom. For
2627 now, just hard stop. */
2628 emit_insn (gen_blockage ());
2631 /* Must copy out ar.unat before doing any integer spills. */
2632 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2634 if (current_frame_info
.reg_save_ar_unat
)
2636 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2639 alt_regno
= next_scratch_gr_reg ();
2640 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2641 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2644 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2645 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
2646 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_unat
!= 0);
2648 /* Even if we're not going to generate an epilogue, we still
2649 need to save the register so that EH works. */
2650 if (! epilogue_p
&& current_frame_info
.reg_save_ar_unat
)
2651 emit_insn (gen_prologue_use (ar_unat_save_reg
));
2654 ar_unat_save_reg
= NULL_RTX
;
2656 /* Spill all varargs registers. Do this before spilling any GR registers,
2657 since we want the UNAT bits for the GR registers to override the UNAT
2658 bits from varargs, which we don't care about. */
2661 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
2663 reg
= gen_rtx_REG (DImode
, regno
);
2664 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
2667 /* Locate the bottom of the register save area. */
2668 cfa_off
= (current_frame_info
.spill_cfa_off
2669 + current_frame_info
.spill_size
2670 + current_frame_info
.extra_spill_size
);
2672 /* Save the predicate register block either in a register or in memory. */
2673 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2675 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2676 if (current_frame_info
.reg_save_pr
!= 0)
2678 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2679 insn
= emit_move_insn (alt_reg
, reg
);
2681 /* ??? Denote pr spill/fill by a DImode move that modifies all
2682 64 hard registers. */
2683 RTX_FRAME_RELATED_P (insn
) = 1;
2685 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2686 gen_rtx_SET (VOIDmode
, alt_reg
, reg
),
2689 /* Even if we're not going to generate an epilogue, we still
2690 need to save the register so that EH works. */
2692 emit_insn (gen_prologue_use (alt_reg
));
2696 alt_regno
= next_scratch_gr_reg ();
2697 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2698 insn
= emit_move_insn (alt_reg
, reg
);
2699 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2704 /* Handle AR regs in numerical order. All of them get special handling. */
2705 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
2706 && current_frame_info
.reg_save_ar_unat
== 0)
2708 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2709 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
2713 /* The alloc insn already copied ar.pfs into a general register. The
2714 only thing we have to do now is copy that register to a stack slot
2715 if we'd not allocated a local register for the job. */
2716 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
)
2717 && current_frame_info
.reg_save_ar_pfs
== 0)
2719 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2720 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
2724 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2726 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2727 if (current_frame_info
.reg_save_ar_lc
!= 0)
2729 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2730 insn
= emit_move_insn (alt_reg
, reg
);
2731 RTX_FRAME_RELATED_P (insn
) = 1;
2733 /* Even if we're not going to generate an epilogue, we still
2734 need to save the register so that EH works. */
2736 emit_insn (gen_prologue_use (alt_reg
));
2740 alt_regno
= next_scratch_gr_reg ();
2741 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2742 emit_move_insn (alt_reg
, reg
);
2743 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2748 if (current_frame_info
.reg_save_gp
)
2750 insn
= emit_move_insn (gen_rtx_REG (DImode
,
2751 current_frame_info
.reg_save_gp
),
2752 pic_offset_table_rtx
);
2753 /* We don't know for sure yet if this is actually needed, since
2754 we've not split the PIC call patterns. If all of the calls
2755 are indirect, and not followed by any uses of the gp, then
2756 this save is dead. Allow it to go away. */
2758 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, REG_NOTES (insn
));
2761 /* We should now be at the base of the gr/br/fr spill area. */
2762 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2763 + current_frame_info
.spill_size
))
2766 /* Spill all general registers. */
2767 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2768 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2770 reg
= gen_rtx_REG (DImode
, regno
);
2771 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
2775 /* Handle BR0 specially -- it may be getting stored permanently in
2776 some GR register. */
2777 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2779 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2780 if (current_frame_info
.reg_save_b0
!= 0)
2782 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2783 insn
= emit_move_insn (alt_reg
, reg
);
2784 RTX_FRAME_RELATED_P (insn
) = 1;
2786 /* Even if we're not going to generate an epilogue, we still
2787 need to save the register so that EH works. */
2789 emit_insn (gen_prologue_use (alt_reg
));
2793 alt_regno
= next_scratch_gr_reg ();
2794 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2795 emit_move_insn (alt_reg
, reg
);
2796 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2801 /* Spill the rest of the BR registers. */
2802 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2803 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2805 alt_regno
= next_scratch_gr_reg ();
2806 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2807 reg
= gen_rtx_REG (DImode
, regno
);
2808 emit_move_insn (alt_reg
, reg
);
2809 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2813 /* Align the frame and spill all FR registers. */
2814 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2815 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2819 reg
= gen_rtx_REG (TFmode
, regno
);
2820 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
2824 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2827 finish_spill_pointers ();
2830 /* Called after register allocation to add any instructions needed for the
2831 epilogue. Using an epilogue insn is favored compared to putting all of the
2832 instructions in output_function_epilogue(), since it allows the scheduler
2833 to intermix instructions with the saves of the caller saved registers. In
2834 some cases, it might be necessary to emit a barrier instruction as the last
2835 insn to prevent such scheduling. */
2838 ia64_expand_epilogue (sibcall_p
)
2841 rtx insn
, reg
, alt_reg
, ar_unat_save_reg
;
2842 int regno
, alt_regno
, cfa_off
;
2844 ia64_compute_frame_size (get_frame_size ());
2846 /* If there is a frame pointer, then we use it instead of the stack
2847 pointer, so that the stack pointer does not need to be valid when
2848 the epilogue starts. See EXIT_IGNORE_STACK. */
2849 if (frame_pointer_needed
)
2850 setup_spill_pointers (current_frame_info
.n_spilled
,
2851 hard_frame_pointer_rtx
, 0);
2853 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
2854 current_frame_info
.total_size
);
2856 if (current_frame_info
.total_size
!= 0)
2858 /* ??? At this point we must generate a magic insn that appears to
2859 modify the spill iterators and the frame pointer. This would
2860 allow the most scheduling freedom. For now, just hard stop. */
2861 emit_insn (gen_blockage ());
2864 /* Locate the bottom of the register save area. */
2865 cfa_off
= (current_frame_info
.spill_cfa_off
2866 + current_frame_info
.spill_size
2867 + current_frame_info
.extra_spill_size
);
2869 /* Restore the predicate registers. */
2870 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2872 if (current_frame_info
.reg_save_pr
!= 0)
2873 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2876 alt_regno
= next_scratch_gr_reg ();
2877 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2878 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2881 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2882 emit_move_insn (reg
, alt_reg
);
2885 /* Restore the application registers. */
2887 /* Load the saved unat from the stack, but do not restore it until
2888 after the GRs have been restored. */
2889 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2891 if (current_frame_info
.reg_save_ar_unat
!= 0)
2893 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2896 alt_regno
= next_scratch_gr_reg ();
2897 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2898 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2899 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
2904 ar_unat_save_reg
= NULL_RTX
;
2906 if (current_frame_info
.reg_save_ar_pfs
!= 0)
2908 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_pfs
);
2909 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2910 emit_move_insn (reg
, alt_reg
);
2912 else if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
2914 alt_regno
= next_scratch_gr_reg ();
2915 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2916 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2918 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2919 emit_move_insn (reg
, alt_reg
);
2922 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2924 if (current_frame_info
.reg_save_ar_lc
!= 0)
2925 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2928 alt_regno
= next_scratch_gr_reg ();
2929 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2930 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2933 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2934 emit_move_insn (reg
, alt_reg
);
2937 /* We should now be at the base of the gr/br/fr spill area. */
2938 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2939 + current_frame_info
.spill_size
))
2942 /* The GP may be stored on the stack in the prologue, but it's
2943 never restored in the epilogue. Skip the stack slot. */
2944 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, GR_REG (1)))
2947 /* Restore all general registers. */
2948 for (regno
= GR_REG (2); regno
<= GR_REG (31); ++regno
)
2949 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2951 reg
= gen_rtx_REG (DImode
, regno
);
2952 do_restore (gen_gr_restore
, reg
, cfa_off
);
2956 /* Restore the branch registers. Handle B0 specially, as it may
2957 have gotten stored in some GR register. */
2958 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2960 if (current_frame_info
.reg_save_b0
!= 0)
2961 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2964 alt_regno
= next_scratch_gr_reg ();
2965 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2966 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2969 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2970 emit_move_insn (reg
, alt_reg
);
2973 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2974 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2976 alt_regno
= next_scratch_gr_reg ();
2977 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2978 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2980 reg
= gen_rtx_REG (DImode
, regno
);
2981 emit_move_insn (reg
, alt_reg
);
2984 /* Restore floating point registers. */
2985 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2986 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2990 reg
= gen_rtx_REG (TFmode
, regno
);
2991 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
2995 /* Restore ar.unat for real. */
2996 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2998 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2999 emit_move_insn (reg
, ar_unat_save_reg
);
3002 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
3005 finish_spill_pointers ();
3007 if (current_frame_info
.total_size
|| cfun
->machine
->ia64_eh_epilogue_sp
)
3009 /* ??? At this point we must generate a magic insn that appears to
3010 modify the spill iterators, the stack pointer, and the frame
3011 pointer. This would allow the most scheduling freedom. For now,
3013 emit_insn (gen_blockage ());
3016 if (cfun
->machine
->ia64_eh_epilogue_sp
)
3017 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
3018 else if (frame_pointer_needed
)
3020 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
3021 RTX_FRAME_RELATED_P (insn
) = 1;
3023 else if (current_frame_info
.total_size
)
3025 rtx offset
, frame_size_rtx
;
3027 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
3028 if (CONST_OK_FOR_I (current_frame_info
.total_size
))
3029 offset
= frame_size_rtx
;
3032 regno
= next_scratch_gr_reg ();
3033 offset
= gen_rtx_REG (DImode
, regno
);
3034 emit_move_insn (offset
, frame_size_rtx
);
3037 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
3040 RTX_FRAME_RELATED_P (insn
) = 1;
3041 if (GET_CODE (offset
) != CONST_INT
)
3044 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
3045 gen_rtx_SET (VOIDmode
,
3047 gen_rtx_PLUS (DImode
,
3054 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
3055 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
3058 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
3061 int fp
= GR_REG (2);
3062 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
3063 first available call clobbered register. If there was a frame_pointer
3064 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3065 so we have to make sure we're using the string "r2" when emitting
3066 the register name for the assembler. */
3067 if (current_frame_info
.reg_fp
&& current_frame_info
.reg_fp
== GR_REG (2))
3068 fp
= HARD_FRAME_POINTER_REGNUM
;
3070 /* We must emit an alloc to force the input registers to become output
3071 registers. Otherwise, if the callee tries to pass its parameters
3072 through to another call without an intervening alloc, then these
3074 /* ??? We don't need to preserve all input registers. We only need to
3075 preserve those input registers used as arguments to the sibling call.
3076 It is unclear how to compute that number here. */
3077 if (current_frame_info
.n_input_regs
!= 0)
3078 emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
3079 GEN_INT (0), GEN_INT (0),
3080 GEN_INT (current_frame_info
.n_input_regs
),
3085 /* Return 1 if br.ret can do all the work required to return from a
3089 ia64_direct_return ()
3091 if (reload_completed && ! frame_pointer_needed)
3093 ia64_compute_frame_size (get_frame_size ());
3095 return (current_frame_info.total_size == 0
3096         && current_frame_info.n_spilled == 0
3097         && current_frame_info.reg_save_b0 == 0
3098         && current_frame_info.reg_save_pr == 0
3099         && current_frame_info.reg_save_ar_pfs == 0
3100         && current_frame_info.reg_save_ar_unat == 0
3101         && current_frame_info.reg_save_ar_lc == 0);
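/* For example, a small leaf function that needs no stack frame and saves
   none of the registers listed above satisfies every condition here, so its
   return can be a bare br.ret with no epilogue code at all.  */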
3106 /* Return the magic cookie that we use to hold the return address
3107    during early compilation.  */
3110 ia64_return_addr_rtx (count, frame)
3111 HOST_WIDE_INT count;
3112 rtx frame ATTRIBUTE_UNUSED;
3116 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3119 /* Split this value after reload, now that we know where the return
3120    address is saved.  */
3123 ia64_split_return_addr_rtx (dest)
3128 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3130 if (current_frame_info.reg_save_b0 != 0)
3131 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3137 /* Compute offset from CFA for BR0.  */
3138 /* ??? Must be kept in sync with ia64_expand_prologue.  */
3139 off = (current_frame_info.spill_cfa_off
3140        + current_frame_info.spill_size);
3141 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3142 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3145 /* Convert CFA offset to a register based offset.  */
3146 if (frame_pointer_needed)
3147 src = hard_frame_pointer_rtx;
3150 src = stack_pointer_rtx;
3151 off += current_frame_info.total_size;
3154 /* Load address into scratch register.  */
3155 if (CONST_OK_FOR_I (off))
3156 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3159 emit_move_insn (dest, GEN_INT (off));
3160 emit_insn (gen_adddi3 (dest, src, dest));
3163 src = gen_rtx_MEM (Pmode, dest);
3167 src = gen_rtx_REG (DImode, BR_REG (0));
3169 emit_move_insn (dest, src);
3173 ia64_hard_regno_rename_ok (from, to)
3177 /* Don't clobber any of the registers we reserved for the prologue.  */
3178 if (to == current_frame_info.reg_fp
3179     || to == current_frame_info.reg_save_b0
3180     || to == current_frame_info.reg_save_pr
3181     || to == current_frame_info.reg_save_ar_pfs
3182     || to == current_frame_info.reg_save_ar_unat
3183     || to == current_frame_info.reg_save_ar_lc)
3186 if (from == current_frame_info.reg_fp
3187     || from == current_frame_info.reg_save_b0
3188     || from == current_frame_info.reg_save_pr
3189     || from == current_frame_info.reg_save_ar_pfs
3190     || from == current_frame_info.reg_save_ar_unat
3191     || from == current_frame_info.reg_save_ar_lc)
3194 /* Don't use output registers outside the register frame.  */
3195 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3198 /* Retain even/oddness on predicate register pairs.  */
3199 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3200 return (from & 1) == (to & 1);
3205 /* Target hook for assembling integer objects.  Handle word-sized
3206    aligned objects and detect the cases when @fptr is needed.  */
3209 ia64_assemble_integer (x, size, aligned_p)
3214 if (size == (TARGET_ILP32 ? 4 : 8)
3216     && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3217     && GET_CODE (x) == SYMBOL_REF
3218     && SYMBOL_REF_FUNCTION_P (x))
3221 fputs ("\tdata4\t@fptr(", asm_out_file);
3223 fputs ("\tdata8\t@fptr(", asm_out_file);
3224 output_addr_const (asm_out_file, x);
3225 fputs (")\n", asm_out_file);
3228 return default_assemble_integer (x, size, aligned_p);
3231 /* Emit the function prologue. */
3234 ia64_output_function_prologue (file
, size
)
3236 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
3238 int mask
, grsave
, grsave_prev
;
3240 if (current_frame_info
.need_regstk
)
3241 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
3242 current_frame_info
.n_input_regs
,
3243 current_frame_info
.n_local_regs
,
3244 current_frame_info
.n_output_regs
,
3245 current_frame_info
.n_rotate_regs
);
3247 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
3250 /* Emit the .prologue directive. */
3253 grsave
= grsave_prev
= 0;
3254 if (current_frame_info
.reg_save_b0
!= 0)
3257 grsave
= grsave_prev
= current_frame_info
.reg_save_b0
;
3259 if (current_frame_info
.reg_save_ar_pfs
!= 0
3260 && (grsave_prev
== 0
3261 || current_frame_info
.reg_save_ar_pfs
== grsave_prev
+ 1))
3264 if (grsave_prev
== 0)
3265 grsave
= current_frame_info
.reg_save_ar_pfs
;
3266 grsave_prev
= current_frame_info
.reg_save_ar_pfs
;
3268 if (current_frame_info
.reg_fp
!= 0
3269 && (grsave_prev
== 0
3270 || current_frame_info
.reg_fp
== grsave_prev
+ 1))
3273 if (grsave_prev
== 0)
3274 grsave
= HARD_FRAME_POINTER_REGNUM
;
3275 grsave_prev
= current_frame_info
.reg_fp
;
3277 if (current_frame_info
.reg_save_pr
!= 0
3278 && (grsave_prev
== 0
3279 || current_frame_info
.reg_save_pr
== grsave_prev
+ 1))
3282 if (grsave_prev
== 0)
3283 grsave
= current_frame_info
.reg_save_pr
;
3287 fprintf (file
, "\t.prologue %d, %d\n", mask
,
3288 ia64_dbx_register_number (grsave
));
3290 fputs ("\t.prologue\n", file
);
3292 /* Emit a .spill directive, if necessary, to relocate the base of
3293 the register spill area. */
3294 if (current_frame_info
.spill_cfa_off
!= -16)
3295 fprintf (file
, "\t.spill %ld\n",
3296 (long) (current_frame_info
.spill_cfa_off
3297 + current_frame_info
.spill_size
));
3300 /* Emit the .body directive at the scheduled end of the prologue. */
3303 ia64_output_function_end_prologue (file
)
3306 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
3309 fputs ("\t.body\n", file
);
3312 /* Emit the function epilogue. */
3315 ia64_output_function_epilogue (file
, size
)
3316 FILE *file ATTRIBUTE_UNUSED
;
3317 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
3321 if (current_frame_info
.reg_fp
)
3323 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
3324 reg_names
[HARD_FRAME_POINTER_REGNUM
]
3325 = reg_names
[current_frame_info
.reg_fp
];
3326 reg_names
[current_frame_info
.reg_fp
] = tmp
;
3328 if (! TARGET_REG_NAMES
)
3330 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
3331 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
3332 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
3333 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
3334 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
3335 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
3338 current_frame_info
.initialized
= 0;
3342 ia64_dbx_register_number (regno
)
3345 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3346 from its home at loc79 to something inside the register frame. We
3347 must perform the same renumbering here for the debug info. */
3348 if (current_frame_info
.reg_fp
)
3350 if (regno
== HARD_FRAME_POINTER_REGNUM
)
3351 regno
= current_frame_info
.reg_fp
;
3352 else if (regno
== current_frame_info
.reg_fp
)
3353 regno
= HARD_FRAME_POINTER_REGNUM
;
3356 if (IN_REGNO_P (regno
))
3357 return 32 + regno
- IN_REG (0);
3358 else if (LOC_REGNO_P (regno
))
3359 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
3360 else if (OUT_REGNO_P (regno
))
3361 return (32 + current_frame_info
.n_input_regs
3362 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
3368 ia64_initialize_trampoline (addr, fnaddr, static_chain)
3369 rtx addr, fnaddr, static_chain;
3371 rtx addr_reg, eight = GEN_INT (8);
3373 /* Load up our iterator.  */
3374 addr_reg = gen_reg_rtx (Pmode);
3375 emit_move_insn (addr_reg, addr);
3377 /* The first two words are the fake descriptor:
3378    __ia64_trampoline, ADDR+16.  */
3379 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3380                 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3381 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3383 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3384                 copy_to_reg (plus_constant (addr, 16)));
3385 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3387 /* The third word is the target descriptor.  */
3388 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3389 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3391 /* The fourth word is the static chain.  */
3392 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
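/* The resulting trampoline block at ADDR is therefore:
     ADDR+ 0: address of __ia64_trampoline
     ADDR+ 8: ADDR+16 (second half of the fake descriptor)
     ADDR+16: FNADDR (the target descriptor word)
     ADDR+24: STATIC_CHAIN  */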
3395 /* Do any needed setup for a variadic function.  CUM has not been updated
3396    for the last named argument which has type TYPE and mode MODE.
3398    We generate the actual spill instructions during prologue generation.  */
3401 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3402 CUMULATIVE_ARGS cum;
3406 int second_time ATTRIBUTE_UNUSED;
3408 /* Skip the current argument.  */
3409 ia64_function_arg_advance (&cum, int_mode, type, 1);
3411 if (cum.words < MAX_ARGUMENT_SLOTS)
3413 int n = MAX_ARGUMENT_SLOTS - cum.words;
3414 *pretend_size = n * UNITS_PER_WORD;
3415 cfun->machine->n_varargs = n;
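/* Worked example, assuming MAX_ARGUMENT_SLOTS is 8 and UNITS_PER_WORD is 8:
   for `int f (int a, ...)', cum.words is 1 after skipping A, so n = 7,
   *pretend_size = 56 bytes, and the prologue later spills the seven
   remaining argument GRs into that pretend-args area.  */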
3419 /* Check whether TYPE is a homogeneous floating point aggregate. If
3420 it is, return the mode of the floating point type that appears
3421 in all leafs. If it is not, return VOIDmode.
3423    An aggregate is a homogeneous floating point aggregate if all
3424    fields/elements in it have the same floating point type (e.g.,
3425 SFmode). 128-bit quad-precision floats are excluded. */
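/* For illustration: `struct { float x, y, z; }' is an SFmode HFA, while
   `struct { float x; double y; }' or a struct containing any integer member
   is not, and this function returns VOIDmode for it.  */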
3427 static enum machine_mode
3428 hfa_element_mode (type
, nested
)
3432 enum machine_mode element_mode
= VOIDmode
;
3433 enum machine_mode mode
;
3434 enum tree_code code
= TREE_CODE (type
);
3435 int know_element_mode
= 0;
3440 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
3441 case BOOLEAN_TYPE
: case CHAR_TYPE
: case POINTER_TYPE
:
3442 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
3443 case FILE_TYPE
: case SET_TYPE
: case LANG_TYPE
:
3447 /* Fortran complex types are supposed to be HFAs, so we need to handle
3448 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3451 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
3452 && (TYPE_MODE (type
) != TCmode
|| INTEL_EXTENDED_IEEE_FORMAT
))
3453 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type
))
3454 * BITS_PER_UNIT
, MODE_FLOAT
, 0);
3459 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3460 mode if this is contained within an aggregate. */
3461 if (nested
&& (TYPE_MODE (type
) != TFmode
|| INTEL_EXTENDED_IEEE_FORMAT
))
3462 return TYPE_MODE (type
);
3467 return hfa_element_mode (TREE_TYPE (type
), 1);
3471 case QUAL_UNION_TYPE
:
3472 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
3474 if (TREE_CODE (t
) != FIELD_DECL
)
3477 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
3478 if (know_element_mode
)
3480 if (mode
!= element_mode
)
3483 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
3487 know_element_mode
= 1;
3488 element_mode
= mode
;
3491 return element_mode
;
3494 /* If we reach here, we probably have some front-end specific type
3495 that the backend doesn't know about. This can happen via the
3496 aggregate_value_p call in init_function_start. All we can do is
3497 ignore unknown tree types. */
3504 /* Return rtx for register where argument is passed, or zero if it is passed
3507 /* ??? 128-bit quad-precision floats are always passed in general
3511 ia64_function_arg (cum
, mode
, type
, named
, incoming
)
3512 CUMULATIVE_ARGS
*cum
;
3513 enum machine_mode mode
;
3518 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
3519 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
3520 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
3523 enum machine_mode hfa_mode
= VOIDmode
;
3525 /* Integer and float arguments larger than 8 bytes start at the next even
3526 boundary. Aggregates larger than 8 bytes start at the next even boundary
3527 if the aggregate has 16 byte alignment. Net effect is that types with
3528 alignment greater than 8 start at the next even boundary. */
3529 /* ??? The ABI does not specify how to handle aggregates with alignment from
3530 9 to 15 bytes, or greater than 16. We handle them all as if they had
3531 16 byte alignment. Such aggregates can occur only if gcc extensions are
3533 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3535 && (cum
->words
& 1))
3538 /* If all argument slots are used, then it must go on the stack. */
3539 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
3542 /* Check for and handle homogeneous FP aggregates. */
3544 hfa_mode
= hfa_element_mode (type
, 0);
3546 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3547 and unprototyped hfas are passed specially. */
3548 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3552 int fp_regs
= cum
->fp_regs
;
3553 int int_regs
= cum
->words
+ offset
;
3554 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3558 /* If prototyped, pass it in FR regs then GR regs.
3559 If not prototyped, pass it in both FR and GR regs.
3561 If this is an SFmode aggregate, then it is possible to run out of
3562 FR regs while GR regs are still left. In that case, we pass the
3563 remaining part in the GR regs. */
3565 /* Fill the FP regs. We do this always. We stop if we reach the end
3566 of the argument, the last FP register, or the last argument slot. */
3568 byte_size
= ((mode
== BLKmode
)
3569 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3570 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3572 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3573 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
3575 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3576 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
3580 args_byte_size
+= hfa_size
;
3584 /* If no prototype, then the whole thing must go in GR regs. */
3585 if (! cum
->prototype
)
3587 /* If this is an SFmode aggregate, then we might have some left over
3588 that needs to go in GR regs. */
3589 else if (byte_size
!= offset
)
3590 int_regs
+= offset
/ UNITS_PER_WORD
;
3592 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3594 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
3596 enum machine_mode gr_mode
= DImode
;
3598 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3599 then this goes in a GR reg left adjusted/little endian, right
3600 adjusted/big endian. */
3601 /* ??? Currently this is handled wrong, because 4-byte hunks are
3602 always right adjusted/little endian. */
3605 /* If we have an even 4 byte hunk because the aggregate is a
3606 multiple of 4 bytes in size, then this goes in a GR reg right
3607 adjusted/little endian. */
3608 else if (byte_size
- offset
== 4)
3610 /* Complex floats need to have float mode. */
3611 if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
3614 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3615 gen_rtx_REG (gr_mode
, (basereg
3618 offset
+= GET_MODE_SIZE (gr_mode
);
3619 int_regs
+= GET_MODE_SIZE (gr_mode
) <= UNITS_PER_WORD
3620 ? 1 : GET_MODE_SIZE (gr_mode
) / UNITS_PER_WORD
;
3623 /* If we ended up using just one location, just return that one loc. */
3625 return XEXP (loc
[0], 0);
3627 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3630 /* Integral and aggregates go in general registers. If we have run out of
3631 FR registers, then FP values must also go in general registers. This can
3632 happen when we have a SFmode HFA. */
3633 else if (((mode
== TFmode
) && ! INTEL_EXTENDED_IEEE_FORMAT
)
3634 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
3636 int byte_size
= ((mode
== BLKmode
)
3637 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3638 if (BYTES_BIG_ENDIAN
3639 && (mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3640 && byte_size
< UNITS_PER_WORD
3643 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3644 gen_rtx_REG (DImode
,
3645 (basereg
+ cum
->words
3648 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3651 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3655 /* If there is a prototype, then FP values go in a FR register when
3656 named, and in a GR register when unnamed. */
3657 else if (cum
->prototype
)
3660 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3662 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
3664 /* If there is no prototype, then FP values go in both FR and GR
3668 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3669 gen_rtx_REG (mode
, (FR_ARG_FIRST
3672 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3674 (basereg
+ cum
->words
3678 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
3682 /* Return number of words, at the beginning of the argument, that must be
3683    put in registers.  0 if the argument is entirely in registers or entirely
3687 ia64_function_arg_partial_nregs (cum, mode, type, named)
3688 CUMULATIVE_ARGS *cum;
3689 enum machine_mode mode;
3691 int named ATTRIBUTE_UNUSED;
3693 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3694                : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3698 /* Arguments with alignment larger than 8 bytes start at the next even
3700 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3702     && (cum->words & 1))
3705 /* If all argument slots are used, then it must go on the stack.  */
3706 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3709 /* It doesn't matter whether the argument goes in FR or GR regs.  If
3710    it fits within the 8 argument slots, then it goes entirely in
3711    registers.  If it extends past the last argument slot, then the rest
3712    goes on the stack.  */
3714 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3717 return MAX_ARGUMENT_SLOTS - cum->words - offset;
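/* For example, with MAX_ARGUMENT_SLOTS == 8: a four-word aggregate whose
   first word lands in slot 6 (cum->words == 6, offset == 0) gets
   8 - 6 = 2 words in registers, and the remaining two words go on the
   stack.  */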
3720 /* Update CUM to point after this argument. This is patterned after
3721 ia64_function_arg. */
3724 ia64_function_arg_advance (cum
, mode
, type
, named
)
3725 CUMULATIVE_ARGS
*cum
;
3726 enum machine_mode mode
;
3730 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
3731 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
3734 enum machine_mode hfa_mode
= VOIDmode
;
3736 /* If all arg slots are already full, then there is nothing to do. */
3737 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
3740 /* Arguments with alignment larger than 8 bytes start at the next even
3742 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3744 && (cum
->words
& 1))
3747 cum
->words
+= words
+ offset
;
3749 /* Check for and handle homogeneous FP aggregates. */
3751 hfa_mode
= hfa_element_mode (type
, 0);
3753 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3754 and unprototyped hfas are passed specially. */
3755 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3757 int fp_regs
= cum
->fp_regs
;
3758 /* This is the original value of cum->words + offset. */
3759 int int_regs
= cum
->words
- words
;
3760 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3764 /* If prototyped, pass it in FR regs then GR regs.
3765 If not prototyped, pass it in both FR and GR regs.
3767 If this is an SFmode aggregate, then it is possible to run out of
3768 FR regs while GR regs are still left. In that case, we pass the
3769 remaining part in the GR regs. */
3771 /* Fill the FP regs. We do this always. We stop if we reach the end
3772 of the argument, the last FP register, or the last argument slot. */
3774 byte_size
= ((mode
== BLKmode
)
3775 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3776 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3778 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3779 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
3782 args_byte_size
+= hfa_size
;
3786 cum
->fp_regs
= fp_regs
;
3789 /* Integral and aggregates go in general registers. If we have run out of
3790 FR registers, then FP values must also go in general registers. This can
3791 happen when we have a SFmode HFA. */
3792 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
3793 cum
->int_regs
= cum
->words
;
3795 /* If there is a prototype, then FP values go in a FR register when
3796 named, and in a GR register when unnamed. */
3797 else if (cum
->prototype
)
3800 cum
->int_regs
= cum
->words
;
3802 /* ??? Complex types should not reach here. */
3803 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3805 /* If there is no prototype, then FP values go in both FR and GR
3809 /* ??? Complex types should not reach here. */
3810 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3811 cum
->int_regs
= cum
->words
;
3815 /* Variable sized types are passed by reference.  */
3816 /* ??? At present this is a GCC extension to the IA-64 ABI.  */
3819 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3820 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3821 enum machine_mode mode ATTRIBUTE_UNUSED;
3823 int named ATTRIBUTE_UNUSED;
3825 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3828 /* True if it is OK to do sibling call optimization for the specified
3829    call expression EXP.  DECL will be the called function, or NULL if
3830    this is an indirect call.  */
3832 ia64_function_ok_for_sibcall (decl, exp)
3834 tree exp ATTRIBUTE_UNUSED;
3836 /* We must always return with our current GP.  This means we can
3837    only sibcall to functions defined in the current module.  */
3838 return decl && (*targetm.binds_local_p) (decl);
3842 /* Implement va_arg.  */
3845 ia64_va_arg (valist, type)
3850 /* Variable sized types are passed by reference.  */
3851 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3853 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3854 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3857 /* Arguments with alignment larger than 8 bytes start at the next even
3859 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3861 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3862            build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3863 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3864            build_int_2 (-2 * UNITS_PER_WORD, -1));
3865 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3866 TREE_SIDE_EFFECTS (t) = 1;
3867 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3870 return std_expand_builtin_va_arg (valist, type);
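/* With UNITS_PER_WORD == 8, the tree built above computes
   valist = (valist + 15) & -16, i.e. it rounds the argument pointer up to
   the next 16-byte boundary before falling back to the standard va_arg
   expansion.  */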
3873 /* Return 1 if the function return value is returned in memory.  Return 0 if it is
3877 ia64_return_in_memory (valtype)
3880 enum machine_mode mode;
3881 enum machine_mode hfa_mode;
3882 HOST_WIDE_INT byte_size;
3884 mode = TYPE_MODE (valtype);
3885 byte_size = GET_MODE_SIZE (mode);
3886 if (mode == BLKmode)
3888 byte_size = int_size_in_bytes (valtype);
3893 /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
3895 hfa_mode = hfa_element_mode (valtype, 0);
3896 if (hfa_mode != VOIDmode)
3898 int hfa_size = GET_MODE_SIZE (hfa_mode);
3900 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3905 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
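/* So, for instance, an HFA of eight doubles (64 bytes) is still returned in
   FP registers, an HFA with more than MAX_ARGUMENT_SLOTS elements goes to
   memory, and a non-HFA value goes to memory once it exceeds
   UNITS_PER_WORD * MAX_INT_RETURN_SLOTS bytes.  */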
3911 /* Return rtx for register that holds the function return value. */
3914 ia64_function_value (valtype
, func
)
3916 tree func ATTRIBUTE_UNUSED
;
3918 enum machine_mode mode
;
3919 enum machine_mode hfa_mode
;
3921 mode
= TYPE_MODE (valtype
);
3922 hfa_mode
= hfa_element_mode (valtype
, 0);
3924 if (hfa_mode
!= VOIDmode
)
3932 hfa_size
= GET_MODE_SIZE (hfa_mode
);
3933 byte_size
= ((mode
== BLKmode
)
3934 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
3936 for (i
= 0; offset
< byte_size
; i
++)
3938 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3939 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
3945 return XEXP (loc
[0], 0);
3947 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3949 else if (FLOAT_TYPE_P (valtype
) &&
3950 ((mode
!= TFmode
) || INTEL_EXTENDED_IEEE_FORMAT
))
3951 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
3954 if (BYTES_BIG_ENDIAN
3955 && (mode
== BLKmode
|| (valtype
&& AGGREGATE_TYPE_P (valtype
))))
3963 bytesize
= int_size_in_bytes (valtype
);
3964 for (i
= 0; offset
< bytesize
; i
++)
3966 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3967 gen_rtx_REG (DImode
,
3970 offset
+= UNITS_PER_WORD
;
3972 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3975 return gen_rtx_REG (mode
, GR_RET_FIRST
);
3979 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3980    We need to emit DTP-relative relocations.  */
3983 ia64_output_dwarf_dtprel (file, size, x)
3990 fputs ("\tdata8.ua\t@dtprel(", file);
3991 output_addr_const (file, x);
3995 /* Print a memory address as an operand to reference that memory location.  */
3997 /* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
3998    also call this from ia64_print_operand for memory addresses.  */
4001 ia64_print_operand_address (stream, address)
4002 FILE * stream ATTRIBUTE_UNUSED;
4003 rtx address ATTRIBUTE_UNUSED;
4007 /* Print an operand to an assembler instruction.
4008 C Swap and print a comparison operator.
4009 D Print an FP comparison operator.
4010 E Print 32 - constant, for SImode shifts as extract.
4011 e Print 64 - constant, for DImode rotates.
4012 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4013 a floating point register emitted normally.
4014 I Invert a predicate register by adding 1.
4015 J Select the proper predicate register for a condition.
4016 j Select the inverse predicate register for a condition.
4017 O Append .acq for volatile load.
4018 P Postincrement of a MEM.
4019 Q Append .rel for volatile store.
4020 S Shift amount for shladd instruction.
4021 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4022 for Intel assembler.
4023 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4024 for Intel assembler.
4025 r Print register name, or constant 0 as r0. HP compatibility for
4028 ia64_print_operand (file
, x
, code
)
4038 /* Handled below. */
4043 enum rtx_code c
= swap_condition (GET_CODE (x
));
4044 fputs (GET_RTX_NAME (c
), file
);
4049 switch (GET_CODE (x
))
4061 str
= GET_RTX_NAME (GET_CODE (x
));
4068 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
4072 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
4076 if (x
== CONST0_RTX (GET_MODE (x
)))
4077 str
= reg_names
[FR_REG (0)];
4078 else if (x
== CONST1_RTX (GET_MODE (x
)))
4079 str
= reg_names
[FR_REG (1)];
4080 else if (GET_CODE (x
) == REG
)
4081 str
= reg_names
[REGNO (x
)];
4088 fputs (reg_names
[REGNO (x
) + 1], file
);
4094 unsigned int regno
= REGNO (XEXP (x
, 0));
4095 if (GET_CODE (x
) == EQ
)
4099 fputs (reg_names
[regno
], file
);
4104 if (MEM_VOLATILE_P (x
))
4105 fputs(".acq", file
);
4110 HOST_WIDE_INT value
;
4112 switch (GET_CODE (XEXP (x
, 0)))
4118 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
4119 if (GET_CODE (x
) == CONST_INT
)
4121 else if (GET_CODE (x
) == REG
)
4123 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
4131 value
= GET_MODE_SIZE (GET_MODE (x
));
4135 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
4139 fprintf (file
, ", " HOST_WIDE_INT_PRINT_DEC
, value
);
4144 if (MEM_VOLATILE_P (x
))
4145 fputs(".rel", file
);
4149 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
4153 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
4155 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
4161 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
4163 const char *prefix
= "0x";
4164 if (INTVAL (x
) & 0x80000000)
4166 fprintf (file
, "0xffffffff");
4169 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
4175 /* If this operand is the constant zero, write it as register zero.
4176 Any register, zero, or CONST_INT value is OK here. */
4177 if (GET_CODE (x
) == REG
)
4178 fputs (reg_names
[REGNO (x
)], file
);
4179 else if (x
== CONST0_RTX (GET_MODE (x
)))
4181 else if (GET_CODE (x
) == CONST_INT
)
4182 output_addr_const (file
, x
);
4184 output_operand_lossage ("invalid %%r value");
4191 /* For conditional branches, returns or calls, substitute
4192 sptk, dptk, dpnt, or spnt for %s. */
4193 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
4196 int pred_val
= INTVAL (XEXP (x
, 0));
4198 /* Guess top and bottom 10% statically predicted. */
4199 if (pred_val
< REG_BR_PROB_BASE
/ 50)
4201 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
4203 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98)
4208 else if (GET_CODE (current_output_insn
) == CALL_INSN
)
4213 fputs (which
, file
);
4218 x
= current_insn_predicate
;
4221 unsigned int regno
= REGNO (XEXP (x
, 0));
4222 if (GET_CODE (x
) == EQ
)
4224 fprintf (file
, "(%s) ", reg_names
[regno
]);
4229 output_operand_lossage ("ia64_print_operand: unknown code");
4233 switch (GET_CODE (x
))
4235 /* This happens for the spill/restore instructions. */
4240 /* ... fall through ... */
4243 fputs (reg_names
[REGNO (x
)], file
);
4248 rtx addr
= XEXP (x
, 0);
4249 if (GET_RTX_CLASS (GET_CODE (addr
)) == 'a')
4250 addr
= XEXP (addr
, 0);
4251 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
4256 output_addr_const (file
, x
);
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
/* ??? This is incomplete.  */
ia64_rtx_costs (x, code, outer_code, total)
     int code, outer_code;
      *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
      if (CONST_OK_FOR_I (INTVAL (x)))
      else if (CONST_OK_FOR_J (INTVAL (x)))
        *total = COSTS_N_INSNS (1);
      if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
      *total = COSTS_N_INSNS (1);
      *total = COSTS_N_INSNS (1);
      *total = COSTS_N_INSNS (3);
      /* For multiplies wider than HImode, we have to go to the FPU,
         which normally involves copies.  Plus there's the latency
         of the multiply itself, and the latency of the instructions to
         transfer integer regs to FP regs.  */
      /* ??? Check for FP mode.  */
      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
        *total = COSTS_N_INSNS (10);
        *total = COSTS_N_INSNS (2);
      *total = COSTS_N_INSNS (1);
      /* We make divide expensive, so that divide-by-constant will be
         optimized to a multiply.  */
      *total = COSTS_N_INSNS (60);
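/* Illustrative sketch (not part of GCC; guarded out of the build): the kind
   of transformation the high divide cost above is meant to make profitable.
   An unsigned division by the constant 3 can be replaced by a multiplication
   with a precomputed reciprocal and a shift; the magic constant below is the
   standard one for 32-bit operands, but the helper name and test harness are
   only an example.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t
example_div3_by_multiply (uint32_t x)
{
  /* 0xAAAAAAAB == ceil (2^33 / 3); the intermediate product needs 64 bits.  */
  return (uint32_t) (((uint64_t) x * 0xAAAAAAABULL) >> 33);
}

int
main (void)
{
  uint32_t x;

  for (x = 0; x < 100000; x++)
    assert (example_div3_by_multiply (x) == x / 3);
  assert (example_div3_by_multiply (0xFFFFFFFFu) == 0xFFFFFFFFu / 3);
  return 0;
}
#endif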
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO, using MODE.  */
ia64_register_move_cost (mode, from, to)
     enum machine_mode mode;
     enum reg_class from, to;
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
  if (from == ADDL_REGS)
  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
      enum reg_class tmp = to;
      to = from, from = tmp;
  /* Moving from FR<->GR in TFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as MEMORY_MOVE_COST
     to avoid spectacularly poor register class preferencing.  */
      if (to != GR_REGS || from != GR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);
      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);
      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);
      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);
    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
/* This function returns the register class required for a secondary
   register when copying between one of the registers in CLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */
ia64_secondary_reload_class (class, mode, x)
     enum reg_class class;
     enum machine_mode mode ATTRIBUTE_UNUSED;
  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);
      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
         interaction.  We end up with two pseudos with overlapping lifetimes
         both of which are equiv to the same constant, and both of which need
         to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
         changes depending on the path length, which means the qty_first_reg
         check in make_regs_eqv can give different answers at different times.
         At some point I'll probably need a reload_indi pattern to handle

         We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
         wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
         non-general registers for good measure.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
      /* This is needed if a pseudo used as a call_operand gets spilled to a
      if (GET_CODE (x) == MEM)
      /* Need to go through general registers to get to other class regs.  */
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
      /* This can happen when a paradoxical subreg is an operand to the
      /* ??? This shouldn't be necessary after instruction scheduling is
         enabled, because paradoxical subregs are not accepted by
         register_operand when INSN_SCHEDULING is defined.  Or alternatively,
         stop the paradoxical subreg stupidity in the *_operand functions
      if (GET_CODE (x) == MEM
          && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
              || GET_MODE (x) == QImode))
      /* This can happen because of the ior/and/etc patterns that accept FP
         registers as operands.  If the third operand is a constant, then it
         needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)
      /* This can happen because of register elimination in a muldi3 insn.
         E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
      /* ??? This happens if we cse/gcse a BImode value across a call,
         and the function has a nonlocal goto.  This is because global
         does not allocate call crossing pseudos to hard registers when
         current_function_has_nonlocal_goto is true.  This is relatively
         common for C++ programs that use exceptions.  To reproduce,
         return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
      /* This can happen when we take a BImode subreg of a DImode value,
         and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
  /* Since we have no offsettable memory addresses, we need a temporary
     to hold the address of the second word.  */
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */
ia64_asm_output_external (file, decl, name)
  int save_referenced;

  /* GNU as does not need anything here, but the HP linker does need
     something for external functions.  */
          || TREE_CODE (decl) != FUNCTION_DECL
          || strstr(name, "__builtin_") == name))
      /* ??? The Intel assembler creates a reference that needs to be satisfied by
         the linker when we do this, so we need to be careful not to do this for
         builtin functions which have no library equivalent.  Unfortunately, we
         can't tell here whether or not a function will actually be called by
         expand_expr, so we pull in library functions even if we may not need
      if (! strcmp (name, "__builtin_next_arg")
          || ! strcmp (name, "alloca")
          || ! strcmp (name, "__builtin_constant_p")
          || ! strcmp (name, "__builtin_args_info"))
      ia64_hpux_add_extern_decl (name);

      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
      if (TREE_CODE (decl) == FUNCTION_DECL)
        ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
      (*targetm.asm_out.globalize_label) (file, name);
      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
/* Parse the -mfixed-range= option string.  */
fix_range (const_str)
     const char *const_str;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

      dash = strchr (str, '-');
          warning ("value of -mfixed-range must have form REG1-REG2");
      comma = strchr (dash + 1, ',');

      first = decode_reg_name (str);
          warning ("unknown register name: %s", str);
      last = decode_reg_name (dash + 1);
          warning ("unknown register name: %s", dash + 1);
          warning ("%s-%s is an empty range", str, dash + 1);

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;
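/* Illustrative sketch (not part of GCC; guarded out of the build): the
   option-string shape accepted by fix_range above.  "-mfixed-range=f32-f127"
   marks f32 through f127 as fixed, and several ranges may be separated by
   commas, e.g. "f32-f127,f2-f5".  The splitting below mirrors the dash and
   comma handling above but prints the pieces instead of fixing registers;
   the helper is hypothetical.  */
#if 0
#include <stdio.h>
#include <string.h>

static void
example_split_ranges (const char *spec)
{
  char buf[128];
  char *p, *dash, *comma = NULL;

  strncpy (buf, spec, sizeof buf - 1);
  buf[sizeof buf - 1] = '\0';

  for (p = buf; p && *p; p = comma ? comma + 1 : NULL)
    {
      comma = strchr (p, ',');
      if (comma)
        *comma = '\0';
      dash = strchr (p, '-');
      if (!dash)
        {
          printf ("malformed range: %s\n", p);
          continue;
        }
      *dash = '\0';
      printf ("fix registers %s through %s\n", p, dash + 1);
    }
}

int
main (void)
{
  example_split_ranges ("f32-f127,f2-f5");
  return 0;
}
#endif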
static struct machine_function *
ia64_init_machine_status ()
  return ggc_alloc_cleared (sizeof (struct machine_function));
/* Handle TARGET_OPTIONS switches.  */
ia64_override_options ()
      const char *const name;           /* processor name or nickname.  */
      const enum processor_type processor;
    const processor_alias_table[] =
      {"itanium", PROCESSOR_ITANIUM},
      {"itanium1", PROCESSOR_ITANIUM},
      {"merced", PROCESSOR_ITANIUM},
      {"itanium2", PROCESSOR_ITANIUM2},
      {"mckinley", PROCESSOR_ITANIUM2},
  int const pta_size = ARRAY_SIZE (processor_alias_table);

  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
      warning ("cannot optimize floating point division for both latency and throughput");
      target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;

  if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
      warning ("cannot optimize integer division for both latency and throughput");
      target_flags &= ~MASK_INLINE_INT_DIV_THR;

  if (ia64_fixed_range_string)
    fix_range (ia64_fixed_range_string);

  if (ia64_tls_size_string)
      unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
      if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
        error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
        ia64_tls_size = tmp;

  if (!ia64_tune_string)
    ia64_tune_string = "itanium2";

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
        ia64_tune = processor_alias_table[i].processor;
    error ("bad value (%s) for -tune= switch", ia64_tune_string);

  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;

  /* Tell the compiler which flavor of TFmode we're using.  */
  if (INTEL_EXTENDED_IEEE_FORMAT)
    real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
static enum attr_type ia64_safe_type PARAMS((rtx));

static enum attr_itanium_class
ia64_safe_itanium_class (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
    return ITANIUM_CLASS_UNKNOWN;

static enum attr_type
ia64_safe_type (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
    return TYPE_UNKNOWN;
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly

#define REG_GP          (GR_REG (1))
#define REG_RP          (BR_REG (0))
#define REG_AR_CFM      (FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE    (FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0   (FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS        (AR_UNAT_BIT_0 + 64)
/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
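/* Illustrative sketch (not part of GCC; guarded out of the build): the WAW
   rule described above, reduced to a single register.  Predicates are
   represented by their register numbers, with pred == 0 standing for an
   unconditional write, and the sketch leans on the same assumption as the
   code below that P and P^1 form a complementary pair.  The structure and
   helper names are hypothetical.  */
#if 0
#include <assert.h>

struct example_state
{
  unsigned int write_count;     /* 0, 1 or 2, as in struct reg_write_state */
  unsigned int first_pred;      /* qualifying predicate of the first write */
};

/* Record a write under predicate PRED; return 1 if a stop bit would be
   needed before this write, 0 otherwise.  */
static int
example_record_write (struct example_state *s, unsigned int pred)
{
  switch (s->write_count)
    {
    case 0:                     /* not written yet in this group */
      s->write_count = pred ? 1 : 2;
      s->first_pred = pred;
      return 0;
    case 1:                     /* written once, under s->first_pred */
      if (pred != 0 && (s->first_pred ^ 1) == pred)
        {
          s->write_count = 2;   /* complementary predicate: still OK */
          return 0;
        }
      return 1;                 /* possibly overlapping write: need a stop bit */
    default:                    /* already written unconditionally */
      return 1;
    }
}

int
main (void)
{
  struct example_state s = { 0, 0 };

  assert (example_record_write (&s, 6) == 0);   /* (p6) writes the register */
  assert (example_record_write (&s, 7) == 0);   /* (p7) writes it; p7 == p6^1 */
  assert (example_record_write (&s, 8) == 1);   /* any further write needs ;; */
  return 0;
}
#endif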
struct reg_write_state
  unsigned int write_count : 2;
  unsigned int first_pred : 16;
  unsigned int written_by_fp : 1;
  unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
/* Info for the current instruction.  This gets copied to rws_sum after a
   stop bit is emitted.  */
struct reg_write_state rws_insn[NUM_REGS];

/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this, we hit
   the abort in ia64_variable_issue when scheduling an alloc.  */
static int first_instruction;

/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
  unsigned int is_write : 1;    /* Is register being written?  */
  unsigned int is_fp : 1;       /* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;   /* Is register used as part of a branch?  */
  unsigned int is_and : 1;      /* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;       /* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;  /* Is this a sibling or normal call?  */

static void rws_update PARAMS ((struct reg_write_state *, int,
                                struct reg_flags, int));
static int rws_access_regno PARAMS ((int, struct reg_flags, int));
static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
static void init_insn_group_barriers PARAMS ((void));
static int group_barrier_needed_p PARAMS ((rtx));
static int safe_group_barrier_needed_p PARAMS ((rtx));
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */
rws_update (rws, regno, flags, pred)
     struct reg_write_state *rws;
     struct reg_flags flags;
    rws[regno].write_count++;
    rws[regno].write_count = 2;
  rws[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws[regno].written_by_and = flags.is_and;
  rws[regno].written_by_or = flags.is_or;
  rws[regno].first_pred = pred;
/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */
rws_access_regno (regno, flags, pred)
     struct reg_flags flags;
  int need_barrier = 0;

  if (regno >= NUM_REGS)

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

      /* One insn writes same reg multiple times?  */
      if (rws_insn[regno].write_count > 0)

      /* Update info for current instruction.  */
      rws_update (rws_insn, regno, flags, pred);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
          /* The register has not been written yet.  */
          rws_update (rws_sum, regno, flags, pred);
          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if (flags.is_and && rws_sum[regno].written_by_and)
          else if (flags.is_or && rws_sum[regno].written_by_or)
          else if ((rws_sum[regno].first_pred ^ 1) != pred)
          rws_update (rws_sum, regno, flags, pred);
          /* The register has been unconditionally written already.  We
          if (flags.is_and && rws_sum[regno].written_by_and)
          else if (flags.is_or && rws_sum[regno].written_by_or)
          rws_sum[regno].written_by_and = flags.is_and;
          rws_sum[regno].written_by_or = flags.is_or;

      if (flags.is_branch)
          /* Branches have several RAW exceptions that allow to avoid
          if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
            /* RAW dependencies on branch regs are permissible as long
               as the writer is a non-branch instruction.  Since we
               never generate code that uses a branch register written
               by a branch instruction, handling this case is
          if (REGNO_REG_CLASS (regno) == PR_REGS
              && ! rws_sum[regno].written_by_fp)
            /* The predicates of a branch are available within the
               same insn group as long as the predicate was written by
               something other than a floating-point instruction.  */

      if (flags.is_and && rws_sum[regno].written_by_and)
      if (flags.is_or && rws_sum[regno].written_by_or)

      switch (rws_sum[regno].write_count)
          /* The register has not been written yet.  */
          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if ((rws_sum[regno].first_pred ^ 1) != pred)
          /* The register has been unconditionally written already.  We

  return need_barrier;
rws_access_reg (reg, flags, pred)
     struct reg_flags flags;
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

    return rws_access_regno (regno, flags, pred);
      int need_barrier = 0;
        need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
update_set_flags (x, pflags, ppred, pcond)
     struct reg_flags *pflags;
  rtx src = SET_SRC (x);

  switch (GET_CODE (src))
      if (SET_DEST (x) == pc_rtx)
        /* X is a conditional branch.  */
          int is_complemented = 0;

          /* X is a conditional move.  */
          rtx cond = XEXP (src, 0);
          if (GET_CODE (cond) == EQ)
            is_complemented = 1;
          cond = XEXP (cond, 0);
          if (GET_CODE (cond) != REG
              && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
          if (XEXP (src, 1) == SET_DEST (x)
              || XEXP (src, 2) == SET_DEST (x))
              /* X is a conditional move that conditionally writes the
              /* We need another complement in this case.  */
              if (XEXP (src, 1) == SET_DEST (x))
                is_complemented = ! is_complemented;
              *ppred = REGNO (cond);
              if (is_complemented)
          /* ??? If this is a conditional write to the dest, then this
             instruction does not actually read one source.  This probably
             doesn't matter, because that source is also the dest.  */
          /* ??? Multiple writes to predicate registers are allowed
             if they are all AND type compares, or if they are all OR
             type compares.  We do not generate such instructions
      /* ... fall through ...  */

      if (GET_RTX_CLASS (GET_CODE (src)) == '<'
          && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
        /* Set pflags->is_fp to 1 so that we know we're dealing
           with a floating point comparison when processing the
           destination of the SET.  */
      /* Discover if this is a parallel comparison.  We only handle
         and.orcm and or.andcm at present, since we must retain a
         strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
      else if (GET_CODE (src) == IOR)
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
set_src_needs_barrier (x, flags, pred, cond)
     struct reg_flags flags;
  int need_barrier = 0;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
      flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);

  need_barrier = rtx_needs_barrier (src, flags, pred);

  /* This instruction unconditionally uses a predicate register.  */
    need_barrier |= rws_access_reg (cond, flags, 0);

  if (GET_CODE (dst) == ZERO_EXTRACT)
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
      dst = XEXP (dst, 0);
  return need_barrier;
/* Handle an access to rtx X of type FLAGS using predicate register PRED.
   Return 1 if this access creates a dependency with an earlier instruction
   in the same group.  */
rtx_needs_barrier (x, flags, pred)
     struct reg_flags flags;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;

  switch (GET_CODE (x))
      update_set_flags (x, &new_flags, &pred, &cond);
      need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
      if (GET_CODE (SET_SRC (x)) != CALL)
          new_flags.is_write = 1;
          need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);

        new_flags.is_write = 0;
        need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

        /* Avoid multiple register writes, in case this is a pattern with
           multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
        if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
            new_flags.is_write = 1;
            need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
            need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
            need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);

      /* X is a predicated instruction.  */
      cond = COND_EXEC_TEST (x);
      need_barrier = rtx_needs_barrier (cond, flags, 0);
      if (GET_CODE (cond) == EQ)
        is_complemented = 1;
      cond = XEXP (cond, 0);
      if (GET_CODE (cond) != REG
          && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
      pred = REGNO (cond);
      if (is_complemented)
      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

      /* Clobber & use are for earlier compiler-phases only.  */

      /* We always emit stop bits for traditional asms.  We emit stop bits
         for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
          || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
          /* Avoid writing the register multiple times if we have multiple
             asm outputs.  This avoids an abort in rws_access_reg.  */
          if (! rws_insn[REG_VOLATILE].write_count)
              new_flags.is_write = 1;
              rws_access_regno (REG_VOLATILE, new_flags, pred);

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
         We can not just fall through here since then we would be confused
         by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
         traditional asms unlike their normal usage.  */
      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
        if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))

      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
          rtx pat = XVECEXP (x, 0, i);
          if (GET_CODE (pat) == SET)
              update_set_flags (pat, &new_flags, &pred, &cond);
              need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
          else if (GET_CODE (pat) == USE
                   || GET_CODE (pat) == CALL
                   || GET_CODE (pat) == ASM_OPERANDS)
            need_barrier |= rtx_needs_barrier (pat, flags, pred);
          else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)

      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
          rtx pat = XVECEXP (x, 0, i);
          if (GET_CODE (pat) == SET)
              if (GET_CODE (SET_SRC (pat)) != CALL)
                  new_flags.is_write = 1;
                  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
          else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
            need_barrier |= rtx_needs_barrier (pat, flags, pred);

      if (REGNO (x) == AR_UNAT_REGNUM)
          for (i = 0; i < 64; ++i)
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
        need_barrier = rws_access_reg (x, flags, pred);

      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);

    case CONST_INT:   case CONST_DOUBLE:
    case SYMBOL_REF:  case LABEL_REF:    case CONST:

      /* Operators with side-effects.  */
    case POST_INC:    case POST_DEC:
      if (GET_CODE (XEXP (x, 0)) != REG)
      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);

      if (GET_CODE (XEXP (x, 0)) != REG)
      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);

    case NEG:      case NOT:            case SIGN_EXTEND:     case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
    case SQRT:     case FFS:            case POPCOUNT:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);

      switch (XINT (x, 1))
        case UNSPEC_LTOFF_DTPMOD:
        case UNSPEC_LTOFF_DTPREL:
        case UNSPEC_LTOFF_TPREL:
        case UNSPEC_PRED_REL_MUTEX:
        case UNSPEC_PIC_CALL:
        case UNSPEC_FETCHADD_ACQ:
        case UNSPEC_BSP_VALUE:
        case UNSPEC_FLUSHRS:
        case UNSPEC_BUNDLE_SELECTOR:

        case UNSPEC_GR_SPILL:
        case UNSPEC_GR_RESTORE:
            HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
            HOST_WIDE_INT bit = (offset >> 3) & 63;

            need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
            new_flags.is_write = (XINT (x, 1) == 1);
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,

        case UNSPEC_FR_SPILL:
        case UNSPEC_FR_RESTORE:
        case UNSPEC_GETF_EXP:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);

        case UNSPEC_FR_RECIP_APPROX:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);

        case UNSPEC_CMPXCHG_ACQ:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
          /* Alloc must always be the first instruction of a group.
             We force this by always returning true.  */
          /* ??? We might get better scheduling if we explicitly check for
             input/local/output register dependencies, and modify the
             scheduler so that alloc is always reordered to the start of
             the current group.  We could then eliminate all of the
             first_instruction code.  */
          rws_access_regno (AR_PFS_REGNUM, flags, pred);

          new_flags.is_write = 1;
          rws_access_regno (REG_AR_CFM, new_flags, pred);

        case UNSPECV_SET_BSP:

        case UNSPECV_BLOCKAGE:
        case UNSPECV_INSN_GROUP_BARRIER:
        case UNSPECV_PSAC_ALL:
        case UNSPECV_PSAC_NORMAL:

      new_flags.is_write = 0;
      need_barrier  = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);

      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
        switch (format_ptr[i])
          case '0':     /* unused field */
          case 'i':     /* integer */
          case 'n':     /* note */
          case 'w':     /* wide integer */
          case 's':     /* pointer to string */
          case 'S':     /* optional pointer to string */

            if (rtx_needs_barrier (XEXP (x, i), flags, pred))

            for (j = XVECLEN (x, i) - 1; j >= 0; --j)
              if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))

  return need_barrier;
/* Clear out the state for group_barrier_needed_p at the start of a
   sequence of insns.  */
init_insn_group_barriers ()
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
/* Given the current state, recorded by previous calls to this function,
   determine whether a group barrier (a stop bit) is necessary before INSN.
   Return nonzero if so.  */
group_barrier_needed_p (insn)
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
      /* A barrier doesn't imply an instruction group boundary.  */
      memset (rws_insn, 0, sizeof (rws_insn));

      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn))
          && GET_CODE (pat) == CALL_INSN)

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);

      flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn))
          && GET_CODE (pat) == CALL_INSN)

      if (GET_CODE (PATTERN (insn)) == USE
          || GET_CODE (PATTERN (insn)) == CLOBBER)
        /* Don't care about USE and CLOBBER "insns"---those are used to
           indicate to the optimizer that it shouldn't get rid of
           certain operations.  */

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
          /* We play dependency tricks with the epilogue in order
             to get proper schedules.  Undo this for dv analysis.  */
        case CODE_FOR_epilogue_deallocate_stack:
        case CODE_FOR_prologue_allocate_stack:
          pat = XVECEXP (pat, 0, 0);

          /* The pattern we use for br.cloop confuses the code above.
             The second element of the vector is representative.  */
        case CODE_FOR_doloop_end_internal:
          pat = XVECEXP (pat, 0, 1);

          /* Doesn't generate code.  */
        case CODE_FOR_pred_rel_mutex:
        case CODE_FOR_prologue_use:

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
        need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);

  if (first_instruction && INSN_P (insn)
      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
      && GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
      first_instruction = 0;

  return need_barrier;
/* Like group_barrier_needed_p, but do not clobber the current state.  */
safe_group_barrier_needed_p (insn)
  struct reg_write_state rws_saved[NUM_REGS];
  int saved_first_instruction;

  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
  saved_first_instruction = first_instruction;

  t = group_barrier_needed_p (insn);

  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
  first_instruction = saved_first_instruction;
/* Scan the current function and insert stop bits as necessary to
   eliminate dependencies.  This function assumes that a final
   instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only
   inserts new ones at basic block boundaries, since these are
   invisible to the scheduler.  */
emit_insn_group_barriers (dump)
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
      if (GET_CODE (insn) == CODE_LABEL)
          if (insns_since_last_label)
          insns_since_last_label = 0;
      else if (GET_CODE (insn) == NOTE
               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
          if (insns_since_last_label)
          insns_since_last_label = 0;
      else if (GET_CODE (insn) == INSN
               && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
               && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
          init_insn_group_barriers ();
      else if (INSN_P (insn))
          insns_since_last_label = 1;

          if (group_barrier_needed_p (insn))
                fprintf (dump, "Emitting stop before label %d\n",
                         INSN_UID (last_label));
              emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);

              init_insn_group_barriers ();
/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */
emit_all_insn_group_barriers (dump)
     FILE *dump ATTRIBUTE_UNUSED;
  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
      if (GET_CODE (insn) == BARRIER)
          rtx last = prev_active_insn (insn);

          if (GET_CODE (last) == JUMP_INSN
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
            last = prev_active_insn (last);
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

          init_insn_group_barriers ();
      else if (INSN_P (insn))
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
            init_insn_group_barriers ();
          else if (group_barrier_needed_p (insn))
              emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
              init_insn_group_barriers ();
              group_barrier_needed_p (insn);
5678 static int errata_find_address_regs
PARAMS ((rtx
*, void *));
5679 static void errata_emit_nops
PARAMS ((rtx
));
5680 static void fixup_errata
PARAMS ((void));
5682 /* This structure is used to track some details about the previous insns
5683 groups so we can determine if it may be necessary to insert NOPs to
5684 workaround hardware errata. */
5687 HARD_REG_SET p_reg_set
;
5688 HARD_REG_SET gr_reg_conditionally_set
;
5691 /* Index into the last_group array. */
5692 static int group_idx
;
5694 /* Called through for_each_rtx; determines if a hard register that was
5695 conditionally set in the previous group is used as an address register.
5696 It ensures that for_each_rtx returns 1 in that case. */
5698 errata_find_address_regs (xp
, data
)
5700 void *data ATTRIBUTE_UNUSED
;
5703 if (GET_CODE (x
) != MEM
)
5706 if (GET_CODE (x
) == POST_MODIFY
)
5708 if (GET_CODE (x
) == REG
)
5710 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
5711 if (TEST_HARD_REG_BIT (prev_group
->gr_reg_conditionally_set
,
5719 /* Called for each insn; this function keeps track of the state in
5720 last_group and emits additional NOPs if necessary to work around
5721 an Itanium A/B step erratum. */
5723 errata_emit_nops (insn
)
5726 struct group
*this_group
= last_group
+ group_idx
;
5727 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
5728 rtx pat
= PATTERN (insn
);
5729 rtx cond
= GET_CODE (pat
) == COND_EXEC
? COND_EXEC_TEST (pat
) : 0;
5730 rtx real_pat
= cond
? COND_EXEC_CODE (pat
) : pat
;
5731 enum attr_type type
;
5734 if (GET_CODE (real_pat
) == USE
5735 || GET_CODE (real_pat
) == CLOBBER
5736 || GET_CODE (real_pat
) == ASM_INPUT
5737 || GET_CODE (real_pat
) == ADDR_VEC
5738 || GET_CODE (real_pat
) == ADDR_DIFF_VEC
5739 || asm_noperands (PATTERN (insn
)) >= 0)
5742 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5745 if (GET_CODE (set
) == PARALLEL
)
5748 set
= XVECEXP (real_pat
, 0, 0);
5749 for (i
= 1; i
< XVECLEN (real_pat
, 0); i
++)
5750 if (GET_CODE (XVECEXP (real_pat
, 0, i
)) != USE
5751 && GET_CODE (XVECEXP (real_pat
, 0, i
)) != CLOBBER
)
5758 if (set
&& GET_CODE (set
) != SET
)
5761 type
= get_attr_type (insn
);
5764 && set
&& REG_P (SET_DEST (set
)) && PR_REGNO_P (REGNO (SET_DEST (set
))))
5765 SET_HARD_REG_BIT (this_group
->p_reg_set
, REGNO (SET_DEST (set
)));
5767 if ((type
== TYPE_M
|| type
== TYPE_A
) && cond
&& set
5768 && REG_P (SET_DEST (set
))
5769 && GET_CODE (SET_SRC (set
)) != PLUS
5770 && GET_CODE (SET_SRC (set
)) != MINUS
5771 && (GET_CODE (SET_SRC (set
)) != ASHIFT
5772 || !shladd_operand (XEXP (SET_SRC (set
), 1), VOIDmode
))
5773 && (GET_CODE (SET_SRC (set
)) != MEM
5774 || GET_CODE (XEXP (SET_SRC (set
), 0)) != POST_MODIFY
)
5775 && GENERAL_REGNO_P (REGNO (SET_DEST (set
))))
5777 if (GET_RTX_CLASS (GET_CODE (cond
)) != '<'
5778 || ! REG_P (XEXP (cond
, 0)))
5781 if (TEST_HARD_REG_BIT (prev_group
->p_reg_set
, REGNO (XEXP (cond
, 0))))
5782 SET_HARD_REG_BIT (this_group
->gr_reg_conditionally_set
, REGNO (SET_DEST (set
)));
5784 if (for_each_rtx (&real_pat
, errata_find_address_regs
, NULL
))
5786 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5787 emit_insn_before (gen_nop (), insn
);
5788 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5790 memset (last_group
, 0, sizeof last_group
);
5794 /* Emit extra nops if they are required to work around hardware errata. */
5801 if (! TARGET_B_STEP
)
5805 memset (last_group
, 0, sizeof last_group
);
5807 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5812 if (ia64_safe_type (insn
) == TYPE_S
)
5815 memset (last_group
+ group_idx
, 0, sizeof last_group
[group_idx
]);
5818 errata_emit_nops (insn
);
5823 /* Instruction scheduling support. */
5825 #define NR_BUNDLES 10
5827 /* A list of names of all available bundles. */
5829 static const char *bundle_name
[NR_BUNDLES
] =
5835 #if NR_BUNDLES == 10
5845 /* Nonzero if we should insert stop bits into the schedule. */
5847 int ia64_final_schedule
= 0;
/* Codes of the corresponding queried units: */
5851 static int _0mii_
, _0mmi_
, _0mfi_
, _0mmf_
;
5852 static int _0bbb_
, _0mbb_
, _0mib_
, _0mmb_
, _0mfb_
, _0mlx_
;
5854 static int _1mii_
, _1mmi_
, _1mfi_
, _1mmf_
;
5855 static int _1bbb_
, _1mbb_
, _1mib_
, _1mmb_
, _1mfb_
, _1mlx_
;
5857 static int pos_1
, pos_2
, pos_3
, pos_4
, pos_5
, pos_6
;
5859 /* The following variable value is an insn group barrier. */
5861 static rtx dfa_stop_insn
;
5863 /* The following variable value is the last issued insn. */
5865 static rtx last_scheduled_insn
;
5867 /* The following variable value is size of the DFA state. */
5869 static size_t dfa_state_size
;
5871 /* The following variable value is pointer to a DFA state used as
5872 temporary variable. */
5874 static state_t temp_dfa_state
= NULL
;
5876 /* The following variable value is DFA state after issuing the last
5879 static state_t prev_cycle_state
= NULL
;
5881 /* The following array element values are TRUE if the corresponding
5882 insn requires to add stop bits before it. */
5884 static char *stops_p
;
5886 /* The following variable is used to set up the mentioned above array. */
5888 static int stop_before_p
= 0;
5890 /* The following variable value is length of the arrays `clocks' and
5893 static int clocks_length
;
5895 /* The following array element values are cycles on which the
5896 corresponding insn will be issued. The array is used only for
5901 /* The following array element values are numbers of cycles should be
5902 added to improve insn scheduling for MM_insns for Itanium1. */
5904 static int *add_cycles
;
5906 static rtx ia64_single_set
PARAMS ((rtx
));
5907 static void ia64_emit_insn_before
PARAMS ((rtx
, rtx
));
5909 /* Map a bundle number to its pseudo-op. */
5915 return bundle_name
[b
];
5919 /* Return the maximum number of instructions a cpu can issue. */
5927 /* Helper function - like single_set, but look inside COND_EXEC. */
5930 ia64_single_set (insn
)
5933 rtx x
= PATTERN (insn
), ret
;
5934 if (GET_CODE (x
) == COND_EXEC
)
5935 x
= COND_EXEC_CODE (x
);
5936 if (GET_CODE (x
) == SET
)
5939 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5940 Although they are not classical single set, the second set is there just
5941 to protect it from moving past FP-relative stack accesses. */
5942 switch (recog_memoized (insn
))
5944 case CODE_FOR_prologue_allocate_stack
:
5945 case CODE_FOR_epilogue_deallocate_stack
:
5946 ret
= XVECEXP (x
, 0, 0);
5950 ret
= single_set_2 (insn
, x
);
5957 /* Adjust the cost of a scheduling dependency. Return the new cost of
5958 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5961 ia64_adjust_cost (insn
, link
, dep_insn
, cost
)
5962 rtx insn
, link
, dep_insn
;
5965 enum attr_itanium_class dep_class
;
5966 enum attr_itanium_class insn_class
;
5968 if (REG_NOTE_KIND (link
) != REG_DEP_OUTPUT
)
5971 insn_class
= ia64_safe_itanium_class (insn
);
5972 dep_class
= ia64_safe_itanium_class (dep_insn
);
5973 if (dep_class
== ITANIUM_CLASS_ST
|| dep_class
== ITANIUM_CLASS_STF
5974 || insn_class
== ITANIUM_CLASS_ST
|| insn_class
== ITANIUM_CLASS_STF
)
5980 /* Like emit_insn_before, but skip cycle_display notes.
5981 ??? When cycle display notes are implemented, update this. */
5984 ia64_emit_insn_before (insn
, before
)
5987 emit_insn_before (insn
, before
);
/* The following function marks insns which produce addresses for load
   and store insns.  Such insns will be placed into M slots because it
   decreases latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */
5996 ia64_dependencies_evaluation_hook (head
, tail
)
5999 rtx insn
, link
, next
, next_tail
;
6001 next_tail
= NEXT_INSN (tail
);
6002 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
6005 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
6007 && ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IALU
)
6009 for (link
= INSN_DEPEND (insn
); link
!= 0; link
= XEXP (link
, 1))
6011 next
= XEXP (link
, 0);
6012 if ((ia64_safe_itanium_class (next
) == ITANIUM_CLASS_ST
6013 || ia64_safe_itanium_class (next
) == ITANIUM_CLASS_STF
)
6014 && ia64_st_address_bypass_p (insn
, next
))
6016 else if ((ia64_safe_itanium_class (next
) == ITANIUM_CLASS_LD
6017 || ia64_safe_itanium_class (next
)
6018 == ITANIUM_CLASS_FLD
)
6019 && ia64_ld_address_bypass_p (insn
, next
))
6022 insn
->call
= link
!= 0;
6026 /* We're beginning a new block. Initialize data structures as necessary. */
6029 ia64_sched_init (dump
, sched_verbose
, max_ready
)
6030 FILE *dump ATTRIBUTE_UNUSED
;
6031 int sched_verbose ATTRIBUTE_UNUSED
;
6032 int max_ready ATTRIBUTE_UNUSED
;
6034 #ifdef ENABLE_CHECKING
6037 if (reload_completed
)
6038 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
6039 insn
!= current_sched_info
->next_tail
;
6040 insn
= NEXT_INSN (insn
))
6041 if (SCHED_GROUP_P (insn
))
6044 last_scheduled_insn
= NULL_RTX
;
6045 init_insn_group_barriers ();
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
6052 ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
6053 clock_var
, reorder_type
)
6058 int clock_var ATTRIBUTE_UNUSED
;
6062 int n_ready
= *pn_ready
;
6063 rtx
*e_ready
= ready
+ n_ready
;
6067 fprintf (dump
, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type
);
6069 if (reorder_type
== 0)
6071 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6073 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6074 if (insnp
< e_ready
)
6077 enum attr_type t
= ia64_safe_type (insn
);
6078 if (t
== TYPE_UNKNOWN
)
6080 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6081 || asm_noperands (PATTERN (insn
)) >= 0)
6083 rtx lowest
= ready
[n_asms
];
6084 ready
[n_asms
] = insn
;
6090 rtx highest
= ready
[n_ready
- 1];
6091 ready
[n_ready
- 1] = insn
;
6098 if (n_asms
< n_ready
)
6100 /* Some normal insns to process. Skip the asms. */
6104 else if (n_ready
> 0)
6108 if (ia64_final_schedule
)
6111 int nr_need_stop
= 0;
6113 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6114 if (safe_group_barrier_needed_p (*insnp
))
6117 if (reorder_type
== 1 && n_ready
== nr_need_stop
)
6119 if (reorder_type
== 0)
6122 /* Move down everything that needs a stop bit, preserving
6124 while (insnp
-- > ready
+ deleted
)
6125 while (insnp
>= ready
+ deleted
)
6128 if (! safe_group_barrier_needed_p (insn
))
6130 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */
6145 ia64_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
6152 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
,
6153 pn_ready
, clock_var
, 0);
6156 /* Like ia64_sched_reorder, but called after issuing each insn.
6157 Override the default sort algorithm to better slot instructions. */
6160 ia64_sched_reorder2 (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
6161 FILE *dump ATTRIBUTE_UNUSED
;
6162 int sched_verbose ATTRIBUTE_UNUSED
;
6167 if (ia64_tune
== PROCESSOR_ITANIUM
&& reload_completed
&& last_scheduled_insn
)
6168 clocks
[INSN_UID (last_scheduled_insn
)] = clock_var
;
6169 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
6173 /* We are about to issue INSN. Return the number of insns left on the
6174 ready queue that can be issued this cycle. */
6177 ia64_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
6178 FILE *dump ATTRIBUTE_UNUSED
;
6179 int sched_verbose ATTRIBUTE_UNUSED
;
6180 rtx insn ATTRIBUTE_UNUSED
;
6181 int can_issue_more ATTRIBUTE_UNUSED
;
6183 last_scheduled_insn
= insn
;
6184 memcpy (prev_cycle_state
, curr_state
, dfa_state_size
);
6185 if (reload_completed
)
6187 if (group_barrier_needed_p (insn
))
6189 if (GET_CODE (insn
) == CALL_INSN
)
6190 init_insn_group_barriers ();
6191 stops_p
[INSN_UID (insn
)] = stop_before_p
;
6197 /* We are choosing insn from the ready queue. Return nonzero if INSN
6201 ia64_first_cycle_multipass_dfa_lookahead_guard (insn
)
6204 if (insn
== NULL_RTX
|| !INSN_P (insn
))
6206 return (!reload_completed
6207 || !safe_group_barrier_needed_p (insn
));
6210 /* The following variable value is pseudo-insn used by the DFA insn
6211 scheduler to change the DFA state when the simulated clock is
6214 static rtx dfa_pre_cycle_insn
;
/* We are about to begin issuing INSN.  Return nonzero if we can not
   issue it on given cycle CLOCK and return zero if we should not sort
   the ready queue on the next clock start.  */
6221 ia64_dfa_new_cycle (dump
, verbose
, insn
, last_clock
, clock
, sort_p
)
6225 int last_clock
, clock
;
6228 int setup_clocks_p
= FALSE
;
6230 if (insn
== NULL_RTX
|| !INSN_P (insn
))
6232 if ((reload_completed
&& safe_group_barrier_needed_p (insn
))
6233 || (last_scheduled_insn
6234 && (GET_CODE (last_scheduled_insn
) == CALL_INSN
6235 || GET_CODE (PATTERN (last_scheduled_insn
)) == ASM_INPUT
6236 || asm_noperands (PATTERN (last_scheduled_insn
)) >= 0)))
6238 init_insn_group_barriers ();
6239 if (verbose
&& dump
)
6240 fprintf (dump
, "// Stop should be before %d%s\n", INSN_UID (insn
),
6241 last_clock
== clock
? " + cycle advance" : "");
6243 if (last_clock
== clock
)
6245 state_transition (curr_state
, dfa_stop_insn
);
6246 if (TARGET_EARLY_STOP_BITS
)
6247 *sort_p
= (last_scheduled_insn
== NULL_RTX
6248 || GET_CODE (last_scheduled_insn
) != CALL_INSN
);
6253 else if (reload_completed
)
6254 setup_clocks_p
= TRUE
;
6255 memcpy (curr_state
, prev_cycle_state
, dfa_state_size
);
6256 state_transition (curr_state
, dfa_stop_insn
);
6257 state_transition (curr_state
, dfa_pre_cycle_insn
);
6258 state_transition (curr_state
, NULL
);
6260 else if (reload_completed
)
6261 setup_clocks_p
= TRUE
;
6262 if (setup_clocks_p
&& ia64_tune
== PROCESSOR_ITANIUM
)
6264 enum attr_itanium_class c
= ia64_safe_itanium_class (insn
);
6266 if (c
!= ITANIUM_CLASS_MMMUL
&& c
!= ITANIUM_CLASS_MMSHF
)
6271 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
6272 if (REG_NOTE_KIND (link
) == 0)
6274 enum attr_itanium_class dep_class
;
6275 rtx dep_insn
= XEXP (link
, 0);
6277 dep_class
= ia64_safe_itanium_class (dep_insn
);
6278 if ((dep_class
== ITANIUM_CLASS_MMMUL
6279 || dep_class
== ITANIUM_CLASS_MMSHF
)
6280 && last_clock
- clocks
[INSN_UID (dep_insn
)] < 4
6282 || last_clock
- clocks
[INSN_UID (dep_insn
)] < d
))
6283 d
= last_clock
- clocks
[INSN_UID (dep_insn
)];
6286 add_cycles
[INSN_UID (insn
)] = 3 - d
;
6294 /* The following page contains abstract data `bundle states' which are
6295 used for bundling insns (inserting nops and template generation). */
6297 /* The following describes state of insn bundling. */
6301 /* Unique bundle state number to identify them in the debugging
6304 rtx insn
; /* corresponding insn, NULL for the 1st and the last state */
6305 /* number nops before and after the insn */
6306 short before_nops_num
, after_nops_num
;
6307 int insn_num
; /* insn number (0 - for initial state, 1 - for the 1st
6309 int cost
; /* cost of the state in cycles */
6310 int accumulated_insns_num
; /* number of all previous insns including
6311 nops. L is considered as 2 insns */
6312 int branch_deviation
; /* deviation of previous branches from 3rd slots */
6313 struct bundle_state
*next
; /* next state with the same insn_num */
6314 struct bundle_state
*originator
; /* originator (previous insn state) */
6315 /* All bundle states are in the following chain. */
6316 struct bundle_state
*allocated_states_chain
;
6317 /* The DFA State after issuing the insn and the nops. */
/* The following maps an insn number to the corresponding bundle state.  */
6323 static struct bundle_state
**index_to_bundle_states
;
6325 /* The unique number of next bundle state. */
6327 static int bundle_states_num
;
6329 /* All allocated bundle states are in the following chain. */
6331 static struct bundle_state
*allocated_bundle_states_chain
;
6333 /* All allocated but not used bundle states are in the following
6336 static struct bundle_state
*free_bundle_state_chain
;
6339 /* The following function returns a free bundle state. */
6341 static struct bundle_state
*
6342 get_free_bundle_state ()
6344 struct bundle_state
*result
;
6346 if (free_bundle_state_chain
!= NULL
)
6348 result
= free_bundle_state_chain
;
6349 free_bundle_state_chain
= result
->next
;
6353 result
= xmalloc (sizeof (struct bundle_state
));
6354 result
->dfa_state
= xmalloc (dfa_state_size
);
6355 result
->allocated_states_chain
= allocated_bundle_states_chain
;
6356 allocated_bundle_states_chain
= result
;
6358 result
->unique_num
= bundle_states_num
++;
6363 /* The following function frees given bundle state. */
6366 free_bundle_state (state
)
6367 struct bundle_state
*state
;
6369 state
->next
= free_bundle_state_chain
;
6370 free_bundle_state_chain
= state
;
6373 /* Start work with abstract data `bundle states'. */
6376 initiate_bundle_states ()
6378 bundle_states_num
= 0;
6379 free_bundle_state_chain
= NULL
;
6380 allocated_bundle_states_chain
= NULL
;
6383 /* Finish work with abstract data `bundle states'. */
6386 finish_bundle_states ()
6388 struct bundle_state
*curr_state
, *next_state
;
6390 for (curr_state
= allocated_bundle_states_chain
;
6392 curr_state
= next_state
)
6394 next_state
= curr_state
->allocated_states_chain
;
6395 free (curr_state
->dfa_state
);
6400 /* Hash table of the bundle states. The key is dfa_state and insn_num
6401 of the bundle states. */
6403 static htab_t bundle_state_table
;
6405 /* The function returns hash of BUNDLE_STATE. */
6408 bundle_state_hash (bundle_state
)
6409 const void *bundle_state
;
6411 const struct bundle_state
*state
= (struct bundle_state
*) bundle_state
;
6414 for (result
= i
= 0; i
< dfa_state_size
; i
++)
6415 result
+= (((unsigned char *) state
->dfa_state
) [i
]
6416 << ((i
% CHAR_BIT
) * 3 + CHAR_BIT
));
6417 return result
+ state
->insn_num
;
6420 /* The function returns nonzero if the bundle state keys are equal. */
6423 bundle_state_eq_p (bundle_state_1
, bundle_state_2
)
6424 const void *bundle_state_1
;
6425 const void *bundle_state_2
;
6427 const struct bundle_state
* state1
= (struct bundle_state
*) bundle_state_1
;
6428 const struct bundle_state
* state2
= (struct bundle_state
*) bundle_state_2
;
6430 return (state1
->insn_num
== state2
->insn_num
6431 && memcmp (state1
->dfa_state
, state2
->dfa_state
,
6432 dfa_state_size
) == 0);
/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with given key.  */
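/* Illustrative sketch (not part of GCC; guarded out of the build): the
   ordering that the nested comparison below implements when deciding
   whether a newly built state should replace the state already stored
   under the same hash key.  It is a lexicographic "better than": smaller
   cost first, then fewer accumulated insns (nops included), then smaller
   branch deviation.  The struct and helper are hypothetical stand-ins for
   struct bundle_state.  */
#if 0
#include <assert.h>

struct example_bundle_key
{
  int cost;
  int accumulated_insns_num;
  int branch_deviation;
};

static int
example_state_is_better (const struct example_bundle_key *a,
                         const struct example_bundle_key *b)
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  return a->branch_deviation < b->branch_deviation;
}

int
main (void)
{
  struct example_bundle_key stored = { 3, 7, 1 };
  struct example_bundle_key candidate = { 3, 6, 2 };

  /* Same cost but fewer insns: the candidate would replace the stored state.  */
  assert (example_state_is_better (&candidate, &stored));
  return 0;
}
#endif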
6440 insert_bundle_state (bundle_state
)
6441 struct bundle_state
*bundle_state
;
6445 entry_ptr
= htab_find_slot (bundle_state_table
, bundle_state
, 1);
6446 if (*entry_ptr
== NULL
)
6448 bundle_state
->next
= index_to_bundle_states
[bundle_state
->insn_num
];
6449 index_to_bundle_states
[bundle_state
->insn_num
] = bundle_state
;
6450 *entry_ptr
= (void *) bundle_state
;
6453 else if (bundle_state
->cost
< ((struct bundle_state
*) *entry_ptr
)->cost
6454 || (bundle_state
->cost
== ((struct bundle_state
*) *entry_ptr
)->cost
6455 && (((struct bundle_state
*)*entry_ptr
)->accumulated_insns_num
6456 > bundle_state
->accumulated_insns_num
6457 || (((struct bundle_state
*)
6458 *entry_ptr
)->accumulated_insns_num
6459 == bundle_state
->accumulated_insns_num
6460 && ((struct bundle_state
*)
6461 *entry_ptr
)->branch_deviation
6462 > bundle_state
->branch_deviation
))))
6465 struct bundle_state temp
;
6467 temp
= *(struct bundle_state
*) *entry_ptr
;
6468 *(struct bundle_state
*) *entry_ptr
= *bundle_state
;
6469 ((struct bundle_state
*) *entry_ptr
)->next
= temp
.next
;
6470 *bundle_state
= temp
;
6475 /* Start work with the hash table. */
6478 initiate_bundle_state_table ()
6480 bundle_state_table
= htab_create (50, bundle_state_hash
, bundle_state_eq_p
,
6484 /* Finish work with the hash table. */
6487 finish_bundle_state_table ()
6489 htab_delete (bundle_state_table
);
6494 /* The following variable is a insn `nop' used to check bundle states
6495 with different number of inserted nops. */
6497 static rtx ia64_nop
;
6499 /* The following function tries to issue NOPS_NUM nops for the current
6500 state without advancing processor cycle. If it failed, the
6501 function returns FALSE and frees the current state. */
6504 try_issue_nops (curr_state
, nops_num
)
6505 struct bundle_state
*curr_state
;
6510 for (i
= 0; i
< nops_num
; i
++)
6511 if (state_transition (curr_state
->dfa_state
, ia64_nop
) >= 0)
6513 free_bundle_state (curr_state
);
6519 /* The following function tries to issue INSN for the current
6520 state without advancing processor cycle. If it failed, the
6521 function returns FALSE and frees the current state. */
6524 try_issue_insn (curr_state
, insn
)
6525 struct bundle_state
*curr_state
;
6528 if (insn
&& state_transition (curr_state
->dfa_state
, insn
) >= 0)
6530 free_bundle_state (curr_state
);
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also (or, if ONLY_BUNDLE_END_P
   is TRUE, only) tries to issue nops to fill the whole bundle.
   If it was successful, the function creates a new bundle state and
   inserts it into the hash table and into `index_to_bundle_states'.  */

static void
issue_nops_and_insn (originator, before_nops_num, insn, try_bundle_end_p,
		     only_bundle_end_p)
     struct bundle_state *originator;
     int before_nops_num;
     rtx insn;
     int try_bundle_end_p, only_bundle_end_p;
{
  struct bundle_state *curr_state;

  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  if (insn == NULL_RTX)
    abort ();
  else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      if (GET_MODE (insn) == TImode)
	abort ();
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
	  && curr_state->accumulated_insns_num % 3 != 0)
	{
	  free_bundle_state (curr_state);
	  return;
	}
    }
  else if (GET_MODE (insn) != TImode)
    {
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	abort ();
      if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  else
    {
      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      curr_state->cost++;
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	{
	  /* Finish bundle containing asm insn.  */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
	}
      else if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	{
	  state_t dfa_state;
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
		  dfa_state_size);
	  curr_state = curr_state1;
	}
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
	return;
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
}
/* The following function returns position in the two window bundle
   for given STATE.  */

static int
get_max_pos (state)
     state_t state;
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}
/* The function returns the code of a possible template for given position
   and state.  The function should be called only with 2 values of
   position equal to 3 or 6.  */

static int
get_template (state, pos)
     state_t state;
     int pos;
{
  switch (pos)
    {
    case 3:
      if (cpu_unit_reservation_p (state, _0mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _0mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _0mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _0mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _0bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _0mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _0mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _0mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _0mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
	return 9;
      else
	abort ();
    case 6:
      if (cpu_unit_reservation_p (state, _1mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _1mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _1mfi_))
	return 2;
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _1bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _1mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _1mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _1mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _1mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
	return 9;
      else
	abort ();
    default:
      abort ();
    }
}
/* The following function returns an insn important for insn bundling
   followed by INSN and before TAIL.  */

static rtx
get_next_important_insn (insn, tail)
     rtx insn, tail;
{
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
	&& GET_CODE (PATTERN (insn)) != USE
	&& GET_CODE (PATTERN (insn)) != CLOBBER)
      return insn;
  return NULL_RTX;
}
/* The following function does insn bundling.  The bundling algorithm is
   based on dynamic programming.  It tries to insert different numbers of
   nop insns before/after the real insns.  At the end of the EBB, it chooses
   the best alternative and then, moving back in the EBB, inserts templates
   for the best alternative.  The algorithm is directed by information
   (changes of simulated processor cycle) created by the 2nd insn
   scheduling.  */
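
/* In outline: the first (forward) pass below builds, for every
   important insn, the set of bundle states reachable by issuing 0, 1
   or 2 nops before the insn; insert_bundle_state keeps only the
   cheapest state per key.  The second (backward) pass walks the
   originator chain of the best final state and materializes the
   chosen nops, bundle selectors and stop bits.  */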
6761 bundling (dump
, verbose
, prev_head_insn
, tail
)
6764 rtx prev_head_insn
, tail
;
6766 struct bundle_state
*curr_state
, *next_state
, *best_state
;
6767 rtx insn
, next_insn
;
6769 int i
, bundle_end_p
, only_bundle_end_p
, asm_p
;
6770 int pos
= 0, max_pos
, template0
, template1
;
6773 enum attr_type type
;
6776 for (insn
= NEXT_INSN (prev_head_insn
);
6777 insn
&& insn
!= tail
;
6778 insn
= NEXT_INSN (insn
))
6784 dfa_clean_insn_cache ();
6785 initiate_bundle_state_table ();
6786 index_to_bundle_states
= xmalloc ((insn_num
+ 2)
6787 * sizeof (struct bundle_state
*));
6788 /* First (forward) pass -- generates states. */
6789 curr_state
= get_free_bundle_state ();
6790 curr_state
->insn
= NULL
;
6791 curr_state
->before_nops_num
= 0;
6792 curr_state
->after_nops_num
= 0;
6793 curr_state
->insn_num
= 0;
6794 curr_state
->cost
= 0;
6795 curr_state
->accumulated_insns_num
= 0;
6796 curr_state
->branch_deviation
= 0;
6797 curr_state
->next
= NULL
;
6798 curr_state
->originator
= NULL
;
6799 state_reset (curr_state
->dfa_state
);
6800 index_to_bundle_states
[0] = curr_state
;
6802 for (insn
= NEXT_INSN (prev_head_insn
);
6804 insn
= NEXT_INSN (insn
))
6806 && (ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
6807 || GET_CODE (PATTERN (insn
)) == USE
6808 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6809 && GET_MODE (insn
) == TImode
)
6811 PUT_MODE (insn
, VOIDmode
);
6812 for (next_insn
= NEXT_INSN (insn
);
6814 next_insn
= NEXT_INSN (next_insn
))
6815 if (INSN_P (next_insn
)
6816 && ia64_safe_itanium_class (next_insn
) != ITANIUM_CLASS_IGNORE
6817 && GET_CODE (PATTERN (next_insn
)) != USE
6818 && GET_CODE (PATTERN (next_insn
)) != CLOBBER
)
6820 PUT_MODE (next_insn
, TImode
);
6824 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
6829 || ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
6830 || GET_CODE (PATTERN (insn
)) == USE
6831 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6833 type
= ia64_safe_type (insn
);
6834 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
6836 index_to_bundle_states
[insn_num
] = NULL
;
6837 for (curr_state
= index_to_bundle_states
[insn_num
- 1];
6839 curr_state
= next_state
)
6841 pos
= curr_state
->accumulated_insns_num
% 3;
6842 next_state
= curr_state
->next
;
6843 /* Finish the current bundle in order to start a subsequent
6844 asm insn in a new bundle. */
6846 = (next_insn
!= NULL_RTX
6847 && INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
6848 && ia64_safe_type (next_insn
) == TYPE_UNKNOWN
);
6850 = (only_bundle_end_p
|| next_insn
== NULL_RTX
6851 || (GET_MODE (next_insn
) == TImode
6852 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
));
6853 if (type
== TYPE_F
|| type
== TYPE_B
|| type
== TYPE_L
6855 /* We need to insert 2 Nops for cases like M_MII. */
6856 || (type
== TYPE_M
&& ia64_tune
== PROCESSOR_ITANIUM
6857 && !bundle_end_p
&& pos
== 1))
6858 issue_nops_and_insn (curr_state
, 2, insn
, bundle_end_p
,
6860 issue_nops_and_insn (curr_state
, 1, insn
, bundle_end_p
,
6862 issue_nops_and_insn (curr_state
, 0, insn
, bundle_end_p
,
6865 if (index_to_bundle_states
[insn_num
] == NULL
)
6867 for (curr_state
= index_to_bundle_states
[insn_num
];
6869 curr_state
= curr_state
->next
)
6870 if (verbose
>= 2 && dump
)
6874 unsigned short one_automaton_state
;
6875 unsigned short oneb_automaton_state
;
6876 unsigned short two_automaton_state
;
6877 unsigned short twob_automaton_state
;
6882 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6883 curr_state
->unique_num
,
6884 (curr_state
->originator
== NULL
6885 ? -1 : curr_state
->originator
->unique_num
),
6887 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
6888 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
6889 (ia64_tune
== PROCESSOR_ITANIUM
6890 ? ((struct DFA_chip
*) curr_state
->dfa_state
)->oneb_automaton_state
6891 : ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
),
6895 if (index_to_bundle_states
[insn_num
] == NULL
)
6897 /* Finding state with a minimal cost: */
6899 for (curr_state
= index_to_bundle_states
[insn_num
];
6901 curr_state
= curr_state
->next
)
6902 if (curr_state
->accumulated_insns_num
% 3 == 0
6903 && (best_state
== NULL
|| best_state
->cost
> curr_state
->cost
6904 || (best_state
->cost
== curr_state
->cost
6905 && (curr_state
->accumulated_insns_num
6906 < best_state
->accumulated_insns_num
6907 || (curr_state
->accumulated_insns_num
6908 == best_state
->accumulated_insns_num
6909 && curr_state
->branch_deviation
6910 < best_state
->branch_deviation
)))))
6911 best_state
= curr_state
;
6912 /* Second (backward) pass: adding nops and templates: */
6913 insn_num
= best_state
->before_nops_num
;
6914 template0
= template1
= -1;
6915 for (curr_state
= best_state
;
6916 curr_state
->originator
!= NULL
;
6917 curr_state
= curr_state
->originator
)
6919 insn
= curr_state
->insn
;
6920 asm_p
= (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6921 || asm_noperands (PATTERN (insn
)) >= 0);
6923 if (verbose
>= 2 && dump
)
6927 unsigned short one_automaton_state
;
6928 unsigned short oneb_automaton_state
;
6929 unsigned short two_automaton_state
;
6930 unsigned short twob_automaton_state
;
6935 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6936 curr_state
->unique_num
,
6937 (curr_state
->originator
== NULL
6938 ? -1 : curr_state
->originator
->unique_num
),
6940 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
6941 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
6942 (ia64_tune
== PROCESSOR_ITANIUM
6943 ? ((struct DFA_chip
*) curr_state
->dfa_state
)->oneb_automaton_state
6944 : ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
),
6947 max_pos
= get_max_pos (curr_state
->dfa_state
);
6948 if (max_pos
== 6 || (max_pos
== 3 && template0
< 0))
6952 template0
= get_template (curr_state
->dfa_state
, 3);
6955 template1
= get_template (curr_state
->dfa_state
, 3);
6956 template0
= get_template (curr_state
->dfa_state
, 6);
6959 if (max_pos
> 3 && template1
< 0)
6963 template1
= get_template (curr_state
->dfa_state
, 3);
6967 for (i
= 0; i
< curr_state
->after_nops_num
; i
++)
6970 emit_insn_after (nop
, insn
);
6978 b
= gen_bundle_selector (GEN_INT (template0
));
6979 ia64_emit_insn_before (b
, nop
);
6980 template0
= template1
;
6984 if (INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
6985 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
6986 && asm_noperands (PATTERN (insn
)) < 0)
6988 if (ia64_safe_type (insn
) == TYPE_L
)
6993 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
6994 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
6995 && asm_noperands (PATTERN (insn
)) < 0)
6999 b
= gen_bundle_selector (GEN_INT (template0
));
7000 ia64_emit_insn_before (b
, insn
);
7001 b
= PREV_INSN (insn
);
7003 template0
= template1
;
7006 for (i
= 0; i
< curr_state
->before_nops_num
; i
++)
7009 ia64_emit_insn_before (nop
, insn
);
7010 nop
= PREV_INSN (insn
);
7019 b
= gen_bundle_selector (GEN_INT (template0
));
7020 ia64_emit_insn_before (b
, insn
);
7021 b
= PREV_INSN (insn
);
7023 template0
= template1
;
7028 if (ia64_tune
== PROCESSOR_ITANIUM
)
7029 /* Insert additional cycles for MM-insns: */
7030 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
7035 || ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
7036 || GET_CODE (PATTERN (insn
)) == USE
7037 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
7039 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
7040 if (INSN_UID (insn
) < clocks_length
&& add_cycles
[INSN_UID (insn
)])
7046 last
= prev_active_insn (insn
);
7047 pred_stop_p
= recog_memoized (last
) == CODE_FOR_insn_group_barrier
;
7049 last
= prev_active_insn (last
);
7051 for (;; last
= prev_active_insn (last
))
7052 if (recog_memoized (last
) == CODE_FOR_bundle_selector
)
7054 template0
= XINT (XVECEXP (PATTERN (last
), 0, 0), 0);
7057 = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
7060 else if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
7062 if ((pred_stop_p
&& n
== 0) || n
> 2
7063 || (template0
== 9 && n
!= 0))
7065 for (j
= 3 - n
; j
> 0; j
--)
7066 ia64_emit_insn_before (gen_nop (), insn
);
7067 add_cycles
[INSN_UID (insn
)]--;
7068 if (!pred_stop_p
|| add_cycles
[INSN_UID (insn
)])
7069 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7072 add_cycles
[INSN_UID (insn
)]--;
7073 for (i
= add_cycles
[INSN_UID (insn
)]; i
> 0; i
--)
7075 /* Insert .MII bundle. */
7076 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
7078 ia64_emit_insn_before (gen_nop (), insn
);
7079 ia64_emit_insn_before (gen_nop (), insn
);
7082 ia64_emit_insn_before
7083 (gen_insn_group_barrier (GEN_INT (3)), insn
);
7086 ia64_emit_insn_before (gen_nop (), insn
);
7087 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7090 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0
)),
7092 for (j
= n
; j
> 0; j
--)
7093 ia64_emit_insn_before (gen_nop (), insn
);
7095 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7099 free (index_to_bundle_states
);
7100 finish_bundle_state_table ();
7102 dfa_clean_insn_cache ();
/* The following function is called at the end of scheduling a BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */

static void
ia64_sched_finish (dump, sched_verbose)
     FILE *dump;
     int sched_verbose;
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;
  if (reload_completed)
    {
      final_emit_insn_group_barriers (dump);
      bundling (dump, sched_verbose, current_sched_info->prev_head,
		current_sched_info->next_tail);
      if (sched_verbose && dump)
	fprintf (dump, "// finishing %d-%d\n",
		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
    }
}
7131 /* The following function inserts stop bits in scheduled BB or EBB. */
7134 final_emit_insn_group_barriers (dump
)
7135 FILE *dump ATTRIBUTE_UNUSED
;
7138 int need_barrier_p
= 0;
7139 rtx prev_insn
= NULL_RTX
;
7141 init_insn_group_barriers ();
7143 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
7144 insn
!= current_sched_info
->next_tail
;
7145 insn
= NEXT_INSN (insn
))
7147 if (GET_CODE (insn
) == BARRIER
)
7149 rtx last
= prev_active_insn (insn
);
7153 if (GET_CODE (last
) == JUMP_INSN
7154 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
7155 last
= prev_active_insn (last
);
7156 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
7157 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
7159 init_insn_group_barriers ();
7161 prev_insn
= NULL_RTX
;
7163 else if (INSN_P (insn
))
7165 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
7167 init_insn_group_barriers ();
7169 prev_insn
= NULL_RTX
;
7171 else if (need_barrier_p
|| group_barrier_needed_p (insn
))
7173 if (TARGET_EARLY_STOP_BITS
)
7178 last
!= current_sched_info
->prev_head
;
7179 last
= PREV_INSN (last
))
7180 if (INSN_P (last
) && GET_MODE (last
) == TImode
7181 && stops_p
[INSN_UID (last
)])
7183 if (last
== current_sched_info
->prev_head
)
7185 last
= prev_active_insn (last
);
7187 && recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
7188 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7190 init_insn_group_barriers ();
7191 for (last
= NEXT_INSN (last
);
7193 last
= NEXT_INSN (last
))
7195 group_barrier_needed_p (last
);
7199 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7201 init_insn_group_barriers ();
7203 group_barrier_needed_p (insn
);
7204 prev_insn
= NULL_RTX
;
7206 else if (recog_memoized (insn
) >= 0)
7208 need_barrier_p
= (GET_CODE (insn
) == CALL_INSN
7209 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
7210 || asm_noperands (PATTERN (insn
)) >= 0);
/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */

static int
ia64_use_dfa_pipeline_interface ()
{
  return 1;
}

/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */

static int
ia64_first_cycle_multipass_dfa_lookahead ()
{
  return (reload_completed ? 6 : 4);
}

/* The following function initiates variable `dfa_pre_cycle_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn ()
{
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}

/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

static rtx
ia64_dfa_pre_cycle_insn ()
{
  return dfa_pre_cycle_insn;
}
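
/* dfa_pre_cycle_insn is the dummy insn that the scheduler is expected
   to feed to the automaton when it starts a new simulated cycle (via
   the dfa_pre_cycle_insn hook above); dfa_stop_insn, built from
   gen_insn_group_barrier, models a stop bit in the same way.  */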
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type st or stf).  */

int
ia64_st_address_bypass_p (producer, consumer)
     rtx producer;
     rtx consumer;
{
  rtx dest, reg, mem;

  if (producer == NULL_RTX || consumer == NULL_RTX)
    abort ();
  dest = ia64_single_set (producer);
  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
    abort ();
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  dest = ia64_single_set (consumer);
  if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
      || GET_CODE (mem) != MEM)
    abort ();
  return reg_mentioned_p (reg, mem);
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type ld or fld).  */

int
ia64_ld_address_bypass_p (producer, consumer)
     rtx producer;
     rtx consumer;
{
  rtx dest, src, reg, mem;

  if (producer == NULL_RTX || consumer == NULL_RTX)
    abort ();
  dest = ia64_single_set (producer);
  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
    abort ();
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  src = ia64_single_set (consumer);
  if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
    abort ();
  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  /* Note that LO_SUM is used for GOT loads.  */
  if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
    abort ();

  return reg_mentioned_p (reg, mem);
}
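
/* The two bypass predicates above are intended to act as guard
   functions for define_bypass entries in the Itanium pipeline
   descriptions, so the shorter address-to-memory-op latency is used
   only when the produced register really feeds the address.  */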
/* The following function returns TRUE if INSN produces the address for a
   load/store insn.  We will place such insns into the M slot because it
   decreases their latency time.  */

int
ia64_produce_address_p (insn)
     rtx insn;
{
  return insn->call;
}
7333 /* Emit pseudo-ops for the assembler to describe predicate relations.
7334 At present this assumes that we only consider predicate pairs to
7335 be mutex, and that the assembler can deduce proper values from
7336 straight-line code. */
7339 emit_predicate_relation_info ()
7343 FOR_EACH_BB_REVERSE (bb
)
7346 rtx head
= bb
->head
;
7348 /* We only need such notes at code labels. */
7349 if (GET_CODE (head
) != CODE_LABEL
)
7351 if (GET_CODE (NEXT_INSN (head
)) == NOTE
7352 && NOTE_LINE_NUMBER (NEXT_INSN (head
)) == NOTE_INSN_BASIC_BLOCK
)
7353 head
= NEXT_INSN (head
);
7355 for (r
= PR_REG (0); r
< PR_REG (64); r
+= 2)
7356 if (REGNO_REG_SET_P (bb
->global_live_at_start
, r
))
7358 rtx p
= gen_rtx_REG (BImode
, r
);
7359 rtx n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
7360 if (head
== bb
->end
)
7366 /* Look for conditional calls that do not return, and protect predicate
7367 relations around them. Otherwise the assembler will assume the call
7368 returns, and complain about uses of call-clobbered predicates after
7370 FOR_EACH_BB_REVERSE (bb
)
7372 rtx insn
= bb
->head
;
7376 if (GET_CODE (insn
) == CALL_INSN
7377 && GET_CODE (PATTERN (insn
)) == COND_EXEC
7378 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
7380 rtx b
= emit_insn_before (gen_safe_across_calls_all (), insn
);
7381 rtx a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
7382 if (bb
->head
== insn
)
7384 if (bb
->end
== insn
)
7388 if (insn
== bb
->end
)
7390 insn
= NEXT_INSN (insn
);
7395 /* Perform machine dependent operations on the rtl chain INSNS. */
7400 /* We are freeing block_for_insn in the toplev to keep compatibility
7401 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7402 compute_bb_for_insn ();
7404 /* If optimizing, we'll have split before scheduling. */
7406 split_all_insns (0);
7408 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7409 non-optimizing bootstrap. */
7410 update_life_info (NULL
, UPDATE_LIFE_GLOBAL_RM_NOTES
, PROP_DEATH_NOTES
);
7412 if (ia64_flag_schedule_insns2
)
7414 timevar_push (TV_SCHED2
);
7415 ia64_final_schedule
= 1;
7417 initiate_bundle_states ();
7418 ia64_nop
= make_insn_raw (gen_nop ());
7419 PREV_INSN (ia64_nop
) = NEXT_INSN (ia64_nop
) = NULL_RTX
;
7420 recog_memoized (ia64_nop
);
7421 clocks_length
= get_max_uid () + 1;
7422 stops_p
= xcalloc (1, clocks_length
);
7423 if (ia64_tune
== PROCESSOR_ITANIUM
)
7425 clocks
= xcalloc (clocks_length
, sizeof (int));
7426 add_cycles
= xcalloc (clocks_length
, sizeof (int));
7428 if (ia64_tune
== PROCESSOR_ITANIUM2
)
7430 pos_1
= get_cpu_unit_code ("2_1");
7431 pos_2
= get_cpu_unit_code ("2_2");
7432 pos_3
= get_cpu_unit_code ("2_3");
7433 pos_4
= get_cpu_unit_code ("2_4");
7434 pos_5
= get_cpu_unit_code ("2_5");
7435 pos_6
= get_cpu_unit_code ("2_6");
7436 _0mii_
= get_cpu_unit_code ("2b_0mii.");
7437 _0mmi_
= get_cpu_unit_code ("2b_0mmi.");
7438 _0mfi_
= get_cpu_unit_code ("2b_0mfi.");
7439 _0mmf_
= get_cpu_unit_code ("2b_0mmf.");
7440 _0bbb_
= get_cpu_unit_code ("2b_0bbb.");
7441 _0mbb_
= get_cpu_unit_code ("2b_0mbb.");
7442 _0mib_
= get_cpu_unit_code ("2b_0mib.");
7443 _0mmb_
= get_cpu_unit_code ("2b_0mmb.");
7444 _0mfb_
= get_cpu_unit_code ("2b_0mfb.");
7445 _0mlx_
= get_cpu_unit_code ("2b_0mlx.");
7446 _1mii_
= get_cpu_unit_code ("2b_1mii.");
7447 _1mmi_
= get_cpu_unit_code ("2b_1mmi.");
7448 _1mfi_
= get_cpu_unit_code ("2b_1mfi.");
7449 _1mmf_
= get_cpu_unit_code ("2b_1mmf.");
7450 _1bbb_
= get_cpu_unit_code ("2b_1bbb.");
7451 _1mbb_
= get_cpu_unit_code ("2b_1mbb.");
7452 _1mib_
= get_cpu_unit_code ("2b_1mib.");
7453 _1mmb_
= get_cpu_unit_code ("2b_1mmb.");
7454 _1mfb_
= get_cpu_unit_code ("2b_1mfb.");
7455 _1mlx_
= get_cpu_unit_code ("2b_1mlx.");
7459 pos_1
= get_cpu_unit_code ("1_1");
7460 pos_2
= get_cpu_unit_code ("1_2");
7461 pos_3
= get_cpu_unit_code ("1_3");
7462 pos_4
= get_cpu_unit_code ("1_4");
7463 pos_5
= get_cpu_unit_code ("1_5");
7464 pos_6
= get_cpu_unit_code ("1_6");
7465 _0mii_
= get_cpu_unit_code ("1b_0mii.");
7466 _0mmi_
= get_cpu_unit_code ("1b_0mmi.");
7467 _0mfi_
= get_cpu_unit_code ("1b_0mfi.");
7468 _0mmf_
= get_cpu_unit_code ("1b_0mmf.");
7469 _0bbb_
= get_cpu_unit_code ("1b_0bbb.");
7470 _0mbb_
= get_cpu_unit_code ("1b_0mbb.");
7471 _0mib_
= get_cpu_unit_code ("1b_0mib.");
7472 _0mmb_
= get_cpu_unit_code ("1b_0mmb.");
7473 _0mfb_
= get_cpu_unit_code ("1b_0mfb.");
7474 _0mlx_
= get_cpu_unit_code ("1b_0mlx.");
7475 _1mii_
= get_cpu_unit_code ("1b_1mii.");
7476 _1mmi_
= get_cpu_unit_code ("1b_1mmi.");
7477 _1mfi_
= get_cpu_unit_code ("1b_1mfi.");
7478 _1mmf_
= get_cpu_unit_code ("1b_1mmf.");
7479 _1bbb_
= get_cpu_unit_code ("1b_1bbb.");
7480 _1mbb_
= get_cpu_unit_code ("1b_1mbb.");
7481 _1mib_
= get_cpu_unit_code ("1b_1mib.");
7482 _1mmb_
= get_cpu_unit_code ("1b_1mmb.");
7483 _1mfb_
= get_cpu_unit_code ("1b_1mfb.");
7484 _1mlx_
= get_cpu_unit_code ("1b_1mlx.");
7486 schedule_ebbs (rtl_dump_file
);
7487 finish_bundle_states ();
7488 if (ia64_tune
== PROCESSOR_ITANIUM
)
7494 emit_insn_group_barriers (rtl_dump_file
);
7496 ia64_final_schedule
= 0;
7497 timevar_pop (TV_SCHED2
);
7500 emit_all_insn_group_barriers (rtl_dump_file
);
7502 /* A call must not be the last instruction in a function, so that the
7503 return address is still within the function, so that unwinding works
7504 properly. Note that IA-64 differs from dwarf2 on this point. */
7505 if (flag_unwind_tables
|| (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
7510 insn
= get_last_insn ();
7511 if (! INSN_P (insn
))
7512 insn
= prev_active_insn (insn
);
7513 if (GET_CODE (insn
) == INSN
7514 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
7515 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
7518 insn
= prev_active_insn (insn
);
7520 if (GET_CODE (insn
) == CALL_INSN
)
7523 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7524 emit_insn (gen_break_f ());
7525 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7530 emit_predicate_relation_info ();
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (regno)
     int regno;
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
	 value to "gp".  After returning from such a call, we need to make
	 sure the function restores the original gp-value, even if the
	 function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
	 input registers are marked as live at all function exits.  This
	 prevents the register allocator from using the input registers,
	 which in turn makes it possible to restart a system call after
	 an interrupt without having to save/restore the input registers.
	 This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
	 the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}
/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (regno)
     int regno;
{
  if (! reload_completed)
    return 0;

  if (current_frame_info.reg_save_b0
      && regno == current_frame_info.reg_save_b0)
    return 1;
  if (current_frame_info.reg_save_pr
      && regno == current_frame_info.reg_save_pr)
    return 1;
  if (current_frame_info.reg_save_ar_pfs
      && regno == current_frame_info.reg_save_ar_pfs)
    return 1;
  if (current_frame_info.reg_save_ar_unat
      && regno == current_frame_info.reg_save_ar_unat)
    return 1;
  if (current_frame_info.reg_save_ar_lc
      && regno == current_frame_info.reg_save_ar_lc)
    return 1;

  return 0;
}
/* Return true if this goes in small data/bss.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (exp)
     tree exp;
{
  if (TARGET_NO_SDATA)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".sdata") == 0
	  || strcmp (section, ".sbss") == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
	return true;
    }

  return false;
}
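
/* Example: a plain 8-byte global, assuming it is within the
   -G/ia64_section_threshold limit, is placed in .sdata/.sbss, so its
   address can be formed with a single gp-relative addl instead of a
   long movl (compare the ??? note above).  */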
/* Output assembly directives for prologue regions.  */

/* True if the current basic block is the last one of the function.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue ()
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = true;
    }

  fprintf (asm_out_file, "\t.restore sp\n");
}
7666 /* This function processes a SET pattern looking for specific patterns
7667 which result in emitting an assembly directive required for unwinding. */
7670 process_set (asm_out_file
, pat
)
7674 rtx src
= SET_SRC (pat
);
7675 rtx dest
= SET_DEST (pat
);
7676 int src_regno
, dest_regno
;
7678 /* Look for the ALLOC insn. */
7679 if (GET_CODE (src
) == UNSPEC_VOLATILE
7680 && XINT (src
, 1) == UNSPECV_ALLOC
7681 && GET_CODE (dest
) == REG
)
7683 dest_regno
= REGNO (dest
);
7685 /* If this isn't the final destination for ar.pfs, the alloc
7686 shouldn't have been marked frame related. */
7687 if (dest_regno
!= current_frame_info
.reg_save_ar_pfs
)
7690 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
7691 ia64_dbx_register_number (dest_regno
));
7695 /* Look for SP = .... */
7696 if (GET_CODE (dest
) == REG
&& REGNO (dest
) == STACK_POINTER_REGNUM
)
7698 if (GET_CODE (src
) == PLUS
)
7700 rtx op0
= XEXP (src
, 0);
7701 rtx op1
= XEXP (src
, 1);
7702 if (op0
== dest
&& GET_CODE (op1
) == CONST_INT
)
7704 if (INTVAL (op1
) < 0)
7705 fprintf (asm_out_file
, "\t.fframe "HOST_WIDE_INT_PRINT_DEC
"\n",
7708 process_epilogue ();
7713 else if (GET_CODE (src
) == REG
7714 && REGNO (src
) == HARD_FRAME_POINTER_REGNUM
)
7715 process_epilogue ();
7722 /* Register move we need to look at. */
7723 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == REG
)
7725 src_regno
= REGNO (src
);
7726 dest_regno
= REGNO (dest
);
7731 /* Saving return address pointer. */
7732 if (dest_regno
!= current_frame_info
.reg_save_b0
)
7734 fprintf (asm_out_file
, "\t.save rp, r%d\n",
7735 ia64_dbx_register_number (dest_regno
));
7739 if (dest_regno
!= current_frame_info
.reg_save_pr
)
7741 fprintf (asm_out_file
, "\t.save pr, r%d\n",
7742 ia64_dbx_register_number (dest_regno
));
7745 case AR_UNAT_REGNUM
:
7746 if (dest_regno
!= current_frame_info
.reg_save_ar_unat
)
7748 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
7749 ia64_dbx_register_number (dest_regno
));
7753 if (dest_regno
!= current_frame_info
.reg_save_ar_lc
)
7755 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
7756 ia64_dbx_register_number (dest_regno
));
7759 case STACK_POINTER_REGNUM
:
7760 if (dest_regno
!= HARD_FRAME_POINTER_REGNUM
7761 || ! frame_pointer_needed
)
7763 fprintf (asm_out_file
, "\t.vframe r%d\n",
7764 ia64_dbx_register_number (dest_regno
));
7768 /* Everything else should indicate being stored to memory. */
7773 /* Memory store we need to look at. */
7774 if (GET_CODE (dest
) == MEM
&& GET_CODE (src
) == REG
)
7780 if (GET_CODE (XEXP (dest
, 0)) == REG
)
7782 base
= XEXP (dest
, 0);
7785 else if (GET_CODE (XEXP (dest
, 0)) == PLUS
7786 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
)
7788 base
= XEXP (XEXP (dest
, 0), 0);
7789 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
7794 if (base
== hard_frame_pointer_rtx
)
7796 saveop
= ".savepsp";
7799 else if (base
== stack_pointer_rtx
)
7804 src_regno
= REGNO (src
);
7808 if (current_frame_info
.reg_save_b0
!= 0)
7810 fprintf (asm_out_file
, "\t%s rp, %ld\n", saveop
, off
);
7814 if (current_frame_info
.reg_save_pr
!= 0)
7816 fprintf (asm_out_file
, "\t%s pr, %ld\n", saveop
, off
);
7820 if (current_frame_info
.reg_save_ar_lc
!= 0)
7822 fprintf (asm_out_file
, "\t%s ar.lc, %ld\n", saveop
, off
);
7826 if (current_frame_info
.reg_save_ar_pfs
!= 0)
7828 fprintf (asm_out_file
, "\t%s ar.pfs, %ld\n", saveop
, off
);
7831 case AR_UNAT_REGNUM
:
7832 if (current_frame_info
.reg_save_ar_unat
!= 0)
7834 fprintf (asm_out_file
, "\t%s ar.unat, %ld\n", saveop
, off
);
7841 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
7842 1 << (src_regno
- GR_REG (4)));
7850 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
7851 1 << (src_regno
- BR_REG (1)));
7858 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
7859 1 << (src_regno
- FR_REG (2)));
7862 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7863 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7864 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7865 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7866 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
7867 1 << (src_regno
- FR_REG (12)));
7879 /* This function looks at a single insn and emits any directives
7880 required to unwind this insn. */
7882 process_for_unwind_directive (asm_out_file
, insn
)
7886 if (flag_unwind_tables
7887 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
7891 if (GET_CODE (insn
) == NOTE
7892 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
7894 last_block
= NOTE_BASIC_BLOCK (insn
)->next_bb
== EXIT_BLOCK_PTR
;
7896 /* Restore unwind state from immediately before the epilogue. */
7897 if (need_copy_state
)
7899 fprintf (asm_out_file
, "\t.body\n");
7900 fprintf (asm_out_file
, "\t.copy_state 1\n");
7901 need_copy_state
= false;
7905 if (GET_CODE (insn
) == NOTE
|| ! RTX_FRAME_RELATED_P (insn
))
7908 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
7910 pat
= XEXP (pat
, 0);
7912 pat
= PATTERN (insn
);
7914 switch (GET_CODE (pat
))
7917 process_set (asm_out_file
, pat
);
7923 int limit
= XVECLEN (pat
, 0);
7924 for (par_index
= 0; par_index
< limit
; par_index
++)
7926 rtx x
= XVECEXP (pat
, 0, par_index
);
7927 if (GET_CODE (x
) == SET
)
7928 process_set (asm_out_file
, x
);
7941 ia64_init_builtins ()
7943 tree psi_type_node
= build_pointer_type (integer_type_node
);
7944 tree pdi_type_node
= build_pointer_type (long_integer_type_node
);
7946 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7947 tree si_ftype_psi_si_si
7948 = build_function_type_list (integer_type_node
,
7949 psi_type_node
, integer_type_node
,
7950 integer_type_node
, NULL_TREE
);
7952 /* __sync_val_compare_and_swap_di */
7953 tree di_ftype_pdi_di_di
7954 = build_function_type_list (long_integer_type_node
,
7955 pdi_type_node
, long_integer_type_node
,
7956 long_integer_type_node
, NULL_TREE
);
7957 /* __sync_bool_compare_and_swap_di */
7958 tree si_ftype_pdi_di_di
7959 = build_function_type_list (integer_type_node
,
7960 pdi_type_node
, long_integer_type_node
,
7961 long_integer_type_node
, NULL_TREE
);
7962 /* __sync_synchronize */
7963 tree void_ftype_void
7964 = build_function_type (void_type_node
, void_list_node
);
7966 /* __sync_lock_test_and_set_si */
7967 tree si_ftype_psi_si
7968 = build_function_type_list (integer_type_node
,
7969 psi_type_node
, integer_type_node
, NULL_TREE
);
7971 /* __sync_lock_test_and_set_di */
7972 tree di_ftype_pdi_di
7973 = build_function_type_list (long_integer_type_node
,
7974 pdi_type_node
, long_integer_type_node
,
7977 /* __sync_lock_release_si */
7979 = build_function_type_list (void_type_node
, psi_type_node
, NULL_TREE
);
7981 /* __sync_lock_release_di */
7983 = build_function_type_list (void_type_node
, pdi_type_node
, NULL_TREE
);
7985 #define def_builtin(name, type, code) \
7986 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7988 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si
,
7989 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
);
7990 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di
,
7991 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
);
7992 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si
,
7993 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
);
7994 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di
,
7995 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
);
7997 def_builtin ("__sync_synchronize", void_ftype_void
,
7998 IA64_BUILTIN_SYNCHRONIZE
);
8000 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si
,
8001 IA64_BUILTIN_LOCK_TEST_AND_SET_SI
);
8002 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di
,
8003 IA64_BUILTIN_LOCK_TEST_AND_SET_DI
);
8004 def_builtin ("__sync_lock_release_si", void_ftype_psi
,
8005 IA64_BUILTIN_LOCK_RELEASE_SI
);
8006 def_builtin ("__sync_lock_release_di", void_ftype_pdi
,
8007 IA64_BUILTIN_LOCK_RELEASE_DI
);
8009 def_builtin ("__builtin_ia64_bsp",
8010 build_function_type (ptr_type_node
, void_list_node
),
8013 def_builtin ("__builtin_ia64_flushrs",
8014 build_function_type (void_type_node
, void_list_node
),
8015 IA64_BUILTIN_FLUSHRS
);
8017 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si
,
8018 IA64_BUILTIN_FETCH_AND_ADD_SI
);
8019 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si
,
8020 IA64_BUILTIN_FETCH_AND_SUB_SI
);
8021 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si
,
8022 IA64_BUILTIN_FETCH_AND_OR_SI
);
8023 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si
,
8024 IA64_BUILTIN_FETCH_AND_AND_SI
);
8025 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si
,
8026 IA64_BUILTIN_FETCH_AND_XOR_SI
);
8027 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si
,
8028 IA64_BUILTIN_FETCH_AND_NAND_SI
);
8030 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si
,
8031 IA64_BUILTIN_ADD_AND_FETCH_SI
);
8032 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si
,
8033 IA64_BUILTIN_SUB_AND_FETCH_SI
);
8034 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si
,
8035 IA64_BUILTIN_OR_AND_FETCH_SI
);
8036 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si
,
8037 IA64_BUILTIN_AND_AND_FETCH_SI
);
8038 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si
,
8039 IA64_BUILTIN_XOR_AND_FETCH_SI
);
8040 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si
,
8041 IA64_BUILTIN_NAND_AND_FETCH_SI
);
8043 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di
,
8044 IA64_BUILTIN_FETCH_AND_ADD_DI
);
8045 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di
,
8046 IA64_BUILTIN_FETCH_AND_SUB_DI
);
8047 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di
,
8048 IA64_BUILTIN_FETCH_AND_OR_DI
);
8049 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di
,
8050 IA64_BUILTIN_FETCH_AND_AND_DI
);
8051 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di
,
8052 IA64_BUILTIN_FETCH_AND_XOR_DI
);
8053 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di
,
8054 IA64_BUILTIN_FETCH_AND_NAND_DI
);
8056 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di
,
8057 IA64_BUILTIN_ADD_AND_FETCH_DI
);
8058 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di
,
8059 IA64_BUILTIN_SUB_AND_FETCH_DI
);
8060 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di
,
8061 IA64_BUILTIN_OR_AND_FETCH_DI
);
8062 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di
,
8063 IA64_BUILTIN_AND_AND_FETCH_DI
);
8064 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di
,
8065 IA64_BUILTIN_XOR_AND_FETCH_DI
);
8066 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di
,
8067 IA64_BUILTIN_NAND_AND_FETCH_DI
);
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
8085 ia64_expand_fetch_and_op (binoptab
, mode
, arglist
, target
)
8087 enum machine_mode mode
;
8091 rtx ret
, label
, tmp
, ccv
, insn
, mem
, value
;
8094 arg0
= TREE_VALUE (arglist
);
8095 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8096 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
8097 #ifdef POINTERS_EXTEND_UNSIGNED
8098 if (GET_MODE(mem
) != Pmode
)
8099 mem
= convert_memory_address (Pmode
, mem
);
8101 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
8103 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
8104 MEM_VOLATILE_P (mem
) = 1;
8106 if (target
&& register_operand (target
, mode
))
8109 ret
= gen_reg_rtx (mode
);
8111 emit_insn (gen_mf ());
8113 /* Special case for fetchadd instructions. */
8114 if (binoptab
== add_optab
&& fetchadd_operand (value
, VOIDmode
))
8117 insn
= gen_fetchadd_acq_si (ret
, mem
, value
);
8119 insn
= gen_fetchadd_acq_di (ret
, mem
, value
);
8124 tmp
= gen_reg_rtx (mode
);
8125 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
8126 emit_move_insn (tmp
, mem
);
8128 label
= gen_label_rtx ();
8130 emit_move_insn (ret
, tmp
);
8131 emit_move_insn (ccv
, tmp
);
8133 /* Perform the specific operation. Special case NAND by noticing
8134 one_cmpl_optab instead. */
8135 if (binoptab
== one_cmpl_optab
)
8137 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
8138 binoptab
= and_optab
;
8140 tmp
= expand_binop (mode
, binoptab
, tmp
, value
, tmp
, 1, OPTAB_WIDEN
);
8143 insn
= gen_cmpxchg_acq_si (tmp
, mem
, tmp
, ccv
);
8145 insn
= gen_cmpxchg_acq_di (tmp
, mem
, tmp
, ccv
);
8148 emit_cmp_and_jump_insns (tmp
, ret
, NE
, 0, mode
, 1, label
);
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
8166 ia64_expand_op_and_fetch (binoptab
, mode
, arglist
, target
)
8168 enum machine_mode mode
;
8172 rtx old
, label
, tmp
, ret
, ccv
, insn
, mem
, value
;
8175 arg0
= TREE_VALUE (arglist
);
8176 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8177 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
8178 #ifdef POINTERS_EXTEND_UNSIGNED
8179 if (GET_MODE(mem
) != Pmode
)
8180 mem
= convert_memory_address (Pmode
, mem
);
8183 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
8185 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
8186 MEM_VOLATILE_P (mem
) = 1;
8188 if (target
&& ! register_operand (target
, mode
))
8191 emit_insn (gen_mf ());
8192 tmp
= gen_reg_rtx (mode
);
8193 old
= gen_reg_rtx (mode
);
8194 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
8196 emit_move_insn (tmp
, mem
);
8198 label
= gen_label_rtx ();
8200 emit_move_insn (old
, tmp
);
8201 emit_move_insn (ccv
, tmp
);
8203 /* Perform the specific operation. Special case NAND by noticing
8204 one_cmpl_optab instead. */
8205 if (binoptab
== one_cmpl_optab
)
8207 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
8208 binoptab
= and_optab
;
8210 ret
= expand_binop (mode
, binoptab
, tmp
, value
, target
, 1, OPTAB_WIDEN
);
8213 insn
= gen_cmpxchg_acq_si (tmp
, mem
, ret
, ccv
);
8215 insn
= gen_cmpxchg_acq_di (tmp
, mem
, ret
, ccv
);
8218 emit_cmp_and_jump_insns (tmp
, old
, NE
, 0, mode
, 1, label
);
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

static rtx
8234 ia64_expand_compare_and_swap (rmode
, mode
, boolp
, arglist
, target
)
8235 enum machine_mode rmode
;
8236 enum machine_mode mode
;
8241 tree arg0
, arg1
, arg2
;
8242 rtx mem
, old
, new, ccv
, tmp
, insn
;
8244 arg0
= TREE_VALUE (arglist
);
8245 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8246 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
8247 mem
= expand_expr (arg0
, NULL_RTX
, ptr_mode
, 0);
8248 old
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
8249 new = expand_expr (arg2
, NULL_RTX
, mode
, 0);
8251 mem
= gen_rtx_MEM (mode
, force_reg (ptr_mode
, mem
));
8252 MEM_VOLATILE_P (mem
) = 1;
8254 if (! register_operand (old
, mode
))
8255 old
= copy_to_mode_reg (mode
, old
);
8256 if (! register_operand (new, mode
))
8257 new = copy_to_mode_reg (mode
, new);
8259 if (! boolp
&& target
&& register_operand (target
, mode
))
8262 tmp
= gen_reg_rtx (mode
);
8264 ccv
= gen_rtx_REG (DImode
, AR_CCV_REGNUM
);
8266 emit_move_insn (ccv
, old
);
8269 rtx ccvtmp
= gen_reg_rtx (DImode
);
8270 emit_insn (gen_zero_extendsidi2 (ccvtmp
, old
));
8271 emit_move_insn (ccv
, ccvtmp
);
8273 emit_insn (gen_mf ());
8275 insn
= gen_cmpxchg_acq_si (tmp
, mem
, new, ccv
);
8277 insn
= gen_cmpxchg_acq_di (tmp
, mem
, new, ccv
);
8283 target
= gen_reg_rtx (rmode
);
8284 return emit_store_flag_force (target
, EQ
, tmp
, old
, mode
, 1, 1);
8290 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
8293 ia64_expand_lock_test_and_set (mode
, arglist
, target
)
8294 enum machine_mode mode
;
8299 rtx mem
, new, ret
, insn
;
8301 arg0
= TREE_VALUE (arglist
);
8302 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8303 mem
= expand_expr (arg0
, NULL_RTX
, ptr_mode
, 0);
8304 new = expand_expr (arg1
, NULL_RTX
, mode
, 0);
8306 mem
= gen_rtx_MEM (mode
, force_reg (ptr_mode
, mem
));
8307 MEM_VOLATILE_P (mem
) = 1;
8308 if (! register_operand (new, mode
))
8309 new = copy_to_mode_reg (mode
, new);
8311 if (target
&& register_operand (target
, mode
))
8314 ret
= gen_reg_rtx (mode
);
8317 insn
= gen_xchgsi (ret
, mem
, new);
8319 insn
= gen_xchgdi (ret
, mem
, new);
8325 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
8328 ia64_expand_lock_release (mode
, arglist
, target
)
8329 enum machine_mode mode
;
8331 rtx target ATTRIBUTE_UNUSED
;
8336 arg0
= TREE_VALUE (arglist
);
8337 mem
= expand_expr (arg0
, NULL_RTX
, ptr_mode
, 0);
8339 mem
= gen_rtx_MEM (mode
, force_reg (ptr_mode
, mem
));
8340 MEM_VOLATILE_P (mem
) = 1;
8342 emit_move_insn (mem
, const0_rtx
);
8348 ia64_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
8351 rtx subtarget ATTRIBUTE_UNUSED
;
8352 enum machine_mode mode ATTRIBUTE_UNUSED
;
8353 int ignore ATTRIBUTE_UNUSED
;
8355 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
8356 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
8357 tree arglist
= TREE_OPERAND (exp
, 1);
8358 enum machine_mode rmode
= VOIDmode
;
8362 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
8363 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
8368 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
8369 case IA64_BUILTIN_LOCK_RELEASE_SI
:
8370 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
8371 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
8372 case IA64_BUILTIN_FETCH_AND_OR_SI
:
8373 case IA64_BUILTIN_FETCH_AND_AND_SI
:
8374 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
8375 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
8376 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
8377 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
8378 case IA64_BUILTIN_OR_AND_FETCH_SI
:
8379 case IA64_BUILTIN_AND_AND_FETCH_SI
:
8380 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
8381 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
8385 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
8390 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
8395 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
8396 case IA64_BUILTIN_LOCK_RELEASE_DI
:
8397 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
8398 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
8399 case IA64_BUILTIN_FETCH_AND_OR_DI
:
8400 case IA64_BUILTIN_FETCH_AND_AND_DI
:
8401 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
8402 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
8403 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
8404 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
8405 case IA64_BUILTIN_OR_AND_FETCH_DI
:
8406 case IA64_BUILTIN_AND_AND_FETCH_DI
:
8407 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
8408 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
8418 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
8419 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
8420 return ia64_expand_compare_and_swap (rmode
, mode
, 1, arglist
,
8423 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
8424 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
8425 return ia64_expand_compare_and_swap (rmode
, mode
, 0, arglist
,
8428 case IA64_BUILTIN_SYNCHRONIZE
:
8429 emit_insn (gen_mf ());
8432 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
8433 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
8434 return ia64_expand_lock_test_and_set (mode
, arglist
, target
);
8436 case IA64_BUILTIN_LOCK_RELEASE_SI
:
8437 case IA64_BUILTIN_LOCK_RELEASE_DI
:
8438 return ia64_expand_lock_release (mode
, arglist
, target
);
8440 case IA64_BUILTIN_BSP
:
8441 if (! target
|| ! register_operand (target
, DImode
))
8442 target
= gen_reg_rtx (DImode
);
8443 emit_insn (gen_bsp_value (target
));
8444 #ifdef POINTERS_EXTEND_UNSIGNED
8445 target
= convert_memory_address (ptr_mode
, target
);
8449 case IA64_BUILTIN_FLUSHRS
:
8450 emit_insn (gen_flushrs ());
8453 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
8454 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
8455 return ia64_expand_fetch_and_op (add_optab
, mode
, arglist
, target
);
8457 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
8458 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
8459 return ia64_expand_fetch_and_op (sub_optab
, mode
, arglist
, target
);
8461 case IA64_BUILTIN_FETCH_AND_OR_SI
:
8462 case IA64_BUILTIN_FETCH_AND_OR_DI
:
8463 return ia64_expand_fetch_and_op (ior_optab
, mode
, arglist
, target
);
8465 case IA64_BUILTIN_FETCH_AND_AND_SI
:
8466 case IA64_BUILTIN_FETCH_AND_AND_DI
:
8467 return ia64_expand_fetch_and_op (and_optab
, mode
, arglist
, target
);
8469 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
8470 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
8471 return ia64_expand_fetch_and_op (xor_optab
, mode
, arglist
, target
);
8473 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
8474 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
8475 return ia64_expand_fetch_and_op (one_cmpl_optab
, mode
, arglist
, target
);
8477 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
8478 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
8479 return ia64_expand_op_and_fetch (add_optab
, mode
, arglist
, target
);
8481 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
8482 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
8483 return ia64_expand_op_and_fetch (sub_optab
, mode
, arglist
, target
);
8485 case IA64_BUILTIN_OR_AND_FETCH_SI
:
8486 case IA64_BUILTIN_OR_AND_FETCH_DI
:
8487 return ia64_expand_op_and_fetch (ior_optab
, mode
, arglist
, target
);
8489 case IA64_BUILTIN_AND_AND_FETCH_SI
:
8490 case IA64_BUILTIN_AND_AND_FETCH_DI
:
8491 return ia64_expand_op_and_fetch (and_optab
, mode
, arglist
, target
);
8493 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
8494 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
8495 return ia64_expand_op_and_fetch (xor_optab
, mode
, arglist
, target
);
8497 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
8498 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
8499 return ia64_expand_op_and_fetch (one_cmpl_optab
, mode
, arglist
, target
);
/* For HP-UX IA64, aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (mode, type)
     enum machine_mode mode;
     tree type;
{
  /* Exception to the normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
     hardwired to be true.  */

  return ((mode == BLKmode
	   ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	      && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
	   : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
	  ? downward : upward);
}
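
/* For example, a 3-byte struct argument is padded upward here, so its
   bytes occupy the most significant end of the 8-byte slot, as the
   comment above requires for HP-UX.  */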
/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list
{
  struct extern_func_list *next; /* next external */
  char *name; /* name of the external */
} *extern_func_head = 0;

void
ia64_hpux_add_extern_decl (name)
     const char *name;
{
  struct extern_func_list *p;

  p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
  p->name = xmalloc (strlen (name) + 1);
  strcpy (p->name, name);
  p->next = extern_func_head;
  extern_func_head = p;
}

/* Print out the list of used global functions.  */

static void
ia64_hpux_file_end ()
{
  while (extern_func_head)
    {
      const char *real_name;
      tree decl;

      real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
      decl = maybe_get_identifier (real_name);

      if (!decl
	  || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
	{
	  if (decl)
	    TREE_ASM_WRITTEN (decl) = 1;
	  (*targetm.asm_out.globalize_label) (asm_out_file,
					      extern_func_head->name);
	  fputs (TYPE_ASM_OP, asm_out_file);
	  assemble_name (asm_out_file, extern_func_head->name);
	  putc (',', asm_out_file);
	  fprintf (asm_out_file, TYPE_OPERAND_FMT, "function");
	  putc ('\n', asm_out_file);
	}
      extern_func_head = extern_func_head->next;
    }
}
/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     rtx x;
     unsigned HOST_WIDE_INT align;
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}

/* It is illegal to have relocations in shared segments on AIX and HPUX.
   Pretend flag_pic is always set.  */

static void
ia64_rwreloc_select_section (exp, reloc, align)
     tree exp;
     int reloc;
     unsigned HOST_WIDE_INT align;
{
  default_elf_select_section_1 (exp, reloc, align, true);
}

static void
ia64_rwreloc_unique_section (decl, reloc)
     tree decl;
     int reloc;
{
  default_unique_section_1 (decl, reloc, true);
}

static void
ia64_rwreloc_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     rtx x;
     unsigned HOST_WIDE_INT align;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}

static unsigned int
ia64_rwreloc_section_type_flags (decl, name, reloc)
     tree decl;
     const char *name;
     int reloc;
{
  return default_section_type_flags_1 (decl, name, reloc, true);
}
8644 /* Output the assembler code for a thunk function. THUNK_DECL is the
8645 declaration for the thunk function itself, FUNCTION is the decl for
8646 the target function. DELTA is an immediate constant offset to be
8647 added to THIS. If VCALL_OFFSET is nonzero, the word at
8648 *(*this + vcall_offset) should be added to THIS. */
8651 ia64_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
)
8653 tree thunk ATTRIBUTE_UNUSED
;
8654 HOST_WIDE_INT delta
;
8655 HOST_WIDE_INT vcall_offset
;
8658 rtx
this, insn
, funexp
;
8660 reload_completed
= 1;
8661 epilogue_completed
= 1;
8664 /* Set things up as ia64_expand_prologue might. */
8665 last_scratch_gr_reg
= 15;
8667 memset (¤t_frame_info
, 0, sizeof (current_frame_info
));
8668 current_frame_info
.spill_cfa_off
= -16;
8669 current_frame_info
.n_input_regs
= 1;
8670 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
8672 if (!TARGET_REG_NAMES
)
8673 reg_names
[IN_REG (0)] = ia64_reg_numbers
[0];
8675 /* Mark the end of the (empty) prologue. */
8676 emit_note (NOTE_INSN_PROLOGUE_END
);
8678 this = gen_rtx_REG (Pmode
, IN_REG (0));
8680 /* Apply the constant offset, if required. */
8683 rtx delta_rtx
= GEN_INT (delta
);
8685 if (!CONST_OK_FOR_I (delta
))
8687 rtx tmp
= gen_rtx_REG (Pmode
, 2);
8688 emit_move_insn (tmp
, delta_rtx
);
8691 emit_insn (gen_adddi3 (this, this, delta_rtx
));
8694 /* Apply the offset from the vtable, if required. */
8697 rtx vcall_offset_rtx
= GEN_INT (vcall_offset
);
8698 rtx tmp
= gen_rtx_REG (Pmode
, 2);
8700 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, this));
8702 if (!CONST_OK_FOR_J (vcall_offset
))
8704 rtx tmp2
= gen_rtx_REG (Pmode
, next_scratch_gr_reg ());
8705 emit_move_insn (tmp2
, vcall_offset_rtx
);
8706 vcall_offset_rtx
= tmp2
;
8708 emit_insn (gen_adddi3 (tmp
, tmp
, vcall_offset_rtx
));
8710 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, tmp
));
8712 emit_insn (gen_adddi3 (this, this, tmp
));
8715 /* Generate a tail call to the target function. */
8716 if (! TREE_USED (function
))
8718 assemble_external (function
);
8719 TREE_USED (function
) = 1;
8721 funexp
= XEXP (DECL_RTL (function
), 0);
8722 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
8723 ia64_expand_call (NULL_RTX
, funexp
, NULL_RTX
, 1);
8724 insn
= get_last_insn ();
8725 SIBLING_CALL_P (insn
) = 1;
8727 /* Code generation for calls relies on splitting. */
8728 reload_completed
= 1;
8729 epilogue_completed
= 1;
8730 try_split (PATTERN (insn
), insn
, 0);
8734 /* Run just enough of rest_of_compilation to get the insns emitted.
8735 There's not really enough bulk here to make other passes such as
8736 instruction scheduling worth while. Note that use_thunk calls
8737 assemble_start_function and assemble_end_function. */
8739 insn_locators_initialize ();
8740 emit_all_insn_group_barriers (NULL
);
8741 insn
= get_insns ();
8742 shorten_branches (insn
);
8743 final_start_function (insn
, file
, 1);
8744 final (insn
, file
, 1, 0);
8745 final_end_function ();
8747 reload_completed
= 0;
8748 epilogue_completed
= 0;
8752 #include "gt-ia64.h"