1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "real.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "except.h"
42 #include "function.h"
43 #include "ggc.h"
44 #include "basic-block.h"
45 #include "toplev.h"
46 #include "sched-int.h"
47 #include "timevar.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "tm_p.h"
51 #include "hashtab.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
56 /* This is used for communication between ASM_OUTPUT_LABEL and
57 ASM_OUTPUT_LABELREF. */
58 int ia64_asm_output_label = 0;
60 /* Define the information needed to generate branch and scc insns. This is
61 stored from the compare operation. */
62 struct rtx_def * ia64_compare_op0;
63 struct rtx_def * ia64_compare_op1;
65 /* Register names for ia64_expand_prologue. */
66 static const char * const ia64_reg_numbers[96] =
67 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
68 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
69 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
70 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
71 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
72 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
73 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
74 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
75 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
76 "r104","r105","r106","r107","r108","r109","r110","r111",
77 "r112","r113","r114","r115","r116","r117","r118","r119",
78 "r120","r121","r122","r123","r124","r125","r126","r127"};
80 /* ??? These strings could be shared with REGISTER_NAMES. */
81 static const char * const ia64_input_reg_names[8] =
82 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
84 /* ??? These strings could be shared with REGISTER_NAMES. */
85 static const char * const ia64_local_reg_names[80] =
86 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
87 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
88 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
89 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
90 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
91 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
92 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
93 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
94 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
95 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
97 /* ??? These strings could be shared with REGISTER_NAMES. */
98 static const char * const ia64_output_reg_names[8] =
99 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
101 /* String used with the -mfixed-range= option. */
102 const char *ia64_fixed_range_string;
104 /* Determines whether we use adds, addl, or movl to generate our
105 TLS immediate offsets. */
106 int ia64_tls_size = 22;
108 /* String used with the -mtls-size= option. */
109 const char *ia64_tls_size_string;
 111 /* The CPU we are scheduling for. */
112 enum processor_type ia64_tune;
 114 /* String used with the -mtune= option. */
115 const char *ia64_tune_string;
117 /* Determines whether we run our final scheduling pass or not. We always
118 avoid the normal second scheduling pass. */
119 static int ia64_flag_schedule_insns2;
121 /* Determines whether we run variable tracking in machine dependent
122 reorganization. */
123 static int ia64_flag_var_tracking;
125 /* Variables which are this size or smaller are put in the sdata/sbss
126 sections. */
128 unsigned int ia64_section_threshold;
130 /* The following variable is used by the DFA insn scheduler. The value is
131 TRUE if we do insn bundling instead of insn scheduling. */
132 int bundling_p = 0;
134 /* Structure to be filled in by ia64_compute_frame_size with register
135 save masks and offsets for the current function. */
137 struct ia64_frame_info
139 HOST_WIDE_INT total_size; /* size of the stack frame, not including
140 the caller's scratch area. */
141 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
142 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
143 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
144 HARD_REG_SET mask; /* mask of saved registers. */
145 unsigned int gr_used_mask; /* mask of registers in use as gr spill
146 registers or long-term scratches. */
147 int n_spilled; /* number of spilled registers. */
148 int reg_fp; /* register for fp. */
149 int reg_save_b0; /* save register for b0. */
150 int reg_save_pr; /* save register for prs. */
151 int reg_save_ar_pfs; /* save register for ar.pfs. */
152 int reg_save_ar_unat; /* save register for ar.unat. */
153 int reg_save_ar_lc; /* save register for ar.lc. */
154 int reg_save_gp; /* save register for gp. */
155 int n_input_regs; /* number of input registers used. */
156 int n_local_regs; /* number of local registers used. */
157 int n_output_regs; /* number of output registers used. */
158 int n_rotate_regs; /* number of rotating registers used. */
160 char need_regstk; /* true if a .regstk directive needed. */
161 char initialized; /* true if the data is finalized. */
164 /* Current frame information calculated by ia64_compute_frame_size. */
165 static struct ia64_frame_info current_frame_info;
167 static int ia64_first_cycle_multipass_dfa_lookahead (void);
168 static void ia64_dependencies_evaluation_hook (rtx, rtx);
169 static void ia64_init_dfa_pre_cycle_insn (void);
170 static rtx ia64_dfa_pre_cycle_insn (void);
171 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
172 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
173 static rtx gen_tls_get_addr (void);
174 static rtx gen_thread_pointer (void);
175 static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
176 static int find_gr_spill (int);
177 static int next_scratch_gr_reg (void);
178 static void mark_reg_gr_used_mask (rtx, void *);
179 static void ia64_compute_frame_size (HOST_WIDE_INT);
180 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
181 static void finish_spill_pointers (void);
182 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
183 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
184 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
185 static rtx gen_movdi_x (rtx, rtx, rtx);
186 static rtx gen_fr_spill_x (rtx, rtx, rtx);
187 static rtx gen_fr_restore_x (rtx, rtx, rtx);
189 static enum machine_mode hfa_element_mode (tree, int);
190 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
191 tree, int *, int);
192 static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
193 tree, bool);
194 static bool ia64_function_ok_for_sibcall (tree, tree);
195 static bool ia64_return_in_memory (tree, tree);
196 static bool ia64_rtx_costs (rtx, int, int, int *);
197 static void fix_range (const char *);
198 static struct machine_function * ia64_init_machine_status (void);
199 static void emit_insn_group_barriers (FILE *);
200 static void emit_all_insn_group_barriers (FILE *);
201 static void final_emit_insn_group_barriers (FILE *);
202 static void emit_predicate_relation_info (void);
203 static void ia64_reorg (void);
204 static bool ia64_in_small_data_p (tree);
205 static void process_epilogue (void);
206 static int process_set (FILE *, rtx);
208 static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
209 static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
210 static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
211 int, tree, rtx);
212 static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
213 static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
214 static bool ia64_assemble_integer (rtx, unsigned int, int);
215 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
216 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
217 static void ia64_output_function_end_prologue (FILE *);
219 static int ia64_issue_rate (void);
220 static int ia64_adjust_cost (rtx, rtx, rtx, int);
221 static void ia64_sched_init (FILE *, int, int);
222 static void ia64_sched_finish (FILE *, int);
223 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
224 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
225 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
226 static int ia64_variable_issue (FILE *, int, rtx, int);
228 static struct bundle_state *get_free_bundle_state (void);
229 static void free_bundle_state (struct bundle_state *);
230 static void initiate_bundle_states (void);
231 static void finish_bundle_states (void);
232 static unsigned bundle_state_hash (const void *);
233 static int bundle_state_eq_p (const void *, const void *);
234 static int insert_bundle_state (struct bundle_state *);
235 static void initiate_bundle_state_table (void);
236 static void finish_bundle_state_table (void);
237 static int try_issue_nops (struct bundle_state *, int);
238 static int try_issue_insn (struct bundle_state *, rtx);
239 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
240 static int get_max_pos (state_t);
241 static int get_template (state_t, int);
243 static rtx get_next_important_insn (rtx, rtx);
244 static void bundling (FILE *, int, rtx, rtx);
246 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
247 HOST_WIDE_INT, tree);
248 static void ia64_file_start (void);
250 static void ia64_select_rtx_section (enum machine_mode, rtx,
251 unsigned HOST_WIDE_INT);
252 static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
253 ATTRIBUTE_UNUSED;
254 static void ia64_rwreloc_unique_section (tree, int)
255 ATTRIBUTE_UNUSED;
256 static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
257 unsigned HOST_WIDE_INT)
258 ATTRIBUTE_UNUSED;
259 static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
260 ATTRIBUTE_UNUSED;
262 static void ia64_hpux_add_extern_decl (tree decl)
263 ATTRIBUTE_UNUSED;
264 static void ia64_hpux_file_end (void)
265 ATTRIBUTE_UNUSED;
266 static void ia64_init_libfuncs (void)
267 ATTRIBUTE_UNUSED;
268 static void ia64_hpux_init_libfuncs (void)
269 ATTRIBUTE_UNUSED;
270 static void ia64_sysv4_init_libfuncs (void)
271 ATTRIBUTE_UNUSED;
272 static void ia64_vms_init_libfuncs (void)
273 ATTRIBUTE_UNUSED;
275 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
276 static void ia64_encode_section_info (tree, rtx, int);
277 static rtx ia64_struct_value_rtx (tree, int);
278 static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
281 /* Table of valid machine attributes. */
282 static const struct attribute_spec ia64_attribute_table[] =
284 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
285 { "syscall_linkage", 0, 0, false, true, true, NULL },
286 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
287 { NULL, 0, 0, false, false, false, NULL }
290 /* Initialize the GCC target structure. */
291 #undef TARGET_ATTRIBUTE_TABLE
292 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
294 #undef TARGET_INIT_BUILTINS
295 #define TARGET_INIT_BUILTINS ia64_init_builtins
297 #undef TARGET_EXPAND_BUILTIN
298 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
300 #undef TARGET_ASM_BYTE_OP
301 #define TARGET_ASM_BYTE_OP "\tdata1\t"
302 #undef TARGET_ASM_ALIGNED_HI_OP
303 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
304 #undef TARGET_ASM_ALIGNED_SI_OP
305 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
306 #undef TARGET_ASM_ALIGNED_DI_OP
307 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
308 #undef TARGET_ASM_UNALIGNED_HI_OP
309 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
310 #undef TARGET_ASM_UNALIGNED_SI_OP
311 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
312 #undef TARGET_ASM_UNALIGNED_DI_OP
313 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
314 #undef TARGET_ASM_INTEGER
315 #define TARGET_ASM_INTEGER ia64_assemble_integer
317 #undef TARGET_ASM_FUNCTION_PROLOGUE
318 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
319 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
320 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
321 #undef TARGET_ASM_FUNCTION_EPILOGUE
322 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
324 #undef TARGET_IN_SMALL_DATA_P
325 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
327 #undef TARGET_SCHED_ADJUST_COST
328 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
329 #undef TARGET_SCHED_ISSUE_RATE
330 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
331 #undef TARGET_SCHED_VARIABLE_ISSUE
332 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
333 #undef TARGET_SCHED_INIT
334 #define TARGET_SCHED_INIT ia64_sched_init
335 #undef TARGET_SCHED_FINISH
336 #define TARGET_SCHED_FINISH ia64_sched_finish
337 #undef TARGET_SCHED_REORDER
338 #define TARGET_SCHED_REORDER ia64_sched_reorder
339 #undef TARGET_SCHED_REORDER2
340 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
342 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
343 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
345 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
346 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
348 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
349 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
350 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
351 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
353 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
354 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
355 ia64_first_cycle_multipass_dfa_lookahead_guard
357 #undef TARGET_SCHED_DFA_NEW_CYCLE
358 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
360 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
361 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
362 #undef TARGET_PASS_BY_REFERENCE
363 #define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
365 #undef TARGET_ASM_OUTPUT_MI_THUNK
366 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
367 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
368 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
370 #undef TARGET_ASM_FILE_START
371 #define TARGET_ASM_FILE_START ia64_file_start
373 #undef TARGET_RTX_COSTS
374 #define TARGET_RTX_COSTS ia64_rtx_costs
375 #undef TARGET_ADDRESS_COST
376 #define TARGET_ADDRESS_COST hook_int_rtx_0
378 #undef TARGET_MACHINE_DEPENDENT_REORG
379 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
381 #undef TARGET_ENCODE_SECTION_INFO
382 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
384 /* ??? ABI doesn't allow us to define this. */
385 #if 0
386 #undef TARGET_PROMOTE_FUNCTION_ARGS
387 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
388 #endif
390 /* ??? ABI doesn't allow us to define this. */
391 #if 0
392 #undef TARGET_PROMOTE_FUNCTION_RETURN
393 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
394 #endif
396 /* ??? Investigate. */
397 #if 0
398 #undef TARGET_PROMOTE_PROTOTYPES
399 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
400 #endif
402 #undef TARGET_STRUCT_VALUE_RTX
403 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
404 #undef TARGET_RETURN_IN_MEMORY
405 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
406 #undef TARGET_SETUP_INCOMING_VARARGS
407 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
408 #undef TARGET_STRICT_ARGUMENT_NAMING
409 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
410 #undef TARGET_MUST_PASS_IN_STACK
411 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
413 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
414 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
416 #undef TARGET_UNWIND_EMIT
417 #define TARGET_UNWIND_EMIT process_for_unwind_directive
419 struct gcc_target targetm = TARGET_INITIALIZER;
421 typedef enum
423 ADDR_AREA_NORMAL, /* normal address area */
424 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
426 ia64_addr_area;
428 static GTY(()) tree small_ident1;
429 static GTY(()) tree small_ident2;
431 static void
432 init_idents (void)
434 if (small_ident1 == 0)
436 small_ident1 = get_identifier ("small");
437 small_ident2 = get_identifier ("__small__");
441 /* Retrieve the address area that has been chosen for the given decl. */
443 static ia64_addr_area
444 ia64_get_addr_area (tree decl)
446 tree model_attr;
448 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
449 if (model_attr)
451 tree id;
453 init_idents ();
454 id = TREE_VALUE (TREE_VALUE (model_attr));
455 if (id == small_ident1 || id == small_ident2)
456 return ADDR_AREA_SMALL;
458 return ADDR_AREA_NORMAL;
461 static tree
462 ia64_handle_model_attribute (tree *node, tree name, tree args, int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
464 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
465 ia64_addr_area area;
466 tree arg, decl = *node;
468 init_idents ();
469 arg = TREE_VALUE (args);
470 if (arg == small_ident1 || arg == small_ident2)
472 addr_area = ADDR_AREA_SMALL;
474 else
476 warning ("invalid argument of `%s' attribute",
477 IDENTIFIER_POINTER (name));
478 *no_add_attrs = true;
481 switch (TREE_CODE (decl))
483 case VAR_DECL:
484 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
485 == FUNCTION_DECL)
486 && !TREE_STATIC (decl))
488 error ("%Jan address area attribute cannot be specified for "
489 "local variables", decl, decl);
490 *no_add_attrs = true;
492 area = ia64_get_addr_area (decl);
493 if (area != ADDR_AREA_NORMAL && addr_area != area)
495 error ("%Jaddress area of '%s' conflicts with previous "
496 "declaration", decl, decl);
497 *no_add_attrs = true;
499 break;
501 case FUNCTION_DECL:
502 error ("%Jaddress area attribute cannot be specified for functions",
503 decl, decl);
504 *no_add_attrs = true;
505 break;
507 default:
508 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
509 *no_add_attrs = true;
510 break;
513 return NULL_TREE;
516 static void
517 ia64_encode_addr_area (tree decl, rtx symbol)
519 int flags;
521 flags = SYMBOL_REF_FLAGS (symbol);
522 switch (ia64_get_addr_area (decl))
524 case ADDR_AREA_NORMAL: break;
525 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
526 default: abort ();
528 SYMBOL_REF_FLAGS (symbol) = flags;
531 static void
532 ia64_encode_section_info (tree decl, rtx rtl, int first)
534 default_encode_section_info (decl, rtl, first);
536 /* Careful not to prod global register variables. */
537 if (TREE_CODE (decl) == VAR_DECL
538 && GET_CODE (DECL_RTL (decl)) == MEM
539 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
540 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
541 ia64_encode_addr_area (decl, XEXP (rtl, 0));
544 /* Return 1 if the operands of a move are ok. */
547 ia64_move_ok (rtx dst, rtx src)
549 /* If we're under init_recog_no_volatile, we'll not be able to use
550 memory_operand. So check the code directly and don't worry about
551 the validity of the underlying address, which should have been
552 checked elsewhere anyway. */
553 if (GET_CODE (dst) != MEM)
554 return 1;
555 if (GET_CODE (src) == MEM)
556 return 0;
557 if (register_operand (src, VOIDmode))
558 return 1;
 560 /* Otherwise, this must be a constant, and at that either 0, 0.0 or 1.0. */
561 if (INTEGRAL_MODE_P (GET_MODE (dst)))
562 return src == const0_rtx;
563 else
564 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
568 addp4_optimize_ok (rtx op1, rtx op2)
570 return (basereg_operand (op1, GET_MODE(op1)) !=
571 basereg_operand (op2, GET_MODE(op2)));
574 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
575 Return the length of the field, or <= 0 on failure. */
578 ia64_depz_field_mask (rtx rop, rtx rshift)
580 unsigned HOST_WIDE_INT op = INTVAL (rop);
581 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
583 /* Get rid of the zero bits we're shifting in. */
584 op >>= shift;
586 /* We must now have a solid block of 1's at bit 0. */
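/* exact_log2 (OP + 1) then gives the width of that block of ones, and is
   zero or negative when OP is not of that form, matching the <= 0 failure
   convention documented above. */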
587 return exact_log2 (op + 1);
590 /* Expand a symbolic constant load. */
592 void
593 ia64_expand_load_address (rtx dest, rtx src)
595 if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (src))
596 abort ();
597 if (GET_CODE (dest) != REG)
598 abort ();
 600 /* ILP32 mode still loads 64 bits of data from the GOT. This avoids
601 having to pointer-extend the value afterward. Other forms of address
602 computation below are also more natural to compute as 64-bit quantities.
603 If we've been given an SImode destination register, change it. */
604 if (GET_MODE (dest) != Pmode)
605 dest = gen_rtx_REG (Pmode, REGNO (dest));
607 if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
609 emit_insn (gen_rtx_SET (VOIDmode, dest, src));
610 return;
612 else if (TARGET_AUTO_PIC)
614 emit_insn (gen_load_gprel64 (dest, src));
615 return;
617 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
619 emit_insn (gen_load_fptr (dest, src));
620 return;
622 else if (sdata_symbolic_operand (src, VOIDmode))
624 emit_insn (gen_load_gprel (dest, src));
625 return;
628 if (GET_CODE (src) == CONST
629 && GET_CODE (XEXP (src, 0)) == PLUS
630 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
631 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x3fff) != 0)
633 rtx sym = XEXP (XEXP (src, 0), 0);
634 HOST_WIDE_INT ofs, hi, lo;
636 /* Split the offset into a sign extended 14-bit low part
637 and a complementary high part. */
638 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
639 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
640 hi = ofs - lo;
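/* LO is now the sign-extended low 14 bits of the offset and HI has its
   low 14 bits clear, so the final add of LO below fits a 14-bit
   immediate add. */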
642 ia64_expand_load_address (dest, plus_constant (sym, hi));
643 emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
645 else
647 rtx tmp;
649 tmp = gen_rtx_HIGH (Pmode, src);
650 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
651 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
653 tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
654 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
658 static GTY(()) rtx gen_tls_tga;
659 static rtx
660 gen_tls_get_addr (void)
662 if (!gen_tls_tga)
663 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
664 return gen_tls_tga;
667 static GTY(()) rtx thread_pointer_rtx;
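/* Return the (cached) rtx for the thread pointer; r13 holds the thread
   pointer in the ia64 ABI. */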
668 static rtx
669 gen_thread_pointer (void)
671 if (!thread_pointer_rtx)
672 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
673 return thread_pointer_rtx;
676 static rtx
677 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
679 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
680 rtx orig_op0 = op0;
682 switch (tls_kind)
684 case TLS_MODEL_GLOBAL_DYNAMIC:
685 start_sequence ();
687 tga_op1 = gen_reg_rtx (Pmode);
688 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
689 tga_op1 = gen_const_mem (Pmode, tga_op1);
691 tga_op2 = gen_reg_rtx (Pmode);
692 emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
693 tga_op2 = gen_const_mem (Pmode, tga_op2);
695 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
696 LCT_CONST, Pmode, 2, tga_op1,
697 Pmode, tga_op2, Pmode);
699 insns = get_insns ();
700 end_sequence ();
702 if (GET_MODE (op0) != Pmode)
703 op0 = tga_ret;
704 emit_libcall_block (insns, op0, tga_ret, op1);
705 break;
707 case TLS_MODEL_LOCAL_DYNAMIC:
 708 /* ??? This isn't the completely proper way to do local-dynamic.
709 If the call to __tls_get_addr is used only by a single symbol,
710 then we should (somehow) move the dtprel to the second arg
711 to avoid the extra add. */
712 start_sequence ();
714 tga_op1 = gen_reg_rtx (Pmode);
715 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
716 tga_op1 = gen_const_mem (Pmode, tga_op1);
718 tga_op2 = const0_rtx;
720 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
721 LCT_CONST, Pmode, 2, tga_op1,
722 Pmode, tga_op2, Pmode);
724 insns = get_insns ();
725 end_sequence ();
727 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
728 UNSPEC_LD_BASE);
729 tmp = gen_reg_rtx (Pmode);
730 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
732 if (!register_operand (op0, Pmode))
733 op0 = gen_reg_rtx (Pmode);
734 if (TARGET_TLS64)
736 emit_insn (gen_load_dtprel (op0, op1));
737 emit_insn (gen_adddi3 (op0, tmp, op0));
739 else
740 emit_insn (gen_add_dtprel (op0, tmp, op1));
741 break;
743 case TLS_MODEL_INITIAL_EXEC:
744 tmp = gen_reg_rtx (Pmode);
745 emit_insn (gen_load_ltoff_tprel (tmp, op1));
746 tmp = gen_const_mem (Pmode, tmp);
747 tmp = force_reg (Pmode, tmp);
749 if (!register_operand (op0, Pmode))
750 op0 = gen_reg_rtx (Pmode);
751 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
752 break;
754 case TLS_MODEL_LOCAL_EXEC:
755 if (!register_operand (op0, Pmode))
756 op0 = gen_reg_rtx (Pmode);
757 if (TARGET_TLS64)
759 emit_insn (gen_load_tprel (op0, op1));
760 emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
762 else
763 emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
764 break;
766 default:
767 abort ();
770 if (orig_op0 == op0)
771 return NULL_RTX;
772 if (GET_MODE (orig_op0) == Pmode)
773 return op0;
774 return gen_lowpart (GET_MODE (orig_op0), op0);
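/* Expand OP0 = OP1 as a general move. Returns NULL_RTX if the move has
   been emitted here in full, otherwise the (possibly copied) source
   operand the caller should use for the move. */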
778 ia64_expand_move (rtx op0, rtx op1)
780 enum machine_mode mode = GET_MODE (op0);
782 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
783 op1 = force_reg (mode, op1);
785 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
787 enum tls_model tls_kind;
788 if (GET_CODE (op1) == SYMBOL_REF
789 && (tls_kind = SYMBOL_REF_TLS_MODEL (op1)))
790 return ia64_expand_tls_address (tls_kind, op0, op1);
792 if (!TARGET_NO_PIC && reload_completed)
794 ia64_expand_load_address (op0, op1);
795 return NULL_RTX;
799 return op1;
802 /* Split a move from OP1 to OP0 conditional on COND. */
804 void
805 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
807 rtx insn, first = get_last_insn ();
809 emit_move_insn (op0, op1);
811 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
812 if (INSN_P (insn))
813 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
814 PATTERN (insn));
817 /* Split a post-reload TImode or TFmode reference into two DImode
818 components. This is made extra difficult by the fact that we do
819 not get any scratch registers to work with, because reload cannot
820 be prevented from giving us a scratch that overlaps the register
821 pair involved. So instead, when addressing memory, we tweak the
822 pointer register up and back down with POST_INCs. Or up and not
823 back down when we can get away with it.
825 REVERSED is true when the loads must be done in reversed order
826 (high word first) for correctness. DEAD is true when the pointer
827 dies with the second insn we generate and therefore the second
828 address must not carry a postmodify.
830 May return an insn which is to be emitted after the moves. */
832 static rtx
833 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
835 rtx fixup = 0;
837 switch (GET_CODE (in))
839 case REG:
840 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
841 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
842 break;
844 case CONST_INT:
845 case CONST_DOUBLE:
846 /* Cannot occur reversed. */
847 if (reversed) abort ();
849 if (GET_MODE (in) != TFmode)
850 split_double (in, &out[0], &out[1]);
851 else
852 /* split_double does not understand how to split a TFmode
853 quantity into a pair of DImode constants. */
855 REAL_VALUE_TYPE r;
856 unsigned HOST_WIDE_INT p[2];
857 long l[4]; /* TFmode is 128 bits */
859 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
860 real_to_target (l, &r, TFmode);
862 if (FLOAT_WORDS_BIG_ENDIAN)
864 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
865 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
867 else
869 p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
870 p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
872 out[0] = GEN_INT (p[0]);
873 out[1] = GEN_INT (p[1]);
875 break;
877 case MEM:
879 rtx base = XEXP (in, 0);
880 rtx offset;
882 switch (GET_CODE (base))
884 case REG:
885 if (!reversed)
887 out[0] = adjust_automodify_address
888 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
889 out[1] = adjust_automodify_address
890 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
892 else
894 /* Reversal requires a pre-increment, which can only
895 be done as a separate insn. */
896 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
897 out[0] = adjust_automodify_address
898 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
899 out[1] = adjust_address (in, DImode, 0);
901 break;
903 case POST_INC:
904 if (reversed || dead) abort ();
905 /* Just do the increment in two steps. */
906 out[0] = adjust_automodify_address (in, DImode, 0, 0);
907 out[1] = adjust_automodify_address (in, DImode, 0, 8);
908 break;
910 case POST_DEC:
911 if (reversed || dead) abort ();
912 /* Add 8, subtract 24. */
913 base = XEXP (base, 0);
914 out[0] = adjust_automodify_address
915 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
916 out[1] = adjust_automodify_address
917 (in, DImode,
918 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
920 break;
922 case POST_MODIFY:
923 if (reversed || dead) abort ();
924 /* Extract and adjust the modification. This case is
925 trickier than the others, because we might have an
926 index register, or we might have a combined offset that
927 doesn't fit a signed 9-bit displacement field. We can
928 assume the incoming expression is already legitimate. */
929 offset = XEXP (base, 1);
930 base = XEXP (base, 0);
932 out[0] = adjust_automodify_address
933 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
935 if (GET_CODE (XEXP (offset, 1)) == REG)
937 /* Can't adjust the postmodify to match. Emit the
938 original, then a separate addition insn. */
939 out[1] = adjust_automodify_address (in, DImode, 0, 8);
940 fixup = gen_adddi3 (base, base, GEN_INT (-8));
942 else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
943 abort ();
944 else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
946 /* Again the postmodify cannot be made to match, but
947 in this case it's more efficient to get rid of the
948 postmodify entirely and fix up with an add insn. */
949 out[1] = adjust_automodify_address (in, DImode, base, 8);
950 fixup = gen_adddi3 (base, base,
951 GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
953 else
955 /* Combined offset still fits in the displacement field.
956 (We cannot overflow it at the high end.) */
957 out[1] = adjust_automodify_address
958 (in, DImode,
959 gen_rtx_POST_MODIFY (Pmode, base,
960 gen_rtx_PLUS (Pmode, base,
961 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
964 break;
966 default:
967 abort ();
969 break;
972 default:
973 abort ();
976 return fixup;
979 /* Split a TImode or TFmode move instruction after reload.
980 This is used by *movtf_internal and *movti_internal. */
981 void
982 ia64_split_tmode_move (rtx operands[])
984 rtx in[2], out[2], insn;
985 rtx fixup[2];
986 bool dead = false;
987 bool reversed = false;
989 /* It is possible for reload to decide to overwrite a pointer with
990 the value it points to. In that case we have to do the loads in
991 the appropriate order so that the pointer is not destroyed too
992 early. Also we must not generate a postmodify for that second
993 load, or rws_access_regno will abort. */
994 if (GET_CODE (operands[1]) == MEM
995 && reg_overlap_mentioned_p (operands[0], operands[1]))
997 rtx base = XEXP (operands[1], 0);
998 while (GET_CODE (base) != REG)
999 base = XEXP (base, 0);
1001 if (REGNO (base) == REGNO (operands[0]))
1002 reversed = true;
1003 dead = true;
1005 /* Another reason to do the moves in reversed order is if the first
1006 element of the target register pair is also the second element of
1007 the source register pair. */
1008 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1009 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1010 reversed = true;
1012 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1013 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
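/* Attach a REG_INC note for the base register to INSN whenever EXP is a
   memory reference whose address has a side effect (POST_MODIFY, POST_INC
   or POST_DEC), so later passes see the auto-increment. */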
1015 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1016 if (GET_CODE (EXP) == MEM \
1017 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1018 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1019 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1020 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1021 XEXP (XEXP (EXP, 0), 0), \
1022 REG_NOTES (INSN))
1024 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1025 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1026 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1028 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1029 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1030 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1032 if (fixup[0])
1033 emit_insn (fixup[0]);
1034 if (fixup[1])
1035 emit_insn (fixup[1]);
1037 #undef MAYBE_ADD_REG_INC_NOTE
1040 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1041 through memory plus an extra GR scratch register. Except that you can
1042 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1043 SECONDARY_RELOAD_CLASS, but not both.
1045 We got into problems in the first place by allowing a construct like
1046 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1047 This solution attempts to prevent this situation from occurring. When
1048 we see something like the above, we spill the inner register to memory. */
1051 spill_xfmode_operand (rtx in, int force)
1053 if (GET_CODE (in) == SUBREG
1054 && GET_MODE (SUBREG_REG (in)) == TImode
1055 && GET_CODE (SUBREG_REG (in)) == REG)
1057 rtx memt = assign_stack_temp (TImode, 16, 0);
1058 emit_move_insn (memt, SUBREG_REG (in));
1059 return adjust_address (memt, XFmode, 0);
1061 else if (force && GET_CODE (in) == REG)
1063 rtx memx = assign_stack_temp (XFmode, 16, 0);
1064 emit_move_insn (memx, in);
1065 return memx;
1067 else
1068 return in;
1071 /* Emit comparison instruction if necessary, returning the expression
1072 that holds the compare result in the proper mode. */
1074 static GTY(()) rtx cmptf_libfunc;
1077 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1079 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1080 rtx cmp;
1082 /* If we have a BImode input, then we already have a compare result, and
1083 do not need to emit another comparison. */
1084 if (GET_MODE (op0) == BImode)
1086 if ((code == NE || code == EQ) && op1 == const0_rtx)
1087 cmp = op0;
1088 else
1089 abort ();
1091 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1092 magic number as its third argument, that indicates what to do.
1093 The return value is an integer to be compared against zero. */
1094 else if (GET_MODE (op0) == TFmode)
1096 enum qfcmp_magic {
1097 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1098 QCMP_UNORD = 2,
1099 QCMP_EQ = 4,
1100 QCMP_LT = 8,
1101 QCMP_GT = 16
1102 } magic;
1103 enum rtx_code ncode;
1104 rtx ret, insns;
1105 if (!cmptf_libfunc || GET_MODE (op1) != TFmode)
1106 abort ();
1107 switch (code)
1109 /* 1 = equal, 0 = not equal. Equality operators do
1110 not raise FP_INVALID when given an SNaN operand. */
1111 case EQ: magic = QCMP_EQ; ncode = NE; break;
1112 case NE: magic = QCMP_EQ; ncode = EQ; break;
1113 /* isunordered() from C99. */
1114 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1115 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1116 /* Relational operators raise FP_INVALID when given
1117 an SNaN operand. */
1118 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1119 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1120 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1121 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1122 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
 1123 Expanders for buneq etc. would have to be added to ia64.md
1124 for this to be useful. */
1125 default: abort ();
1128 start_sequence ();
1130 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1131 op0, TFmode, op1, TFmode,
1132 GEN_INT (magic), DImode);
1133 cmp = gen_reg_rtx (BImode);
1134 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1135 gen_rtx_fmt_ee (ncode, BImode,
1136 ret, const0_rtx)));
1138 insns = get_insns ();
1139 end_sequence ();
1141 emit_libcall_block (insns, cmp, cmp,
1142 gen_rtx_fmt_ee (code, BImode, op0, op1));
1143 code = NE;
1145 else
1147 cmp = gen_reg_rtx (BImode);
1148 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1149 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1150 code = NE;
1153 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1156 /* Emit the appropriate sequence for a call. */
1158 void
1159 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1160 int sibcall_p)
1162 rtx insn, b0;
1164 addr = XEXP (addr, 0);
1165 addr = convert_memory_address (DImode, addr);
1166 b0 = gen_rtx_REG (DImode, R_BR (0));
1168 /* ??? Should do this for functions known to bind local too. */
1169 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1171 if (sibcall_p)
1172 insn = gen_sibcall_nogp (addr);
1173 else if (! retval)
1174 insn = gen_call_nogp (addr, b0);
1175 else
1176 insn = gen_call_value_nogp (retval, addr, b0);
1177 insn = emit_call_insn (insn);
1179 else
1181 if (sibcall_p)
1182 insn = gen_sibcall_gp (addr);
1183 else if (! retval)
1184 insn = gen_call_gp (addr, b0);
1185 else
1186 insn = gen_call_value_gp (retval, addr, b0);
1187 insn = emit_call_insn (insn);
1189 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1192 if (sibcall_p)
1193 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
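/* Reload the global pointer (r1) after a call, either from the general
   register it was copied to in the prologue or from its spill slot in the
   memory stack frame. */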
1196 void
1197 ia64_reload_gp (void)
1199 rtx tmp;
1201 if (current_frame_info.reg_save_gp)
1202 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1203 else
1205 HOST_WIDE_INT offset;
1207 offset = (current_frame_info.spill_cfa_off
1208 + current_frame_info.spill_size);
1209 if (frame_pointer_needed)
1211 tmp = hard_frame_pointer_rtx;
1212 offset = -offset;
1214 else
1216 tmp = stack_pointer_rtx;
1217 offset = current_frame_info.total_size - offset;
1220 if (CONST_OK_FOR_I (offset))
1221 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1222 tmp, GEN_INT (offset)));
1223 else
1225 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1226 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1227 pic_offset_table_rtx, tmp));
1230 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1233 emit_move_insn (pic_offset_table_rtx, tmp);
1236 void
1237 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1238 rtx scratch_b, int noreturn_p, int sibcall_p)
1240 rtx insn;
1241 bool is_desc = false;
1243 /* If we find we're calling through a register, then we're actually
1244 calling through a descriptor, so load up the values. */
1245 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1247 rtx tmp;
1248 bool addr_dead_p;
1250 /* ??? We are currently constrained to *not* use peep2, because
1251 we can legitimately change the global lifetime of the GP
1252 (in the form of killing where previously live). This is
1253 because a call through a descriptor doesn't use the previous
1254 value of the GP, while a direct call does, and we do not
1255 commit to either form until the split here.
1257 That said, this means that we lack precise life info for
1258 whether ADDR is dead after this call. This is not terribly
1259 important, since we can fix things up essentially for free
1260 with the POST_DEC below, but it's nice to not use it when we
1261 can immediately tell it's not necessary. */
1262 addr_dead_p = ((noreturn_p || sibcall_p
1263 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1264 REGNO (addr)))
1265 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1267 /* Load the code address into scratch_b. */
1268 tmp = gen_rtx_POST_INC (Pmode, addr);
1269 tmp = gen_rtx_MEM (Pmode, tmp);
1270 emit_move_insn (scratch_r, tmp);
1271 emit_move_insn (scratch_b, scratch_r);
1273 /* Load the GP address. If ADDR is not dead here, then we must
1274 revert the change made above via the POST_INCREMENT. */
1275 if (!addr_dead_p)
1276 tmp = gen_rtx_POST_DEC (Pmode, addr);
1277 else
1278 tmp = addr;
1279 tmp = gen_rtx_MEM (Pmode, tmp);
1280 emit_move_insn (pic_offset_table_rtx, tmp);
1282 is_desc = true;
1283 addr = scratch_b;
1286 if (sibcall_p)
1287 insn = gen_sibcall_nogp (addr);
1288 else if (retval)
1289 insn = gen_call_value_nogp (retval, addr, retaddr);
1290 else
1291 insn = gen_call_nogp (addr, retaddr);
1292 emit_call_insn (insn);
1294 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1295 ia64_reload_gp ();
1298 /* Begin the assembly file. */
1300 static void
1301 ia64_file_start (void)
1303 default_file_start ();
1304 emit_safe_across_calls ();
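/* Output the .pred.safe_across_calls directive, listing the ranges of
   predicate registers that are not call-clobbered. Nothing is emitted if
   every predicate register is call-used. */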
1307 void
1308 emit_safe_across_calls (void)
1310 unsigned int rs, re;
1311 int out_state;
1313 rs = 1;
1314 out_state = 0;
1315 while (1)
1317 while (rs < 64 && call_used_regs[PR_REG (rs)])
1318 rs++;
1319 if (rs >= 64)
1320 break;
1321 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1322 continue;
1323 if (out_state == 0)
1325 fputs ("\t.pred.safe_across_calls ", asm_out_file);
1326 out_state = 1;
1328 else
1329 fputc (',', asm_out_file);
1330 if (re == rs + 1)
1331 fprintf (asm_out_file, "p%u", rs);
1332 else
1333 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
1334 rs = re + 1;
1336 if (out_state)
1337 fputc ('\n', asm_out_file);
1340 /* Helper function for ia64_compute_frame_size: find an appropriate general
1341 register to spill some special register to. SPECIAL_SPILL_MASK contains
1342 bits in GR0 to GR31 that have already been allocated by this routine.
1343 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1345 static int
1346 find_gr_spill (int try_locals)
1348 int regno;
1350 /* If this is a leaf function, first try an otherwise unused
1351 call-clobbered register. */
1352 if (current_function_is_leaf)
1354 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1355 if (! regs_ever_live[regno]
1356 && call_used_regs[regno]
1357 && ! fixed_regs[regno]
1358 && ! global_regs[regno]
1359 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1361 current_frame_info.gr_used_mask |= 1 << regno;
1362 return regno;
1366 if (try_locals)
1368 regno = current_frame_info.n_local_regs;
1369 /* If there is a frame pointer, then we can't use loc79, because
1370 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1371 reg_name switching code in ia64_expand_prologue. */
1372 if (regno < (80 - frame_pointer_needed))
1374 current_frame_info.n_local_regs = regno + 1;
1375 return LOC_REG (0) + regno;
1379 /* Failed to find a general register to spill to. Must use stack. */
1380 return 0;
1383 /* In order to make for nice schedules, we try to allocate every temporary
1384 to a different register. We must of course stay away from call-saved,
1385 fixed, and global registers. We must also stay away from registers
1386 allocated in current_frame_info.gr_used_mask, since those include regs
1387 used all through the prologue.
1389 Any register allocated here must be used immediately. The idea is to
1390 aid scheduling, not to solve data flow problems. */
1392 static int last_scratch_gr_reg;
1394 static int
1395 next_scratch_gr_reg (void)
1397 int i, regno;
1399 for (i = 0; i < 32; ++i)
1401 regno = (last_scratch_gr_reg + i + 1) & 31;
1402 if (call_used_regs[regno]
1403 && ! fixed_regs[regno]
1404 && ! global_regs[regno]
1405 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1407 last_scratch_gr_reg = regno;
1408 return regno;
1412 /* There must be _something_ available. */
1413 abort ();
1416 /* Helper function for ia64_compute_frame_size, called through
1417 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1419 static void
1420 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
1422 unsigned int regno = REGNO (reg);
1423 if (regno < 32)
1425 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1426 for (i = 0; i < n; ++i)
1427 current_frame_info.gr_used_mask |= 1 << (regno + i);
 1431 /* Compute the frame layout for the current function and record it in
 1432 current_frame_info: register save masks, spill sizes and offsets, and the
 1433 total frame size. SIZE is the number of bytes of space needed for local variables. */
1435 static void
1436 ia64_compute_frame_size (HOST_WIDE_INT size)
1438 HOST_WIDE_INT total_size;
1439 HOST_WIDE_INT spill_size = 0;
1440 HOST_WIDE_INT extra_spill_size = 0;
1441 HOST_WIDE_INT pretend_args_size;
1442 HARD_REG_SET mask;
1443 int n_spilled = 0;
1444 int spilled_gr_p = 0;
1445 int spilled_fr_p = 0;
1446 unsigned int regno;
1447 int i;
1449 if (current_frame_info.initialized)
1450 return;
1452 memset (&current_frame_info, 0, sizeof current_frame_info);
1453 CLEAR_HARD_REG_SET (mask);
1455 /* Don't allocate scratches to the return register. */
1456 diddle_return_value (mark_reg_gr_used_mask, NULL);
1458 /* Don't allocate scratches to the EH scratch registers. */
1459 if (cfun->machine->ia64_eh_epilogue_sp)
1460 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1461 if (cfun->machine->ia64_eh_epilogue_bsp)
1462 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1464 /* Find the size of the register stack frame. We have only 80 local
1465 registers, because we reserve 8 for the inputs and 8 for the
1466 outputs. */
1468 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1469 since we'll be adjusting that down later. */
1470 regno = LOC_REG (78) + ! frame_pointer_needed;
1471 for (; regno >= LOC_REG (0); regno--)
1472 if (regs_ever_live[regno])
1473 break;
1474 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1476 /* For functions marked with the syscall_linkage attribute, we must mark
1477 all eight input registers as in use, so that locals aren't visible to
1478 the caller. */
1480 if (cfun->machine->n_varargs > 0
1481 || lookup_attribute ("syscall_linkage",
1482 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1483 current_frame_info.n_input_regs = 8;
1484 else
1486 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1487 if (regs_ever_live[regno])
1488 break;
1489 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1492 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1493 if (regs_ever_live[regno])
1494 break;
1495 i = regno - OUT_REG (0) + 1;
1497 /* When -p profiling, we need one output register for the mcount argument.
1498 Likewise for -a profiling for the bb_init_func argument. For -ax
1499 profiling, we need two output registers for the two bb_init_trace_func
1500 arguments. */
1501 if (current_function_profile)
1502 i = MAX (i, 1);
1503 current_frame_info.n_output_regs = i;
1505 /* ??? No rotating register support yet. */
1506 current_frame_info.n_rotate_regs = 0;
1508 /* Discover which registers need spilling, and how much room that
1509 will take. Begin with floating point and general registers,
1510 which will always wind up on the stack. */
1512 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1513 if (regs_ever_live[regno] && ! call_used_regs[regno])
1515 SET_HARD_REG_BIT (mask, regno);
1516 spill_size += 16;
1517 n_spilled += 1;
1518 spilled_fr_p = 1;
1521 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1522 if (regs_ever_live[regno] && ! call_used_regs[regno])
1524 SET_HARD_REG_BIT (mask, regno);
1525 spill_size += 8;
1526 n_spilled += 1;
1527 spilled_gr_p = 1;
1530 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1531 if (regs_ever_live[regno] && ! call_used_regs[regno])
1533 SET_HARD_REG_BIT (mask, regno);
1534 spill_size += 8;
1535 n_spilled += 1;
1538 /* Now come all special registers that might get saved in other
1539 general registers. */
1541 if (frame_pointer_needed)
1543 current_frame_info.reg_fp = find_gr_spill (1);
1544 /* If we did not get a register, then we take LOC79. This is guaranteed
1545 to be free, even if regs_ever_live is already set, because this is
1546 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1547 as we don't count loc79 above. */
1548 if (current_frame_info.reg_fp == 0)
1550 current_frame_info.reg_fp = LOC_REG (79);
1551 current_frame_info.n_local_regs++;
1555 if (! current_function_is_leaf)
1557 /* Emit a save of BR0 if we call other functions. Do this even
1558 if this function doesn't return, as EH depends on this to be
1559 able to unwind the stack. */
1560 SET_HARD_REG_BIT (mask, BR_REG (0));
1562 current_frame_info.reg_save_b0 = find_gr_spill (1);
1563 if (current_frame_info.reg_save_b0 == 0)
1565 spill_size += 8;
1566 n_spilled += 1;
1569 /* Similarly for ar.pfs. */
1570 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1571 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1572 if (current_frame_info.reg_save_ar_pfs == 0)
1574 extra_spill_size += 8;
1575 n_spilled += 1;
1578 /* Similarly for gp. Note that if we're calling setjmp, the stacked
1579 registers are clobbered, so we fall back to the stack. */
1580 current_frame_info.reg_save_gp
1581 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
1582 if (current_frame_info.reg_save_gp == 0)
1584 SET_HARD_REG_BIT (mask, GR_REG (1));
1585 spill_size += 8;
1586 n_spilled += 1;
1589 else
1591 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1593 SET_HARD_REG_BIT (mask, BR_REG (0));
1594 spill_size += 8;
1595 n_spilled += 1;
1598 if (regs_ever_live[AR_PFS_REGNUM])
1600 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1601 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1602 if (current_frame_info.reg_save_ar_pfs == 0)
1604 extra_spill_size += 8;
1605 n_spilled += 1;
1610 /* Unwind descriptor hackery: things are most efficient if we allocate
1611 consecutive GR save registers for RP, PFS, FP in that order. However,
1612 it is absolutely critical that FP get the only hard register that's
1613 guaranteed to be free, so we allocated it first. If all three did
1614 happen to be allocated hard regs, and are consecutive, rearrange them
1615 into the preferred order now. */
1616 if (current_frame_info.reg_fp != 0
1617 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1618 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1620 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1621 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1622 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1625 /* See if we need to store the predicate register block. */
1626 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1627 if (regs_ever_live[regno] && ! call_used_regs[regno])
1628 break;
1629 if (regno <= PR_REG (63))
1631 SET_HARD_REG_BIT (mask, PR_REG (0));
1632 current_frame_info.reg_save_pr = find_gr_spill (1);
1633 if (current_frame_info.reg_save_pr == 0)
1635 extra_spill_size += 8;
1636 n_spilled += 1;
1639 /* ??? Mark them all as used so that register renaming and such
1640 are free to use them. */
1641 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1642 regs_ever_live[regno] = 1;
1645 /* If we're forced to use st8.spill, we're forced to save and restore
1646 ar.unat as well. The check for existing liveness allows inline asm
1647 to touch ar.unat. */
1648 if (spilled_gr_p || cfun->machine->n_varargs
1649 || regs_ever_live[AR_UNAT_REGNUM])
1651 regs_ever_live[AR_UNAT_REGNUM] = 1;
1652 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1653 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1654 if (current_frame_info.reg_save_ar_unat == 0)
1656 extra_spill_size += 8;
1657 n_spilled += 1;
1661 if (regs_ever_live[AR_LC_REGNUM])
1663 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1664 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1665 if (current_frame_info.reg_save_ar_lc == 0)
1667 extra_spill_size += 8;
1668 n_spilled += 1;
1672 /* If we have an odd number of words of pretend arguments written to
1673 the stack, then the FR save area will be unaligned. We round the
1674 size of this area up to keep things 16 byte aligned. */
1675 if (spilled_fr_p)
1676 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1677 else
1678 pretend_args_size = current_function_pretend_args_size;
1680 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1681 + current_function_outgoing_args_size);
1682 total_size = IA64_STACK_ALIGN (total_size);
1684 /* We always use the 16-byte scratch area provided by the caller, but
1685 if we are a leaf function, there's no one to which we need to provide
1686 a scratch area. */
1687 if (current_function_is_leaf)
1688 total_size = MAX (0, total_size - 16);
1690 current_frame_info.total_size = total_size;
1691 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1692 current_frame_info.spill_size = spill_size;
1693 current_frame_info.extra_spill_size = extra_spill_size;
1694 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1695 current_frame_info.n_spilled = n_spilled;
1696 current_frame_info.initialized = reload_completed;
1699 /* Compute the initial difference between the specified pair of registers. */
1701 HOST_WIDE_INT
1702 ia64_initial_elimination_offset (int from, int to)
1704 HOST_WIDE_INT offset;
1706 ia64_compute_frame_size (get_frame_size ());
1707 switch (from)
1709 case FRAME_POINTER_REGNUM:
1710 if (to == HARD_FRAME_POINTER_REGNUM)
1712 if (current_function_is_leaf)
1713 offset = -current_frame_info.total_size;
1714 else
1715 offset = -(current_frame_info.total_size
1716 - current_function_outgoing_args_size - 16);
1718 else if (to == STACK_POINTER_REGNUM)
1720 if (current_function_is_leaf)
1721 offset = 0;
1722 else
1723 offset = 16 + current_function_outgoing_args_size;
1725 else
1726 abort ();
1727 break;
1729 case ARG_POINTER_REGNUM:
 1730 /* Arguments start above the 16 byte save area, unless stdarg,
1731 in which case we store through the 16 byte save area. */
1732 if (to == HARD_FRAME_POINTER_REGNUM)
1733 offset = 16 - current_function_pretend_args_size;
1734 else if (to == STACK_POINTER_REGNUM)
1735 offset = (current_frame_info.total_size
1736 + 16 - current_function_pretend_args_size);
1737 else
1738 abort ();
1739 break;
1741 default:
1742 abort ();
1745 return offset;
1748 /* If there are more than a trivial number of register spills, we use
1749 two interleaved iterators so that we can get two memory references
1750 per insn group.
1752 In order to simplify things in the prologue and epilogue expanders,
1753 we use helper functions to fix up the memory references after the
1754 fact with the appropriate offsets to a POST_MODIFY memory mode.
1755 The following data structure tracks the state of the two iterators
1756 while insns are being emitted. */
1758 struct spill_fill_data
1760 rtx init_after; /* point at which to emit initializations */
1761 rtx init_reg[2]; /* initial base register */
1762 rtx iter_reg[2]; /* the iterator registers */
1763 rtx *prev_addr[2]; /* address of last memory use */
1764 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
1765 HOST_WIDE_INT prev_off[2]; /* last offset */
1766 int n_iter; /* number of iterators in use */
1767 int next_iter; /* next iterator to use */
1768 unsigned int save_gr_used_mask;
1771 static struct spill_fill_data spill_fill_data;
1773 static void
1774 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
1776 int i;
1778 spill_fill_data.init_after = get_last_insn ();
1779 spill_fill_data.init_reg[0] = init_reg;
1780 spill_fill_data.init_reg[1] = init_reg;
1781 spill_fill_data.prev_addr[0] = NULL;
1782 spill_fill_data.prev_addr[1] = NULL;
1783 spill_fill_data.prev_insn[0] = NULL;
1784 spill_fill_data.prev_insn[1] = NULL;
1785 spill_fill_data.prev_off[0] = cfa_off;
1786 spill_fill_data.prev_off[1] = cfa_off;
1787 spill_fill_data.next_iter = 0;
1788 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1790 spill_fill_data.n_iter = 1 + (n_spills > 2);
1791 for (i = 0; i < spill_fill_data.n_iter; ++i)
1793 int regno = next_scratch_gr_reg ();
1794 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1795 current_frame_info.gr_used_mask |= 1 << regno;
1799 static void
1800 finish_spill_pointers (void)
1802 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1805 static rtx
1806 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
1808 int iter = spill_fill_data.next_iter;
1809 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1810 rtx disp_rtx = GEN_INT (disp);
1811 rtx mem;
1813 if (spill_fill_data.prev_addr[iter])
1815 if (CONST_OK_FOR_N (disp))
1817 *spill_fill_data.prev_addr[iter]
1818 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1819 gen_rtx_PLUS (DImode,
1820 spill_fill_data.iter_reg[iter],
1821 disp_rtx));
1822 REG_NOTES (spill_fill_data.prev_insn[iter])
1823 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
1824 REG_NOTES (spill_fill_data.prev_insn[iter]));
1826 else
1828 /* ??? Could use register post_modify for loads. */
1829 if (! CONST_OK_FOR_I (disp))
1831 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1832 emit_move_insn (tmp, disp_rtx);
1833 disp_rtx = tmp;
1835 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1836 spill_fill_data.iter_reg[iter], disp_rtx));
1839 /* Micro-optimization: if we've created a frame pointer, it's at
1840 CFA 0, which may allow the real iterator to be initialized lower,
1841 slightly increasing parallelism. Also, if there are few saves
1842 it may eliminate the iterator entirely. */
1843 else if (disp == 0
1844 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1845 && frame_pointer_needed)
1847 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1848 set_mem_alias_set (mem, get_varargs_alias_set ());
1849 return mem;
1851 else
1853 rtx seq, insn;
1855 if (disp == 0)
1856 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1857 spill_fill_data.init_reg[iter]);
1858 else
1860 start_sequence ();
1862 if (! CONST_OK_FOR_I (disp))
1864 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1865 emit_move_insn (tmp, disp_rtx);
1866 disp_rtx = tmp;
1869 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1870 spill_fill_data.init_reg[iter],
1871 disp_rtx));
1873 seq = get_insns ();
1874 end_sequence ();
1877 /* Careful for being the first insn in a sequence. */
1878 if (spill_fill_data.init_after)
1879 insn = emit_insn_after (seq, spill_fill_data.init_after);
1880 else
1882 rtx first = get_insns ();
1883 if (first)
1884 insn = emit_insn_before (seq, first);
1885 else
1886 insn = emit_insn (seq);
1888 spill_fill_data.init_after = insn;
1890 /* If DISP is 0, we may or may not have a further adjustment
1891 afterward. If we do, then the load/store insn may be modified
1892 to be a post-modify. If we don't, then this copy may be
1893 eliminated by copyprop_hardreg_forward, which makes this
1894 insn garbage, which runs afoul of the sanity check in
1895 propagate_one_insn. So mark this insn as legal to delete. */
1896 if (disp == 0)
1897 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1898 REG_NOTES (insn));
1901 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1903 /* ??? Not all of the spills are for varargs, but some of them are.
1904 The rest of the spills belong in an alias set of their own. But
1905 it doesn't actually hurt to include them here. */
1906 set_mem_alias_set (mem, get_varargs_alias_set ());
1908 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1909 spill_fill_data.prev_off[iter] = cfa_off;
1911 if (++iter >= spill_fill_data.n_iter)
1912 iter = 0;
1913 spill_fill_data.next_iter = iter;
1915 return mem;
1918 static void
1919 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
1920 rtx frame_reg)
1922 int iter = spill_fill_data.next_iter;
1923 rtx mem, insn;
1925 mem = spill_restore_mem (reg, cfa_off);
1926 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1927 spill_fill_data.prev_insn[iter] = insn;
1929 if (frame_reg)
1931 rtx base;
1932 HOST_WIDE_INT off;
1934 RTX_FRAME_RELATED_P (insn) = 1;
1936 /* Don't even pretend that the unwind code can intuit its way
1937 through a pair of interleaved post_modify iterators. Just
1938 provide the correct answer. */
1940 if (frame_pointer_needed)
1942 base = hard_frame_pointer_rtx;
1943 off = - cfa_off;
1945 else
1947 base = stack_pointer_rtx;
1948 off = current_frame_info.total_size - cfa_off;
1951 REG_NOTES (insn)
1952 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1953 gen_rtx_SET (VOIDmode,
1954 gen_rtx_MEM (GET_MODE (reg),
1955 plus_constant (base, off)),
1956 frame_reg),
1957 REG_NOTES (insn));
1961 static void
1962 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
1964 int iter = spill_fill_data.next_iter;
1965 rtx insn;
1967 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1968 GEN_INT (cfa_off)));
1969 spill_fill_data.prev_insn[iter] = insn;
1972 /* Wrapper functions that discard the CONST_INT spill offset. These
1973 exist so that we can give gr_spill/gr_fill the offset they need and
1974 use a consistent function interface. */
1976 static rtx
1977 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
1979 return gen_movdi (dest, src);
1982 static rtx
1983 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
1985 return gen_fr_spill (dest, src);
1988 static rtx
1989 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
1991 return gen_fr_restore (dest, src);
1994 /* Called after register allocation to add any instructions needed for the
1995 prologue. Using a prologue insn is favored compared to putting all of the
1996 instructions in output_function_prologue(), since it allows the scheduler
1997 to intermix instructions with the saves of the caller saved registers. In
1998 some cases, it might be necessary to emit a barrier instruction as the last
1999 insn to prevent such scheduling.
2001 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2002 so that the debug info generation code can handle them properly.
2004 The register save area is laid out like so:
2005 cfa+16
2006 [ varargs spill area ]
2007 [ fr register spill area ]
2008 [ br register spill area ]
2009 [ ar register spill area ]
2010 [ pr register spill area ]
2011 [ gr register spill area ] */
2013 /* ??? Get inefficient code when the frame size is larger than can fit in an
2014 adds instruction. */
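/* Illustrative sketch, not part of the original source: the cfa_off
   bookkeeping the prologue below performs once it has passed the varargs
   spills.  The parameters are hypothetical stand-ins for the
   current_frame_info fields and save counts; sizes are in bytes.  */
static int
example_save_area_walk (long long spill_cfa_off, long long spill_size,
                        long long extra_spill_size,
                        int n_extra_8byte_saves, int n_gr_br_saves,
                        int n_fr_saves)
{
  long long cfa_off = spill_cfa_off + spill_size + extra_spill_size;

  /* pr and ar saves that went to memory come out of the extra spill area.  */
  cfa_off -= 8LL * n_extra_8byte_saves;
  if (cfa_off != spill_cfa_off + spill_size)
    return 0;                      /* mirrors the first abort () check */

  /* GRs, b0 and the other BRs take 8 bytes each ...  */
  cfa_off -= 8LL * n_gr_br_saves;
  /* ... and FRs take 16 bytes each and must stay 16-byte aligned.  */
  if (cfa_off & 15)
    return 0;
  cfa_off -= 16LL * n_fr_saves;

  return cfa_off == spill_cfa_off; /* mirrors the final abort () check */
}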
2016 void
2017 ia64_expand_prologue (void)
2019 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2020 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2021 rtx reg, alt_reg;
2023 ia64_compute_frame_size (get_frame_size ());
2024 last_scratch_gr_reg = 15;
2026 /* If there is no epilogue, then we don't need some prologue insns.
2027 We need to avoid emitting the dead prologue insns, because flow
2028 will complain about them. */
2029 if (optimize)
2031 edge e;
2033 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2035 if ((e->flags & EDGE_FAKE) == 0
2036 && (e->flags & EDGE_FALLTHRU) != 0)
2037 break;
2039 epilogue_p = (e != NULL);
2041 else
2042 epilogue_p = 1;
2044 /* Set the local, input, and output register names. We need to do this
2045 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2046 half. If we use in/loc/out register names, then we get assembler errors
2047 in crtn.S because there is no alloc insn or regstk directive in there. */
2048 if (! TARGET_REG_NAMES)
2050 int inputs = current_frame_info.n_input_regs;
2051 int locals = current_frame_info.n_local_regs;
2052 int outputs = current_frame_info.n_output_regs;
2054 for (i = 0; i < inputs; i++)
2055 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2056 for (i = 0; i < locals; i++)
2057 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2058 for (i = 0; i < outputs; i++)
2059 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2062 /* Set the frame pointer register name. The regnum is logically loc79,
2063 but of course we'll not have allocated that many locals. Rather than
2064 worrying about renumbering the existing rtxs, we adjust the name. */
2065 /* ??? This code means that we can never use one local register when
2066 there is a frame pointer. loc79 gets wasted in this case, as it is
2067 renamed to a register that will never be used. See also the try_locals
2068 code in find_gr_spill. */
2069 if (current_frame_info.reg_fp)
2071 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2072 reg_names[HARD_FRAME_POINTER_REGNUM]
2073 = reg_names[current_frame_info.reg_fp];
2074 reg_names[current_frame_info.reg_fp] = tmp;
2077 /* We don't need an alloc instruction if we've used no outputs or locals. */
2078 if (current_frame_info.n_local_regs == 0
2079 && current_frame_info.n_output_regs == 0
2080 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2081 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2083 /* If there is no alloc, but there are input registers used, then we
2084 need a .regstk directive. */
2085 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2086 ar_pfs_save_reg = NULL_RTX;
2088 else
2090 current_frame_info.need_regstk = 0;
2092 if (current_frame_info.reg_save_ar_pfs)
2093 regno = current_frame_info.reg_save_ar_pfs;
2094 else
2095 regno = next_scratch_gr_reg ();
2096 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2098 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2099 GEN_INT (current_frame_info.n_input_regs),
2100 GEN_INT (current_frame_info.n_local_regs),
2101 GEN_INT (current_frame_info.n_output_regs),
2102 GEN_INT (current_frame_info.n_rotate_regs)));
2103 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2106 /* Set up frame pointer, stack pointer, and spill iterators. */
2108 n_varargs = cfun->machine->n_varargs;
2109 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2110 stack_pointer_rtx, 0);
2112 if (frame_pointer_needed)
2114 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2115 RTX_FRAME_RELATED_P (insn) = 1;
2118 if (current_frame_info.total_size != 0)
2120 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2121 rtx offset;
2123 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2124 offset = frame_size_rtx;
2125 else
2127 regno = next_scratch_gr_reg ();
2128 offset = gen_rtx_REG (DImode, regno);
2129 emit_move_insn (offset, frame_size_rtx);
2132 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2133 stack_pointer_rtx, offset));
2135 if (! frame_pointer_needed)
2137 RTX_FRAME_RELATED_P (insn) = 1;
2138 if (GET_CODE (offset) != CONST_INT)
2140 REG_NOTES (insn)
2141 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2142 gen_rtx_SET (VOIDmode,
2143 stack_pointer_rtx,
2144 gen_rtx_PLUS (DImode,
2145 stack_pointer_rtx,
2146 frame_size_rtx)),
2147 REG_NOTES (insn));
2151 /* ??? At this point we must generate a magic insn that appears to
2152 modify the stack pointer, the frame pointer, and all spill
2153 iterators. This would allow the most scheduling freedom. For
2154 now, just hard stop. */
2155 emit_insn (gen_blockage ());
2158 /* Must copy out ar.unat before doing any integer spills. */
2159 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2161 if (current_frame_info.reg_save_ar_unat)
2162 ar_unat_save_reg
2163 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2164 else
2166 alt_regno = next_scratch_gr_reg ();
2167 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2168 current_frame_info.gr_used_mask |= 1 << alt_regno;
2171 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2172 insn = emit_move_insn (ar_unat_save_reg, reg);
2173 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2175 /* Even if we're not going to generate an epilogue, we still
2176 need to save the register so that EH works. */
2177 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2178 emit_insn (gen_prologue_use (ar_unat_save_reg));
2180 else
2181 ar_unat_save_reg = NULL_RTX;
2183 /* Spill all varargs registers. Do this before spilling any GR registers,
2184 since we want the UNAT bits for the GR registers to override the UNAT
2185 bits from varargs, which we don't care about. */
2187 cfa_off = -16;
2188 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2190 reg = gen_rtx_REG (DImode, regno);
2191 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2194 /* Locate the bottom of the register save area. */
2195 cfa_off = (current_frame_info.spill_cfa_off
2196 + current_frame_info.spill_size
2197 + current_frame_info.extra_spill_size);
2199 /* Save the predicate register block either in a register or in memory. */
2200 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2202 reg = gen_rtx_REG (DImode, PR_REG (0));
2203 if (current_frame_info.reg_save_pr != 0)
2205 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2206 insn = emit_move_insn (alt_reg, reg);
2208 /* ??? Denote pr spill/fill by a DImode move that modifies all
2209 64 hard registers. */
2210 RTX_FRAME_RELATED_P (insn) = 1;
2211 REG_NOTES (insn)
2212 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2213 gen_rtx_SET (VOIDmode, alt_reg, reg),
2214 REG_NOTES (insn));
2216 /* Even if we're not going to generate an epilogue, we still
2217 need to save the register so that EH works. */
2218 if (! epilogue_p)
2219 emit_insn (gen_prologue_use (alt_reg));
2221 else
2223 alt_regno = next_scratch_gr_reg ();
2224 alt_reg = gen_rtx_REG (DImode, alt_regno);
2225 insn = emit_move_insn (alt_reg, reg);
2226 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2227 cfa_off -= 8;
2231 /* Handle AR regs in numerical order. All of them get special handling. */
2232 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2233 && current_frame_info.reg_save_ar_unat == 0)
2235 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2236 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2237 cfa_off -= 8;
2240 /* The alloc insn already copied ar.pfs into a general register. The
2241 only thing we have to do now is copy that register to a stack slot
2242 if we'd not allocated a local register for the job. */
2243 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2244 && current_frame_info.reg_save_ar_pfs == 0)
2246 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2247 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2248 cfa_off -= 8;
2251 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2253 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2254 if (current_frame_info.reg_save_ar_lc != 0)
2256 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2257 insn = emit_move_insn (alt_reg, reg);
2258 RTX_FRAME_RELATED_P (insn) = 1;
2260 /* Even if we're not going to generate an epilogue, we still
2261 need to save the register so that EH works. */
2262 if (! epilogue_p)
2263 emit_insn (gen_prologue_use (alt_reg));
2265 else
2267 alt_regno = next_scratch_gr_reg ();
2268 alt_reg = gen_rtx_REG (DImode, alt_regno);
2269 emit_move_insn (alt_reg, reg);
2270 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2271 cfa_off -= 8;
2275 if (current_frame_info.reg_save_gp)
2277 insn = emit_move_insn (gen_rtx_REG (DImode,
2278 current_frame_info.reg_save_gp),
2279 pic_offset_table_rtx);
2280 /* We don't know for sure yet if this is actually needed, since
2281 we've not split the PIC call patterns. If all of the calls
2282 are indirect, and not followed by any uses of the gp, then
2283 this save is dead. Allow it to go away. */
2284 REG_NOTES (insn)
2285 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2288 /* We should now be at the base of the gr/br/fr spill area. */
2289 if (cfa_off != (current_frame_info.spill_cfa_off
2290 + current_frame_info.spill_size))
2291 abort ();
2293 /* Spill all general registers. */
2294 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2295 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2297 reg = gen_rtx_REG (DImode, regno);
2298 do_spill (gen_gr_spill, reg, cfa_off, reg);
2299 cfa_off -= 8;
2302 /* Handle BR0 specially -- it may be getting stored permanently in
2303 some GR register. */
2304 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2306 reg = gen_rtx_REG (DImode, BR_REG (0));
2307 if (current_frame_info.reg_save_b0 != 0)
2309 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2310 insn = emit_move_insn (alt_reg, reg);
2311 RTX_FRAME_RELATED_P (insn) = 1;
2313 /* Even if we're not going to generate an epilogue, we still
2314 need to save the register so that EH works. */
2315 if (! epilogue_p)
2316 emit_insn (gen_prologue_use (alt_reg));
2318 else
2320 alt_regno = next_scratch_gr_reg ();
2321 alt_reg = gen_rtx_REG (DImode, alt_regno);
2322 emit_move_insn (alt_reg, reg);
2323 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2324 cfa_off -= 8;
2328 /* Spill the rest of the BR registers. */
2329 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2330 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2332 alt_regno = next_scratch_gr_reg ();
2333 alt_reg = gen_rtx_REG (DImode, alt_regno);
2334 reg = gen_rtx_REG (DImode, regno);
2335 emit_move_insn (alt_reg, reg);
2336 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2337 cfa_off -= 8;
2340 /* Align the frame and spill all FR registers. */
2341 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2342 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2344 if (cfa_off & 15)
2345 abort ();
2346 reg = gen_rtx_REG (XFmode, regno);
2347 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2348 cfa_off -= 16;
2351 if (cfa_off != current_frame_info.spill_cfa_off)
2352 abort ();
2354 finish_spill_pointers ();
2357 /* Called after register allocation to add any instructions needed for the
2358 epilogue. Using an epilogue insn is favored compared to putting all of the
2359 instructions in output_function_epilogue(), since it allows the scheduler
2360 to intermix instructions with the saves of the caller saved registers. In
2361 some cases, it might be necessary to emit a barrier instruction as the last
2362 insn to prevent such scheduling. */
2364 void
2365 ia64_expand_epilogue (int sibcall_p)
2367 rtx insn, reg, alt_reg, ar_unat_save_reg;
2368 int regno, alt_regno, cfa_off;
2370 ia64_compute_frame_size (get_frame_size ());
2372 /* If there is a frame pointer, then we use it instead of the stack
2373 pointer, so that the stack pointer does not need to be valid when
2374 the epilogue starts. See EXIT_IGNORE_STACK. */
2375 if (frame_pointer_needed)
2376 setup_spill_pointers (current_frame_info.n_spilled,
2377 hard_frame_pointer_rtx, 0);
2378 else
2379 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2380 current_frame_info.total_size);
2382 if (current_frame_info.total_size != 0)
2384 /* ??? At this point we must generate a magic insn that appears to
2385 modify the spill iterators and the frame pointer. This would
2386 allow the most scheduling freedom. For now, just hard stop. */
2387 emit_insn (gen_blockage ());
2390 /* Locate the bottom of the register save area. */
2391 cfa_off = (current_frame_info.spill_cfa_off
2392 + current_frame_info.spill_size
2393 + current_frame_info.extra_spill_size);
2395 /* Restore the predicate registers. */
2396 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2398 if (current_frame_info.reg_save_pr != 0)
2399 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2400 else
2402 alt_regno = next_scratch_gr_reg ();
2403 alt_reg = gen_rtx_REG (DImode, alt_regno);
2404 do_restore (gen_movdi_x, alt_reg, cfa_off);
2405 cfa_off -= 8;
2407 reg = gen_rtx_REG (DImode, PR_REG (0));
2408 emit_move_insn (reg, alt_reg);
2411 /* Restore the application registers. */
2413 /* Load the saved unat from the stack, but do not restore it until
2414 after the GRs have been restored. */
2415 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2417 if (current_frame_info.reg_save_ar_unat != 0)
2418 ar_unat_save_reg
2419 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2420 else
2422 alt_regno = next_scratch_gr_reg ();
2423 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2424 current_frame_info.gr_used_mask |= 1 << alt_regno;
2425 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2426 cfa_off -= 8;
2429 else
2430 ar_unat_save_reg = NULL_RTX;
2432 if (current_frame_info.reg_save_ar_pfs != 0)
2434 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2435 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2436 emit_move_insn (reg, alt_reg);
2438 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2440 alt_regno = next_scratch_gr_reg ();
2441 alt_reg = gen_rtx_REG (DImode, alt_regno);
2442 do_restore (gen_movdi_x, alt_reg, cfa_off);
2443 cfa_off -= 8;
2444 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2445 emit_move_insn (reg, alt_reg);
2448 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2450 if (current_frame_info.reg_save_ar_lc != 0)
2451 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2452 else
2454 alt_regno = next_scratch_gr_reg ();
2455 alt_reg = gen_rtx_REG (DImode, alt_regno);
2456 do_restore (gen_movdi_x, alt_reg, cfa_off);
2457 cfa_off -= 8;
2459 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2460 emit_move_insn (reg, alt_reg);
2463 /* We should now be at the base of the gr/br/fr spill area. */
2464 if (cfa_off != (current_frame_info.spill_cfa_off
2465 + current_frame_info.spill_size))
2466 abort ();
2468 /* The GP may be stored on the stack in the prologue, but it's
2469 never restored in the epilogue. Skip the stack slot. */
2470 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2471 cfa_off -= 8;
2473 /* Restore all general registers. */
2474 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2475 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2477 reg = gen_rtx_REG (DImode, regno);
2478 do_restore (gen_gr_restore, reg, cfa_off);
2479 cfa_off -= 8;
2482 /* Restore the branch registers. Handle B0 specially, as it may
2483 have gotten stored in some GR register. */
2484 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2486 if (current_frame_info.reg_save_b0 != 0)
2487 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2488 else
2490 alt_regno = next_scratch_gr_reg ();
2491 alt_reg = gen_rtx_REG (DImode, alt_regno);
2492 do_restore (gen_movdi_x, alt_reg, cfa_off);
2493 cfa_off -= 8;
2495 reg = gen_rtx_REG (DImode, BR_REG (0));
2496 emit_move_insn (reg, alt_reg);
2499 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2500 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2502 alt_regno = next_scratch_gr_reg ();
2503 alt_reg = gen_rtx_REG (DImode, alt_regno);
2504 do_restore (gen_movdi_x, alt_reg, cfa_off);
2505 cfa_off -= 8;
2506 reg = gen_rtx_REG (DImode, regno);
2507 emit_move_insn (reg, alt_reg);
2510 /* Restore floating point registers. */
2511 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2512 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2514 if (cfa_off & 15)
2515 abort ();
2516 reg = gen_rtx_REG (XFmode, regno);
2517 do_restore (gen_fr_restore_x, reg, cfa_off);
2518 cfa_off -= 16;
2521 /* Restore ar.unat for real. */
2522 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2524 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2525 emit_move_insn (reg, ar_unat_save_reg);
2528 if (cfa_off != current_frame_info.spill_cfa_off)
2529 abort ();
2531 finish_spill_pointers ();
2533 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2535 /* ??? At this point we must generate a magic insn that appears to
2536 modify the spill iterators, the stack pointer, and the frame
2537 pointer. This would allow the most scheduling freedom. For now,
2538 just hard stop. */
2539 emit_insn (gen_blockage ());
2542 if (cfun->machine->ia64_eh_epilogue_sp)
2543 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2544 else if (frame_pointer_needed)
2546 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2547 RTX_FRAME_RELATED_P (insn) = 1;
2549 else if (current_frame_info.total_size)
2551 rtx offset, frame_size_rtx;
2553 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2554 if (CONST_OK_FOR_I (current_frame_info.total_size))
2555 offset = frame_size_rtx;
2556 else
2558 regno = next_scratch_gr_reg ();
2559 offset = gen_rtx_REG (DImode, regno);
2560 emit_move_insn (offset, frame_size_rtx);
2563 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2564 offset));
2566 RTX_FRAME_RELATED_P (insn) = 1;
2567 if (GET_CODE (offset) != CONST_INT)
2569 REG_NOTES (insn)
2570 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2571 gen_rtx_SET (VOIDmode,
2572 stack_pointer_rtx,
2573 gen_rtx_PLUS (DImode,
2574 stack_pointer_rtx,
2575 frame_size_rtx)),
2576 REG_NOTES (insn));
2580 if (cfun->machine->ia64_eh_epilogue_bsp)
2581 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2583 if (! sibcall_p)
2584 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2585 else
2587 int fp = GR_REG (2);
2588 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
2589 first available call-clobbered register. If there was a frame pointer
2590 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2591 so we have to make sure we're using the string "r2" when emitting
2592 the register name for the assembler. */
2593 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2594 fp = HARD_FRAME_POINTER_REGNUM;
2596 /* We must emit an alloc to force the input registers to become output
2597 registers. Otherwise, if the callee tries to pass its parameters
2598 through to another call without an intervening alloc, then these
2599 values get lost. */
2600 /* ??? We don't need to preserve all input registers. We only need to
2601 preserve those input registers used as arguments to the sibling call.
2602 It is unclear how to compute that number here. */
2603 if (current_frame_info.n_input_regs != 0)
2604 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2605 const0_rtx, const0_rtx,
2606 GEN_INT (current_frame_info.n_input_regs),
2607 const0_rtx));
2611 /* Return 1 if br.ret can do all the work required to return from a
2612 function. */
2615 ia64_direct_return (void)
2617 if (reload_completed && ! frame_pointer_needed)
2619 ia64_compute_frame_size (get_frame_size ());
2621 return (current_frame_info.total_size == 0
2622 && current_frame_info.n_spilled == 0
2623 && current_frame_info.reg_save_b0 == 0
2624 && current_frame_info.reg_save_pr == 0
2625 && current_frame_info.reg_save_ar_pfs == 0
2626 && current_frame_info.reg_save_ar_unat == 0
2627 && current_frame_info.reg_save_ar_lc == 0);
2629 return 0;
2632 /* Return the magic cookie that we use to hold the return address
2633 during early compilation. */
2636 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
2638 if (count != 0)
2639 return NULL;
2640 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
2643 /* Split this value after reload, now that we know where the return
2644 address is saved. */
2646 void
2647 ia64_split_return_addr_rtx (rtx dest)
2649 rtx src;
2651 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2653 if (current_frame_info.reg_save_b0 != 0)
2654 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2655 else
2657 HOST_WIDE_INT off;
2658 unsigned int regno;
2660 /* Compute offset from CFA for BR0. */
2661 /* ??? Must be kept in sync with ia64_expand_prologue. */
2662 off = (current_frame_info.spill_cfa_off
2663 + current_frame_info.spill_size);
2664 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2665 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2666 off -= 8;
2668 /* Convert CFA offset to a register based offset. */
2669 if (frame_pointer_needed)
2670 src = hard_frame_pointer_rtx;
2671 else
2673 src = stack_pointer_rtx;
2674 off += current_frame_info.total_size;
2677 /* Load address into scratch register. */
2678 if (CONST_OK_FOR_I (off))
2679 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
2680 else
2682 emit_move_insn (dest, GEN_INT (off));
2683 emit_insn (gen_adddi3 (dest, src, dest));
2686 src = gen_rtx_MEM (Pmode, dest);
2689 else
2690 src = gen_rtx_REG (DImode, BR_REG (0));
2692 emit_move_insn (dest, src);
2696 ia64_hard_regno_rename_ok (int from, int to)
2698 /* Don't clobber any of the registers we reserved for the prologue. */
2699 if (to == current_frame_info.reg_fp
2700 || to == current_frame_info.reg_save_b0
2701 || to == current_frame_info.reg_save_pr
2702 || to == current_frame_info.reg_save_ar_pfs
2703 || to == current_frame_info.reg_save_ar_unat
2704 || to == current_frame_info.reg_save_ar_lc)
2705 return 0;
2707 if (from == current_frame_info.reg_fp
2708 || from == current_frame_info.reg_save_b0
2709 || from == current_frame_info.reg_save_pr
2710 || from == current_frame_info.reg_save_ar_pfs
2711 || from == current_frame_info.reg_save_ar_unat
2712 || from == current_frame_info.reg_save_ar_lc)
2713 return 0;
2715 /* Don't use output registers outside the register frame. */
2716 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2717 return 0;
2719 /* Retain even/oddness on predicate register pairs. */
2720 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2721 return (from & 1) == (to & 1);
2723 return 1;
2726 /* Target hook for assembling integer objects. Handle word-sized
2727 aligned objects and detect the cases when @fptr is needed. */
2729 static bool
2730 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
2732 if (size == POINTER_SIZE / BITS_PER_UNIT
2733 && aligned_p
2734 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2735 && GET_CODE (x) == SYMBOL_REF
2736 && SYMBOL_REF_FUNCTION_P (x))
2738 if (POINTER_SIZE == 32)
2739 fputs ("\tdata4\t@fptr(", asm_out_file);
2740 else
2741 fputs ("\tdata8\t@fptr(", asm_out_file);
2742 output_addr_const (asm_out_file, x);
2743 fputs (")\n", asm_out_file);
2744 return true;
2746 return default_assemble_integer (x, size, aligned_p);
2749 /* Emit the function prologue. */
2751 static void
2752 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
2754 int mask, grsave, grsave_prev;
2756 if (current_frame_info.need_regstk)
2757 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2758 current_frame_info.n_input_regs,
2759 current_frame_info.n_local_regs,
2760 current_frame_info.n_output_regs,
2761 current_frame_info.n_rotate_regs);
2763 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2764 return;
2766 /* Emit the .prologue directive. */
2768 mask = 0;
2769 grsave = grsave_prev = 0;
2770 if (current_frame_info.reg_save_b0 != 0)
2772 mask |= 8;
2773 grsave = grsave_prev = current_frame_info.reg_save_b0;
2775 if (current_frame_info.reg_save_ar_pfs != 0
2776 && (grsave_prev == 0
2777 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2779 mask |= 4;
2780 if (grsave_prev == 0)
2781 grsave = current_frame_info.reg_save_ar_pfs;
2782 grsave_prev = current_frame_info.reg_save_ar_pfs;
2784 if (current_frame_info.reg_fp != 0
2785 && (grsave_prev == 0
2786 || current_frame_info.reg_fp == grsave_prev + 1))
2788 mask |= 2;
2789 if (grsave_prev == 0)
2790 grsave = HARD_FRAME_POINTER_REGNUM;
2791 grsave_prev = current_frame_info.reg_fp;
2793 if (current_frame_info.reg_save_pr != 0
2794 && (grsave_prev == 0
2795 || current_frame_info.reg_save_pr == grsave_prev + 1))
2797 mask |= 1;
2798 if (grsave_prev == 0)
2799 grsave = current_frame_info.reg_save_pr;
2802 if (mask && TARGET_GNU_AS)
2803 fprintf (file, "\t.prologue %d, %d\n", mask,
2804 ia64_dbx_register_number (grsave));
2805 else
2806 fputs ("\t.prologue\n", file);
2808 /* Emit a .spill directive, if necessary, to relocate the base of
2809 the register spill area. */
2810 if (current_frame_info.spill_cfa_off != -16)
2811 fprintf (file, "\t.spill %ld\n",
2812 (long) (current_frame_info.spill_cfa_off
2813 + current_frame_info.spill_size));
2816 /* Emit the .body directive at the scheduled end of the prologue. */
2818 static void
2819 ia64_output_function_end_prologue (FILE *file)
2821 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2822 return;
2824 fputs ("\t.body\n", file);
2827 /* Emit the function epilogue. */
2829 static void
2830 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
2831 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
2833 int i;
2835 if (current_frame_info.reg_fp)
2837 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2838 reg_names[HARD_FRAME_POINTER_REGNUM]
2839 = reg_names[current_frame_info.reg_fp];
2840 reg_names[current_frame_info.reg_fp] = tmp;
2842 if (! TARGET_REG_NAMES)
2844 for (i = 0; i < current_frame_info.n_input_regs; i++)
2845 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2846 for (i = 0; i < current_frame_info.n_local_regs; i++)
2847 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2848 for (i = 0; i < current_frame_info.n_output_regs; i++)
2849 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2852 current_frame_info.initialized = 0;
2856 ia64_dbx_register_number (int regno)
2858 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2859 from its home at loc79 to something inside the register frame. We
2860 must perform the same renumbering here for the debug info. */
2861 if (current_frame_info.reg_fp)
2863 if (regno == HARD_FRAME_POINTER_REGNUM)
2864 regno = current_frame_info.reg_fp;
2865 else if (regno == current_frame_info.reg_fp)
2866 regno = HARD_FRAME_POINTER_REGNUM;
2869 if (IN_REGNO_P (regno))
2870 return 32 + regno - IN_REG (0);
2871 else if (LOC_REGNO_P (regno))
2872 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2873 else if (OUT_REGNO_P (regno))
2874 return (32 + current_frame_info.n_input_regs
2875 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2876 else
2877 return regno;
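/* Worked example, not part of the original source: the renumbering above
   for a hypothetical frame with 2 input and 3 local registers.  The
   stacked registers are presented to the debugger as one contiguous block
   starting at 32: in0..in1 -> 32..33, loc0..loc2 -> 34..36, out0 -> 37.  */
static int
example_dbx_number (int kind /* 0 = input, 1 = local, 2 = output */, int index)
{
  const int n_inputs = 2, n_locals = 3;

  if (kind == 0)
    return 32 + index;
  else if (kind == 1)
    return 32 + n_inputs + index;
  else
    return 32 + n_inputs + n_locals + index;
}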
2880 void
2881 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
2883 rtx addr_reg, eight = GEN_INT (8);
2885 /* The Intel assembler requires that the global __ia64_trampoline symbol
2886 be declared explicitly. */
2887 if (!TARGET_GNU_AS)
2889 static bool declared_ia64_trampoline = false;
2891 if (!declared_ia64_trampoline)
2893 declared_ia64_trampoline = true;
2894 (*targetm.asm_out.globalize_label) (asm_out_file,
2895 "__ia64_trampoline");
2899 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
2900 addr = convert_memory_address (Pmode, addr);
2901 fnaddr = convert_memory_address (Pmode, fnaddr);
2902 static_chain = convert_memory_address (Pmode, static_chain);
2904 /* Load up our iterator. */
2905 addr_reg = gen_reg_rtx (Pmode);
2906 emit_move_insn (addr_reg, addr);
2908 /* The first two words are the fake descriptor:
2909 __ia64_trampoline, ADDR+16. */
2910 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2911 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2912 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2914 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2915 copy_to_reg (plus_constant (addr, 16)));
2916 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2918 /* The third word is the target descriptor. */
2919 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2920 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2922 /* The fourth word is the static chain. */
2923 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
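/* Illustrative sketch, not part of the original source: the four 8-byte
   words the code above stores, written as plain assignments.  It assumes
   the LP64 case where Pmode is 64 bits; "ia64_trampoline_stub" is a
   hypothetical stand-in for the address of the __ia64_trampoline entry
   point.  */
static void
example_trampoline_layout (unsigned long long *tramp,
                           unsigned long long ia64_trampoline_stub,
                           unsigned long long target_descriptor,
                           unsigned long long static_chain)
{
  tramp[0] = ia64_trampoline_stub;            /* fake descriptor: entry point */
  tramp[1] = (unsigned long long) &tramp[2];  /* fake descriptor: gp = ADDR+16 */
  tramp[2] = target_descriptor;               /* descriptor of the real target */
  tramp[3] = static_chain;                    /* static chain value */
}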
2926 /* Do any needed setup for a variadic function. CUM has not been updated
2927 for the last named argument which has type TYPE and mode MODE.
2929 We generate the actual spill instructions during prologue generation. */
2931 static void
2932 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2933 tree type, int * pretend_size,
2934 int second_time ATTRIBUTE_UNUSED)
2936 CUMULATIVE_ARGS next_cum = *cum;
2938 /* Skip the current argument. */
2939 ia64_function_arg_advance (&next_cum, mode, type, 1);
2941 if (next_cum.words < MAX_ARGUMENT_SLOTS)
2943 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
2944 *pretend_size = n * UNITS_PER_WORD;
2945 cfun->machine->n_varargs = n;
2949 /* Check whether TYPE is a homogeneous floating point aggregate. If
2950 it is, return the mode of the floating point type that appears
2951 in all leaves. If it is not, return VOIDmode.
2953 An aggregate is a homogeneous floating point aggregate if all
2954 fields/elements in it have the same floating point type (e.g.,
2955 SFmode). 128-bit quad-precision floats are excluded. */
2957 static enum machine_mode
2958 hfa_element_mode (tree type, int nested)
2960 enum machine_mode element_mode = VOIDmode;
2961 enum machine_mode mode;
2962 enum tree_code code = TREE_CODE (type);
2963 int know_element_mode = 0;
2964 tree t;
2966 switch (code)
2968 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2969 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2970 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2971 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2972 case FUNCTION_TYPE:
2973 return VOIDmode;
2975 /* Fortran complex types are supposed to be HFAs, so we need to handle
2976 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2977 types though. */
2978 case COMPLEX_TYPE:
2979 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
2980 && TYPE_MODE (type) != TCmode)
2981 return GET_MODE_INNER (TYPE_MODE (type));
2982 else
2983 return VOIDmode;
2985 case REAL_TYPE:
2986 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2987 mode if this is contained within an aggregate. */
2988 if (nested && TYPE_MODE (type) != TFmode)
2989 return TYPE_MODE (type);
2990 else
2991 return VOIDmode;
2993 case ARRAY_TYPE:
2994 return hfa_element_mode (TREE_TYPE (type), 1);
2996 case RECORD_TYPE:
2997 case UNION_TYPE:
2998 case QUAL_UNION_TYPE:
2999 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3001 if (TREE_CODE (t) != FIELD_DECL)
3002 continue;
3004 mode = hfa_element_mode (TREE_TYPE (t), 1);
3005 if (know_element_mode)
3007 if (mode != element_mode)
3008 return VOIDmode;
3010 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3011 return VOIDmode;
3012 else
3014 know_element_mode = 1;
3015 element_mode = mode;
3018 return element_mode;
3020 default:
3021 /* If we reach here, we probably have some front-end specific type
3022 that the backend doesn't know about. This can happen via the
3023 aggregate_value_p call in init_function_start. All we can do is
3024 ignore unknown tree types. */
3025 return VOIDmode;
3028 return VOIDmode;
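/* Illustrative examples, not part of the original source: types and the
   element mode the rules above would assign to them.  The struct names
   are hypothetical; members of type long double (quad precision) would
   be excluded.  */
struct example_hfa_sf   { float a, b, c; };              /* HFA, element SFmode */
struct example_hfa_df   { double re, im; };              /* HFA, element DFmode */
struct example_hfa_nest { struct example_hfa_sf p[2]; float w; };  /* still SFmode */
struct example_not_hfa  { float a; double b; };          /* mixed FP -> VOIDmode */
struct example_not_hfa2 { float a; int count; };         /* non-FP field -> VOIDmode */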
3031 /* Return the number of words required to hold a quantity of TYPE and MODE
3032 when passed as an argument. */
3033 static int
3034 ia64_function_arg_words (tree type, enum machine_mode mode)
3036 int words;
3038 if (mode == BLKmode)
3039 words = int_size_in_bytes (type);
3040 else
3041 words = GET_MODE_SIZE (mode);
3043 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
3046 /* Return the number of registers that should be skipped so the current
3047 argument (described by TYPE and WORDS) will be properly aligned.
3049 Integer and float arguments larger than 8 bytes start at the next
3050 even boundary. Aggregates larger than 8 bytes start at the next
3051 even boundary if the aggregate has 16 byte alignment. Note that
3052 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3053 but are still to be aligned in registers.
3055 ??? The ABI does not specify how to handle aggregates with
3056 alignment from 9 to 15 bytes, or greater than 16. We handle them
3057 all as if they had 16 byte alignment. Such aggregates can occur
3058 only if gcc extensions are used. */
3059 static int
3060 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3062 if ((cum->words & 1) == 0)
3063 return 0;
3065 if (type
3066 && TREE_CODE (type) != INTEGER_TYPE
3067 && TREE_CODE (type) != REAL_TYPE)
3068 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3069 else
3070 return words > 1;
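/* Illustrative sketch, not part of the original source: the slot-skip rule
   above restated for a hypothetical argument, with alignment in bits and
   size in 8-byte argument words.  A skip can only happen when the next
   free slot is odd.  */
static int
example_arg_slot_skip (int next_slot_is_odd, int is_scalar,
                       int size_in_words, int align_in_bits)
{
  if (!next_slot_is_odd)
    return 0;
  if (!is_scalar)
    /* Aggregates skip a slot only when they claim more than 8-byte
       alignment (in practice, 16-byte alignment).  */
    return align_in_bits > 64;
  /* Integer and FP scalars skip a slot when they are wider than 8 bytes.  */
  return size_in_words > 1;
}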
3073 /* Return rtx for register where argument is passed, or zero if it is passed
3074 on the stack. */
3075 /* ??? 128-bit quad-precision floats are always passed in general
3076 registers. */
3079 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3080 int named, int incoming)
3082 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3083 int words = ia64_function_arg_words (type, mode);
3084 int offset = ia64_function_arg_offset (cum, type, words);
3085 enum machine_mode hfa_mode = VOIDmode;
3087 /* If all argument slots are used, then it must go on the stack. */
3088 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3089 return 0;
3091 /* Check for and handle homogeneous FP aggregates. */
3092 if (type)
3093 hfa_mode = hfa_element_mode (type, 0);
3095 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3096 and unprototyped hfas are passed specially. */
3097 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3099 rtx loc[16];
3100 int i = 0;
3101 int fp_regs = cum->fp_regs;
3102 int int_regs = cum->words + offset;
3103 int hfa_size = GET_MODE_SIZE (hfa_mode);
3104 int byte_size;
3105 int args_byte_size;
3107 /* If prototyped, pass it in FR regs then GR regs.
3108 If not prototyped, pass it in both FR and GR regs.
3110 If this is an SFmode aggregate, then it is possible to run out of
3111 FR regs while GR regs are still left. In that case, we pass the
3112 remaining part in the GR regs. */
3114 /* Fill the FP regs. We do this always. We stop if we reach the end
3115 of the argument, the last FP register, or the last argument slot. */
3117 byte_size = ((mode == BLKmode)
3118 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3119 args_byte_size = int_regs * UNITS_PER_WORD;
3120 offset = 0;
3121 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3122 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3124 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3125 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3126 + fp_regs)),
3127 GEN_INT (offset));
3128 offset += hfa_size;
3129 args_byte_size += hfa_size;
3130 fp_regs++;
3133 /* If no prototype, then the whole thing must go in GR regs. */
3134 if (! cum->prototype)
3135 offset = 0;
3136 /* If this is an SFmode aggregate, then we might have some left over
3137 that needs to go in GR regs. */
3138 else if (byte_size != offset)
3139 int_regs += offset / UNITS_PER_WORD;
3141 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3143 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3145 enum machine_mode gr_mode = DImode;
3146 unsigned int gr_size;
3148 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3149 then this goes in a GR reg left adjusted/little endian, right
3150 adjusted/big endian. */
3151 /* ??? Currently this is handled wrong, because 4-byte hunks are
3152 always right adjusted/little endian. */
3153 if (offset & 0x4)
3154 gr_mode = SImode;
3155 /* If we have an even 4 byte hunk because the aggregate is a
3156 multiple of 4 bytes in size, then this goes in a GR reg right
3157 adjusted/little endian. */
3158 else if (byte_size - offset == 4)
3159 gr_mode = SImode;
3161 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3162 gen_rtx_REG (gr_mode, (basereg
3163 + int_regs)),
3164 GEN_INT (offset));
3166 gr_size = GET_MODE_SIZE (gr_mode);
3167 offset += gr_size;
3168 if (gr_size == UNITS_PER_WORD
3169 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3170 int_regs++;
3171 else if (gr_size > UNITS_PER_WORD)
3172 int_regs += gr_size / UNITS_PER_WORD;
3174 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3177 /* Integral and aggregates go in general registers. If we have run out of
3178 FR registers, then FP values must also go in general registers. This can
3179 happen when we have a SFmode HFA. */
3180 else if (mode == TFmode || mode == TCmode
3181 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3183 int byte_size = ((mode == BLKmode)
3184 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3185 if (BYTES_BIG_ENDIAN
3186 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3187 && byte_size < UNITS_PER_WORD
3188 && byte_size > 0)
3190 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3191 gen_rtx_REG (DImode,
3192 (basereg + cum->words
3193 + offset)),
3194 const0_rtx);
3195 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3197 else
3198 return gen_rtx_REG (mode, basereg + cum->words + offset);
3202 /* If there is a prototype, then FP values go in a FR register when
3203 named, and in a GR register when unnamed. */
3204 else if (cum->prototype)
3206 if (named)
3207 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3208 /* In big-endian mode, an anonymous SFmode value must be represented
3209 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3210 the value into the high half of the general register. */
3211 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3212 return gen_rtx_PARALLEL (mode,
3213 gen_rtvec (1,
3214 gen_rtx_EXPR_LIST (VOIDmode,
3215 gen_rtx_REG (DImode, basereg + cum->words + offset),
3216 const0_rtx)));
3217 else
3218 return gen_rtx_REG (mode, basereg + cum->words + offset);
3220 /* If there is no prototype, then FP values go in both FR and GR
3221 registers. */
3222 else
3224 /* See comment above. */
3225 enum machine_mode inner_mode =
3226 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3228 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3229 gen_rtx_REG (mode, (FR_ARG_FIRST
3230 + cum->fp_regs)),
3231 const0_rtx);
3232 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3233 gen_rtx_REG (inner_mode,
3234 (basereg + cum->words
3235 + offset)),
3236 const0_rtx);
3238 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3242 /* Return the number of words, at the beginning of the argument, that must be
3243 put in registers. 0 if the argument is entirely in registers or entirely
3244 in memory. */
3247 ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3248 tree type, int named ATTRIBUTE_UNUSED)
3250 int words = ia64_function_arg_words (type, mode);
3251 int offset = ia64_function_arg_offset (cum, type, words);
3253 /* If all argument slots are used, then it must go on the stack. */
3254 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3255 return 0;
3257 /* It doesn't matter whether the argument goes in FR or GR regs. If
3258 it fits within the 8 argument slots, then it goes entirely in
3259 registers. If it extends past the last argument slot, then the rest
3260 goes on the stack. */
3262 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3263 return 0;
3265 return MAX_ARGUMENT_SLOTS - cum->words - offset;
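/* Worked example, not part of the original source: the split computed
   above, assuming the 8 argument slots of this ABI.  An argument needing
   4 words when 6 slots are already occupied gets 2 words in registers and
   the remaining 2 words on the stack.  */
static int
example_partial_words_in_regs (int slots_used, int arg_words)
{
  const int max_slots = 8;

  if (slots_used >= max_slots)
    return 0;                        /* entirely on the stack */
  if (slots_used + arg_words <= max_slots)
    return 0;                        /* entirely in registers */
  return max_slots - slots_used;     /* leading part in registers */
}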
3268 /* Update CUM to point after this argument. This is patterned after
3269 ia64_function_arg. */
3271 void
3272 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3273 tree type, int named)
3275 int words = ia64_function_arg_words (type, mode);
3276 int offset = ia64_function_arg_offset (cum, type, words);
3277 enum machine_mode hfa_mode = VOIDmode;
3279 /* If all arg slots are already full, then there is nothing to do. */
3280 if (cum->words >= MAX_ARGUMENT_SLOTS)
3281 return;
3283 cum->words += words + offset;
3285 /* Check for and handle homogeneous FP aggregates. */
3286 if (type)
3287 hfa_mode = hfa_element_mode (type, 0);
3289 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3290 and unprototyped hfas are passed specially. */
3291 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3293 int fp_regs = cum->fp_regs;
3294 /* This is the original value of cum->words + offset. */
3295 int int_regs = cum->words - words;
3296 int hfa_size = GET_MODE_SIZE (hfa_mode);
3297 int byte_size;
3298 int args_byte_size;
3300 /* If prototyped, pass it in FR regs then GR regs.
3301 If not prototyped, pass it in both FR and GR regs.
3303 If this is an SFmode aggregate, then it is possible to run out of
3304 FR regs while GR regs are still left. In that case, we pass the
3305 remaining part in the GR regs. */
3307 /* Fill the FP regs. We do this always. We stop if we reach the end
3308 of the argument, the last FP register, or the last argument slot. */
3310 byte_size = ((mode == BLKmode)
3311 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3312 args_byte_size = int_regs * UNITS_PER_WORD;
3313 offset = 0;
3314 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3315 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3317 offset += hfa_size;
3318 args_byte_size += hfa_size;
3319 fp_regs++;
3322 cum->fp_regs = fp_regs;
3325 /* Integral and aggregates go in general registers. If we have run out of
3326 FR registers, then FP values must also go in general registers. This can
3327 happen when we have a SFmode HFA. */
3328 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3329 cum->int_regs = cum->words;
3331 /* If there is a prototype, then FP values go in a FR register when
3332 named, and in a GR register when unnamed. */
3333 else if (cum->prototype)
3335 if (! named)
3336 cum->int_regs = cum->words;
3337 else
3338 /* ??? Complex types should not reach here. */
3339 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3341 /* If there is no prototype, then FP values go in both FR and GR
3342 registers. */
3343 else
3345 /* ??? Complex types should not reach here. */
3346 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3347 cum->int_regs = cum->words;
3351 /* Variable sized types are passed by reference. */
3352 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3354 static bool
3355 ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3356 enum machine_mode mode ATTRIBUTE_UNUSED,
3357 tree type, bool named ATTRIBUTE_UNUSED)
3359 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3362 /* True if it is OK to do sibling call optimization for the specified
3363 call expression EXP. DECL will be the called function, or NULL if
3364 this is an indirect call. */
3365 static bool
3366 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3368 /* We must always return with our current GP. This means we can
3369 only sibcall to functions defined in the current module. */
3370 return decl && (*targetm.binds_local_p) (decl);
3374 /* Implement va_arg. */
3376 static tree
3377 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3379 /* Variable sized types are passed by reference. */
3380 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
3382 tree ptrtype = build_pointer_type (type);
3383 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
3384 return build_va_arg_indirect_ref (addr);
3387 /* Aggregate arguments with alignment larger than 8 bytes start at
3388 the next even boundary. Integer and floating point arguments
3389 do so if they are larger than 8 bytes, whether or not they are
3390 also aligned larger than 8 bytes. */
3391 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3392 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3394 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3395 build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
3396 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3397 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
3398 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3399 gimplify_and_add (t, pre_p);
3402 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
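/* Illustrative sketch, not part of the original source: the pointer
   rounding performed by the PLUS_EXPR/BIT_AND_EXPR pair above, assuming
   8-byte words so that 2 * UNITS_PER_WORD == 16.  */
static unsigned long long
example_va_align_up (unsigned long long valist_addr)
{
  /* Add 2 * UNITS_PER_WORD - 1, then mask with -(2 * UNITS_PER_WORD).  */
  return (valist_addr + 15) & ~15ULL;
}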
3405 /* Return 1 if the function return value is returned in memory. Return 0 if it is
3406 in a register. */
3408 static bool
3409 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
3411 enum machine_mode mode;
3412 enum machine_mode hfa_mode;
3413 HOST_WIDE_INT byte_size;
3415 mode = TYPE_MODE (valtype);
3416 byte_size = GET_MODE_SIZE (mode);
3417 if (mode == BLKmode)
3419 byte_size = int_size_in_bytes (valtype);
3420 if (byte_size < 0)
3421 return true;
3424 /* HFAs with up to 8 elements are returned in the FP argument registers. */
3426 hfa_mode = hfa_element_mode (valtype, 0);
3427 if (hfa_mode != VOIDmode)
3429 int hfa_size = GET_MODE_SIZE (hfa_mode);
3431 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3432 return true;
3433 else
3434 return false;
3436 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3437 return true;
3438 else
3439 return false;
3442 /* Return rtx for register that holds the function return value. */
3445 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
3447 enum machine_mode mode;
3448 enum machine_mode hfa_mode;
3450 mode = TYPE_MODE (valtype);
3451 hfa_mode = hfa_element_mode (valtype, 0);
3453 if (hfa_mode != VOIDmode)
3455 rtx loc[8];
3456 int i;
3457 int hfa_size;
3458 int byte_size;
3459 int offset;
3461 hfa_size = GET_MODE_SIZE (hfa_mode);
3462 byte_size = ((mode == BLKmode)
3463 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3464 offset = 0;
3465 for (i = 0; offset < byte_size; i++)
3467 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3468 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3469 GEN_INT (offset));
3470 offset += hfa_size;
3472 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3474 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
3475 return gen_rtx_REG (mode, FR_ARG_FIRST);
3476 else
3478 if (BYTES_BIG_ENDIAN
3479 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3481 rtx loc[8];
3482 int offset;
3483 int bytesize;
3484 int i;
3486 offset = 0;
3487 bytesize = int_size_in_bytes (valtype);
3488 for (i = 0; offset < bytesize; i++)
3490 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3491 gen_rtx_REG (DImode,
3492 GR_RET_FIRST + i),
3493 GEN_INT (offset));
3494 offset += UNITS_PER_WORD;
3496 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3498 else
3499 return gen_rtx_REG (mode, GR_RET_FIRST);
3503 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3504 We need to emit DTP-relative relocations. */
3506 void
3507 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
3509 if (size != 8)
3510 abort ();
3511 fputs ("\tdata8.ua\t@dtprel(", file);
3512 output_addr_const (file, x);
3513 fputs (")", file);
3516 /* Print a memory address as an operand to reference that memory location. */
3518 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3519 also call this from ia64_print_operand for memory addresses. */
3521 void
3522 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
3523 rtx address ATTRIBUTE_UNUSED)
3527 /* Print an operand to an assembler instruction.
3528 C Swap and print a comparison operator.
3529 D Print an FP comparison operator.
3530 E Print 32 - constant, for SImode shifts as extract.
3531 e Print 64 - constant, for DImode rotates.
3532 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3533 a floating point register emitted normally.
3534 I Invert a predicate register by adding 1.
3535 J Select the proper predicate register for a condition.
3536 j Select the inverse predicate register for a condition.
3537 O Append .acq for volatile load.
3538 P Postincrement of a MEM.
3539 Q Append .rel for volatile store.
3540 S Shift amount for shladd instruction.
3541 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3542 for Intel assembler.
3543 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3544 for Intel assembler.
3545 r Print register name, or constant 0 as r0. HP compatibility for
3546 Linux kernel. */
3547 void
3548 ia64_print_operand (FILE * file, rtx x, int code)
3550 const char *str;
3552 switch (code)
3554 case 0:
3555 /* Handled below. */
3556 break;
3558 case 'C':
3560 enum rtx_code c = swap_condition (GET_CODE (x));
3561 fputs (GET_RTX_NAME (c), file);
3562 return;
3565 case 'D':
3566 switch (GET_CODE (x))
3568 case NE:
3569 str = "neq";
3570 break;
3571 case UNORDERED:
3572 str = "unord";
3573 break;
3574 case ORDERED:
3575 str = "ord";
3576 break;
3577 default:
3578 str = GET_RTX_NAME (GET_CODE (x));
3579 break;
3581 fputs (str, file);
3582 return;
3584 case 'E':
3585 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3586 return;
3588 case 'e':
3589 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3590 return;
3592 case 'F':
3593 if (x == CONST0_RTX (GET_MODE (x)))
3594 str = reg_names [FR_REG (0)];
3595 else if (x == CONST1_RTX (GET_MODE (x)))
3596 str = reg_names [FR_REG (1)];
3597 else if (GET_CODE (x) == REG)
3598 str = reg_names [REGNO (x)];
3599 else
3600 abort ();
3601 fputs (str, file);
3602 return;
3604 case 'I':
3605 fputs (reg_names [REGNO (x) + 1], file);
3606 return;
3608 case 'J':
3609 case 'j':
3611 unsigned int regno = REGNO (XEXP (x, 0));
3612 if (GET_CODE (x) == EQ)
3613 regno += 1;
3614 if (code == 'j')
3615 regno ^= 1;
3616 fputs (reg_names [regno], file);
3618 return;
3620 case 'O':
3621 if (MEM_VOLATILE_P (x))
3622 fputs(".acq", file);
3623 return;
3625 case 'P':
3627 HOST_WIDE_INT value;
3629 switch (GET_CODE (XEXP (x, 0)))
3631 default:
3632 return;
3634 case POST_MODIFY:
3635 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3636 if (GET_CODE (x) == CONST_INT)
3637 value = INTVAL (x);
3638 else if (GET_CODE (x) == REG)
3640 fprintf (file, ", %s", reg_names[REGNO (x)]);
3641 return;
3643 else
3644 abort ();
3645 break;
3647 case POST_INC:
3648 value = GET_MODE_SIZE (GET_MODE (x));
3649 break;
3651 case POST_DEC:
3652 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3653 break;
3656 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
3657 return;
3660 case 'Q':
3661 if (MEM_VOLATILE_P (x))
3662 fputs(".rel", file);
3663 return;
3665 case 'S':
3666 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3667 return;
3669 case 'T':
3670 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3672 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3673 return;
3675 break;
3677 case 'U':
3678 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3680 const char *prefix = "0x";
3681 if (INTVAL (x) & 0x80000000)
3683 fprintf (file, "0xffffffff");
3684 prefix = "";
3686 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3687 return;
3689 break;
3691 case 'r':
3692 /* If this operand is the constant zero, write it as register zero.
3693 Any register, zero, or CONST_INT value is OK here. */
3694 if (GET_CODE (x) == REG)
3695 fputs (reg_names[REGNO (x)], file);
3696 else if (x == CONST0_RTX (GET_MODE (x)))
3697 fputs ("r0", file);
3698 else if (GET_CODE (x) == CONST_INT)
3699 output_addr_const (file, x);
3700 else
3701 output_operand_lossage ("invalid %%r value");
3702 return;
3704 case '+':
3706 const char *which;
3708 /* For conditional branches, returns or calls, substitute
3709 sptk, dptk, dpnt, or spnt for %s. */
3710 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3711 if (x)
3713 int pred_val = INTVAL (XEXP (x, 0));
3715 /* Guess that the top and bottom 2% are statically predicted. */
3716 if (pred_val < REG_BR_PROB_BASE / 50)
3717 which = ".spnt";
3718 else if (pred_val < REG_BR_PROB_BASE / 2)
3719 which = ".dpnt";
3720 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3721 which = ".dptk";
3722 else
3723 which = ".sptk";
3725 else if (GET_CODE (current_output_insn) == CALL_INSN)
3726 which = ".sptk";
3727 else
3728 which = ".dptk";
3730 fputs (which, file);
3731 return;
3734 case ',':
3735 x = current_insn_predicate;
3736 if (x)
3738 unsigned int regno = REGNO (XEXP (x, 0));
3739 if (GET_CODE (x) == EQ)
3740 regno += 1;
3741 fprintf (file, "(%s) ", reg_names [regno]);
3743 return;
3745 default:
3746 output_operand_lossage ("ia64_print_operand: unknown code");
3747 return;
3750 switch (GET_CODE (x))
3752 /* This happens for the spill/restore instructions. */
3753 case POST_INC:
3754 case POST_DEC:
3755 case POST_MODIFY:
3756 x = XEXP (x, 0);
3757 /* ... fall through ... */
3759 case REG:
3760 fputs (reg_names [REGNO (x)], file);
3761 break;
3763 case MEM:
3765 rtx addr = XEXP (x, 0);
3766 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
3767 addr = XEXP (addr, 0);
3768 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3769 break;
3772 default:
3773 output_addr_const (file, x);
3774 break;
3777 return;
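/* Illustrative sketch, not part of this file: a standalone model of the
   '%+' branch-hint selection above.  REG_BR_PROB_BASE is assumed to be
   10000 here; the thresholds mirror the comparisons in the code.  */
static const char *
model_branch_hint (int pred_val)
{
  const int base = 10000;		/* assumed REG_BR_PROB_BASE */

  if (pred_val < base / 50)
    return ".spnt";			/* below 2%: static, not taken */
  else if (pred_val < base / 2)
    return ".dpnt";			/* dynamic, not taken */
  else if (pred_val < base / 100 * 98)
    return ".dptk";			/* dynamic, taken */
  else
    return ".sptk";			/* 98% and up: static, taken */
}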
3780 /* Compute a (partial) cost for rtx X. Return true if the complete
3781 cost has been computed, and false if subexpressions should be
3782 scanned. In either case, *TOTAL contains the cost result. */
3783 /* ??? This is incomplete. */
3785 static bool
3786 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
3788 switch (code)
3790 case CONST_INT:
3791 switch (outer_code)
3793 case SET:
3794 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
3795 return true;
3796 case PLUS:
3797 if (CONST_OK_FOR_I (INTVAL (x)))
3798 *total = 0;
3799 else if (CONST_OK_FOR_J (INTVAL (x)))
3800 *total = 1;
3801 else
3802 *total = COSTS_N_INSNS (1);
3803 return true;
3804 default:
3805 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
3806 *total = 0;
3807 else
3808 *total = COSTS_N_INSNS (1);
3809 return true;
3812 case CONST_DOUBLE:
3813 *total = COSTS_N_INSNS (1);
3814 return true;
3816 case CONST:
3817 case SYMBOL_REF:
3818 case LABEL_REF:
3819 *total = COSTS_N_INSNS (3);
3820 return true;
3822 case MULT:
3823 /* For multiplies wider than HImode, we have to go to the FPU,
3824 which normally involves copies. Plus there's the latency
3825 of the multiply itself, and the latency of the instructions to
3826 transfer integer regs to FP regs. */
3827 /* ??? Check for FP mode. */
3828 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
3829 *total = COSTS_N_INSNS (10);
3830 else
3831 *total = COSTS_N_INSNS (2);
3832 return true;
3834 case PLUS:
3835 case MINUS:
3836 case ASHIFT:
3837 case ASHIFTRT:
3838 case LSHIFTRT:
3839 *total = COSTS_N_INSNS (1);
3840 return true;
3842 case DIV:
3843 case UDIV:
3844 case MOD:
3845 case UMOD:
3846 /* We make divide expensive, so that divide-by-constant will be
3847 optimized to a multiply. */
3848 *total = COSTS_N_INSNS (60);
3849 return true;
3851 default:
3852 return false;
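/* Illustrative sketch, not part of this file: COSTS_N_INSNS (N) is assumed
   to expand to N * 4, so the table above rates an integer multiply at 40
   cost units and a divide at 240; the wide gap is what steers
   divide-by-constant toward a multiply-based expansion.  */
static int
model_cost_units (int n_insns)
{
  return n_insns * 4;			/* assumed COSTS_N_INSNS expansion */
}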
3856 /* Calculate the cost of moving data from a register in class FROM to
3857 one in class TO, using MODE. */
3860 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
3861 enum reg_class to)
3863 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3864 if (to == ADDL_REGS)
3865 to = GR_REGS;
3866 if (from == ADDL_REGS)
3867 from = GR_REGS;
3869 /* All costs are symmetric, so reduce cases by putting the
3870 lower number class as the destination. */
3871 if (from < to)
3873 enum reg_class tmp = to;
3874 to = from, from = tmp;
3877 /* Moving from FR<->GR in XFmode must be more expensive than 2,
3878 so that we get secondary memory reloads. Between FR_REGS,
3879 we have to make this at least as expensive as MEMORY_MOVE_COST
3880 to avoid spectacularly poor register class preferencing. */
3881 if (mode == XFmode)
3883 if (to != GR_REGS || from != GR_REGS)
3884 return MEMORY_MOVE_COST (mode, to, 0);
3885 else
3886 return 3;
3889 switch (to)
3891 case PR_REGS:
3892 /* Moving between PR registers takes two insns. */
3893 if (from == PR_REGS)
3894 return 3;
3895 /* Moving between PR and anything but GR is impossible. */
3896 if (from != GR_REGS)
3897 return MEMORY_MOVE_COST (mode, to, 0);
3898 break;
3900 case BR_REGS:
3901 /* Moving between BR and anything but GR is impossible. */
3902 if (from != GR_REGS && from != GR_AND_BR_REGS)
3903 return MEMORY_MOVE_COST (mode, to, 0);
3904 break;
3906 case AR_I_REGS:
3907 case AR_M_REGS:
3908 /* Moving between AR and anything but GR is impossible. */
3909 if (from != GR_REGS)
3910 return MEMORY_MOVE_COST (mode, to, 0);
3911 break;
3913 case GR_REGS:
3914 case FR_REGS:
3915 case GR_AND_FR_REGS:
3916 case GR_AND_BR_REGS:
3917 case ALL_REGS:
3918 break;
3920 default:
3921 abort ();
3924 return 2;
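/* Illustrative sketch, not part of this file: the cost table above is
   symmetric, so the function first canonicalizes the (from, to) pair by
   swapping when from < to.  A standalone model with classes as plain ints.  */
static void
model_canonicalize_classes (int *from, int *to)
{
  if (*from < *to)
    {
      int tmp = *to;
      *to = *from;
      *from = tmp;
    }
}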
3927 /* This function returns the register class required for a secondary
3928 register when copying between one of the registers in CLASS, and X,
3929 using MODE. A return value of NO_REGS means that no secondary register
3930 is required. */
3932 enum reg_class
3933 ia64_secondary_reload_class (enum reg_class class,
3934 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3936 int regno = -1;
3938 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3939 regno = true_regnum (x);
3941 switch (class)
3943 case BR_REGS:
3944 case AR_M_REGS:
3945 case AR_I_REGS:
3946 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3947 interaction. We end up with two pseudos with overlapping lifetimes
3948 both of which are equiv to the same constant, and both of which need
3949 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3950 changes depending on the path length, which means the qty_first_reg
3951 check in make_regs_eqv can give different answers at different times.
3952 At some point I'll probably need a reload_indi pattern to handle
3953 this.
3955 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3956 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3957 non-general registers for good measure. */
3958 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3959 return GR_REGS;
3961 /* This is needed if a pseudo used as a call_operand gets spilled to a
3962 stack slot. */
3963 if (GET_CODE (x) == MEM)
3964 return GR_REGS;
3965 break;
3967 case FR_REGS:
3968 /* Need to go through general registers to get to other class regs. */
3969 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3970 return GR_REGS;
3972 /* This can happen when a paradoxical subreg is an operand to the
3973 muldi3 pattern. */
3974 /* ??? This shouldn't be necessary after instruction scheduling is
3975 enabled, because paradoxical subregs are not accepted by
3976 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3977 stop the paradoxical subreg stupidity in the *_operand functions
3978 in recog.c. */
3979 if (GET_CODE (x) == MEM
3980 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3981 || GET_MODE (x) == QImode))
3982 return GR_REGS;
3984 /* This can happen because of the ior/and/etc patterns that accept FP
3985 registers as operands. If the third operand is a constant, then it
3986 needs to be reloaded into a FP register. */
3987 if (GET_CODE (x) == CONST_INT)
3988 return GR_REGS;
3990 /* This can happen because of register elimination in a muldi3 insn.
3991 E.g. `26107 * (unsigned long)&u'. */
3992 if (GET_CODE (x) == PLUS)
3993 return GR_REGS;
3994 break;
3996 case PR_REGS:
3997 /* ??? This happens if we cse/gcse a BImode value across a call,
3998 and the function has a nonlocal goto. This is because global
3999 does not allocate call crossing pseudos to hard registers when
4000 current_function_has_nonlocal_goto is true. This is relatively
4001 common for C++ programs that use exceptions. To reproduce,
4002 return NO_REGS and compile libstdc++. */
4003 if (GET_CODE (x) == MEM)
4004 return GR_REGS;
4006 /* This can happen when we take a BImode subreg of a DImode value,
4007 and that DImode value winds up in some non-GR register. */
4008 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4009 return GR_REGS;
4010 break;
4012 default:
4013 break;
4016 return NO_REGS;
4020 /* Emit text to declare externally defined variables and functions, because
4021 the Intel assembler does not support undefined externals. */
4023 void
4024 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4026 int save_referenced;
4028 /* GNU as does not need anything here, but the HP linker does need
4029 something for external functions. */
4031 if (TARGET_GNU_AS
4032 && (!TARGET_HPUX_LD
4033 || TREE_CODE (decl) != FUNCTION_DECL
4034 || strstr (name, "__builtin_") == name))
4035 return;
4037 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4038 the linker when we do this, so we need to be careful not to do this for
4039 builtin functions which have no library equivalent. Unfortunately, we
4040 can't tell here whether or not a function will actually be called by
4041 expand_expr, so we pull in library functions even if we may not need
4042 them later. */
4043 if (! strcmp (name, "__builtin_next_arg")
4044 || ! strcmp (name, "alloca")
4045 || ! strcmp (name, "__builtin_constant_p")
4046 || ! strcmp (name, "__builtin_args_info"))
4047 return;
4049 if (TARGET_HPUX_LD)
4050 ia64_hpux_add_extern_decl (decl);
4051 else
4053 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4054 restore it. */
4055 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4056 if (TREE_CODE (decl) == FUNCTION_DECL)
4057 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4058 (*targetm.asm_out.globalize_label) (file, name);
4059 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4063 /* Parse the -mfixed-range= option string. */
4065 static void
4066 fix_range (const char *const_str)
4068 int i, first, last;
4069 char *str, *dash, *comma;
4071 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4072 REG2 are either register names or register numbers. The effect
4073 of this option is to mark the registers in the range from REG1 to
4074 REG2 as ``fixed'' so they won't be used by the compiler. This is
4075 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4077 i = strlen (const_str);
4078 str = (char *) alloca (i + 1);
4079 memcpy (str, const_str, i + 1);
4081 while (1)
4083 dash = strchr (str, '-');
4084 if (!dash)
4086 warning ("value of -mfixed-range must have form REG1-REG2");
4087 return;
4089 *dash = '\0';
4091 comma = strchr (dash + 1, ',');
4092 if (comma)
4093 *comma = '\0';
4095 first = decode_reg_name (str);
4096 if (first < 0)
4098 warning ("unknown register name: %s", str);
4099 return;
4102 last = decode_reg_name (dash + 1);
4103 if (last < 0)
4105 warning ("unknown register name: %s", dash + 1);
4106 return;
4109 *dash = '-';
4111 if (first > last)
4113 warning ("%s-%s is an empty range", str, dash + 1);
4114 return;
4117 for (i = first; i <= last; ++i)
4118 fixed_regs[i] = call_used_regs[i] = 1;
4120 if (!comma)
4121 break;
4123 *comma = ',';
4124 str = comma + 1;
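/* Illustrative sketch, not part of this file: a standalone parser for the
   REG1-REG2{,REG1-REG2} syntax handled by fix_range above.  Register names
   are simply echoed instead of being decoded with decode_reg_name.  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void
model_parse_fixed_range (const char *option)
{
  char *copy = malloc (strlen (option) + 1);
  char *range;

  if (!copy)
    return;
  strcpy (copy, option);
  for (range = strtok (copy, ","); range; range = strtok (NULL, ","))
    {
      char *dash = strchr (range, '-');
      if (!dash)
	printf ("malformed range: %s\n", range);
      else
	{
	  *dash = '\0';
	  printf ("mark %s through %s as fixed\n", range, dash + 1);
	}
    }
  free (copy);
}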
4128 static struct machine_function *
4129 ia64_init_machine_status (void)
4131 return ggc_alloc_cleared (sizeof (struct machine_function));
4134 /* Handle TARGET_OPTIONS switches. */
4136 void
4137 ia64_override_options (void)
4139 static struct pta
4141 const char *const name; /* processor name or nickname. */
4142 const enum processor_type processor;
4144 const processor_alias_table[] =
4146 {"itanium", PROCESSOR_ITANIUM},
4147 {"itanium1", PROCESSOR_ITANIUM},
4148 {"merced", PROCESSOR_ITANIUM},
4149 {"itanium2", PROCESSOR_ITANIUM2},
4150 {"mckinley", PROCESSOR_ITANIUM2},
4153 int const pta_size = ARRAY_SIZE (processor_alias_table);
4154 int i;
4156 if (TARGET_AUTO_PIC)
4157 target_flags |= MASK_CONST_GP;
4159 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4161 if ((target_flags_explicit & MASK_INLINE_FLOAT_DIV_LAT)
4162 && (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR))
4164 warning ("cannot optimize floating point division for both latency and throughput");
4165 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4167 else
4169 if (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR)
4170 target_flags &= ~MASK_INLINE_FLOAT_DIV_LAT;
4171 else
4172 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4176 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4178 if ((target_flags_explicit & MASK_INLINE_INT_DIV_LAT)
4179 && (target_flags_explicit & MASK_INLINE_INT_DIV_THR))
4181 warning ("cannot optimize integer division for both latency and throughput");
4182 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4184 else
4186 if (target_flags_explicit & MASK_INLINE_INT_DIV_THR)
4187 target_flags &= ~MASK_INLINE_INT_DIV_LAT;
4188 else
4189 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4193 if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
4195 if ((target_flags_explicit & MASK_INLINE_SQRT_LAT)
4196 && (target_flags_explicit & MASK_INLINE_SQRT_THR))
4198 warning ("cannot optimize square root for both latency and throughput");
4199 target_flags &= ~MASK_INLINE_SQRT_THR;
4201 else
4203 if (target_flags_explicit & MASK_INLINE_SQRT_THR)
4204 target_flags &= ~MASK_INLINE_SQRT_LAT;
4205 else
4206 target_flags &= ~MASK_INLINE_SQRT_THR;
4210 if (TARGET_INLINE_SQRT_LAT)
4212 warning ("not yet implemented: latency-optimized inline square root");
4213 target_flags &= ~MASK_INLINE_SQRT_LAT;
4216 if (ia64_fixed_range_string)
4217 fix_range (ia64_fixed_range_string);
4219 if (ia64_tls_size_string)
4221 char *end;
4222 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4223 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4224 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4225 else
4226 ia64_tls_size = tmp;
4229 if (!ia64_tune_string)
4230 ia64_tune_string = "itanium2";
4232 for (i = 0; i < pta_size; i++)
4233 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4235 ia64_tune = processor_alias_table[i].processor;
4236 break;
4239 if (i == pta_size)
4240 error ("bad value (%s) for -tune= switch", ia64_tune_string);
4242 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4243 flag_schedule_insns_after_reload = 0;
4245 /* Variable tracking should be run after all optimizations which change order
4246 of insns. It also needs a valid CFG. */
4247 ia64_flag_var_tracking = flag_var_tracking;
4248 flag_var_tracking = 0;
4250 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4252 init_machine_status = ia64_init_machine_status;
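/* Illustrative sketch, not part of this file: the -mtune= handling above is
   a linear scan of a name/enum alias table.  A standalone version; the enum
   values here are stand-ins, not GCC's processor_type.  */
#include <string.h>

enum model_processor { MODEL_ITANIUM, MODEL_ITANIUM2, MODEL_UNKNOWN };

static enum model_processor
model_lookup_tune (const char *name)
{
  static const struct { const char *name; enum model_processor proc; }
    aliases[] = {
      { "itanium",  MODEL_ITANIUM  },
      { "itanium1", MODEL_ITANIUM  },
      { "merced",   MODEL_ITANIUM  },
      { "itanium2", MODEL_ITANIUM2 },
      { "mckinley", MODEL_ITANIUM2 },
    };
  size_t i;

  for (i = 0; i < sizeof aliases / sizeof aliases[0]; i++)
    if (! strcmp (name, aliases[i].name))
      return aliases[i].proc;
  return MODEL_UNKNOWN;
}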
4255 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4256 static enum attr_type ia64_safe_type (rtx);
4258 static enum attr_itanium_class
4259 ia64_safe_itanium_class (rtx insn)
4261 if (recog_memoized (insn) >= 0)
4262 return get_attr_itanium_class (insn);
4263 else
4264 return ITANIUM_CLASS_UNKNOWN;
4267 static enum attr_type
4268 ia64_safe_type (rtx insn)
4270 if (recog_memoized (insn) >= 0)
4271 return get_attr_type (insn);
4272 else
4273 return TYPE_UNKNOWN;
4276 /* The following collection of routines emit instruction group stop bits as
4277 necessary to avoid dependencies. */
4279 /* Need to track some additional registers as far as serialization is
4280 concerned so we can properly handle br.call and br.ret. We could
4281 make these registers visible to gcc, but since these registers are
4282 never explicitly used in gcc generated code, it seems wasteful to
4283 do so (plus it would make the call and return patterns needlessly
4284 complex). */
4285 #define REG_RP (BR_REG (0))
4286 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4287 /* This is used for volatile asms which may require a stop bit immediately
4288 before and after them. */
4289 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4290 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4291 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4293 /* For each register, we keep track of how it has been written in the
4294 current instruction group.
4296 If a register is written unconditionally (no qualifying predicate),
4297 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4299 If a register is written if its qualifying predicate P is true, we
4300 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4301 may be written again by the complement of P (P^1) and when this happens,
4302 WRITE_COUNT gets set to 2.
4304 The result of this is that whenever an insn attempts to write a register
4305 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4307 If a predicate register is written by a floating-point insn, we set
4308 WRITTEN_BY_FP to true.
4310 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4311 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4313 struct reg_write_state
4315 unsigned int write_count : 2;
4316 unsigned int first_pred : 16;
4317 unsigned int written_by_fp : 1;
4318 unsigned int written_by_and : 1;
4319 unsigned int written_by_or : 1;
4322 /* Cumulative info for the current instruction group. */
4323 struct reg_write_state rws_sum[NUM_REGS];
4324 /* Info for the current instruction. This gets copied to rws_sum after a
4325 stop bit is emitted. */
4326 struct reg_write_state rws_insn[NUM_REGS];
4328 /* Indicates whether this is the first instruction after a stop bit,
4329 in which case we don't need another stop bit. Without this, we hit
4330 the abort in ia64_variable_issue when scheduling an alloc. */
4331 static int first_instruction;
4333 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4334 RTL for one instruction. */
4335 struct reg_flags
4337 unsigned int is_write : 1; /* Is register being written? */
4338 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4339 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4340 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4341 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4342 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4345 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4346 static int rws_access_regno (int, struct reg_flags, int);
4347 static int rws_access_reg (rtx, struct reg_flags, int);
4348 static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4349 static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4350 static int rtx_needs_barrier (rtx, struct reg_flags, int);
4351 static void init_insn_group_barriers (void);
4352 static int group_barrier_needed_p (rtx);
4353 static int safe_group_barrier_needed_p (rtx);
4355 /* Update *RWS for REGNO, which is being written by the current instruction,
4356 with predicate PRED, and associated register flags in FLAGS. */
4358 static void
4359 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
4361 if (pred)
4362 rws[regno].write_count++;
4363 else
4364 rws[regno].write_count = 2;
4365 rws[regno].written_by_fp |= flags.is_fp;
4366 /* ??? Not tracking and/or across differing predicates. */
4367 rws[regno].written_by_and = flags.is_and;
4368 rws[regno].written_by_or = flags.is_or;
4369 rws[regno].first_pred = pred;
4372 /* Handle an access to register REGNO of type FLAGS using predicate register
4373 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4374 a dependency with an earlier instruction in the same group. */
4376 static int
4377 rws_access_regno (int regno, struct reg_flags flags, int pred)
4379 int need_barrier = 0;
4381 if (regno >= NUM_REGS)
4382 abort ();
4384 if (! PR_REGNO_P (regno))
4385 flags.is_and = flags.is_or = 0;
4387 if (flags.is_write)
4389 int write_count;
4391 /* One insn writes same reg multiple times? */
4392 if (rws_insn[regno].write_count > 0)
4393 abort ();
4395 /* Update info for current instruction. */
4396 rws_update (rws_insn, regno, flags, pred);
4397 write_count = rws_sum[regno].write_count;
4399 switch (write_count)
4401 case 0:
4402 /* The register has not been written yet. */
4403 rws_update (rws_sum, regno, flags, pred);
4404 break;
4406 case 1:
4407 /* The register has been written via a predicate. If this is
4408 not a complementary predicate, then we need a barrier. */
4409 /* ??? This assumes that P and P+1 are always complementary
4410 predicates for P even. */
4411 if (flags.is_and && rws_sum[regno].written_by_and)
4413 else if (flags.is_or && rws_sum[regno].written_by_or)
4415 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4416 need_barrier = 1;
4417 rws_update (rws_sum, regno, flags, pred);
4418 break;
4420 case 2:
4421 /* The register has been unconditionally written already. We
4422 need a barrier. */
4423 if (flags.is_and && rws_sum[regno].written_by_and)
4425 else if (flags.is_or && rws_sum[regno].written_by_or)
4427 else
4428 need_barrier = 1;
4429 rws_sum[regno].written_by_and = flags.is_and;
4430 rws_sum[regno].written_by_or = flags.is_or;
4431 break;
4433 default:
4434 abort ();
4437 else
4439 if (flags.is_branch)
4441 /* Branches have several RAW exceptions that allow us to avoid
4442 barriers. */
4444 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4445 /* RAW dependencies on branch regs are permissible as long
4446 as the writer is a non-branch instruction. Since we
4447 never generate code that uses a branch register written
4448 by a branch instruction, handling this case is
4449 easy. */
4450 return 0;
4452 if (REGNO_REG_CLASS (regno) == PR_REGS
4453 && ! rws_sum[regno].written_by_fp)
4454 /* The predicates of a branch are available within the
4455 same insn group as long as the predicate was written by
4456 something other than a floating-point instruction. */
4457 return 0;
4460 if (flags.is_and && rws_sum[regno].written_by_and)
4461 return 0;
4462 if (flags.is_or && rws_sum[regno].written_by_or)
4463 return 0;
4465 switch (rws_sum[regno].write_count)
4467 case 0:
4468 /* The register has not been written yet. */
4469 break;
4471 case 1:
4472 /* The register has been written via a predicate. If this is
4473 not a complementary predicate, then we need a barrier. */
4474 /* ??? This assumes that P and P+1 are always complementary
4475 predicates for P even. */
4476 if ((rws_sum[regno].first_pred ^ 1) != pred)
4477 need_barrier = 1;
4478 break;
4480 case 2:
4481 /* The register has been unconditionally written already. We
4482 need a barrier. */
4483 need_barrier = 1;
4484 break;
4486 default:
4487 abort ();
4491 return need_barrier;
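/* Illustrative sketch, not part of this file: a stripped-down model of the
   WRITE_COUNT/FIRST_PRED bookkeeping above for a single register's write
   side.  Predicate 0 means "unconditional"; P and P^1 are assumed to be a
   complementary pair, as the ??? comment above notes.  Returns 1 when a
   stop bit would be needed.  */
struct model_reg_state { int write_count; int first_pred; };

static int
model_record_write (struct model_reg_state *s, int pred)
{
  switch (s->write_count)
    {
    case 0:
      /* First write in the group: remember how it was predicated.  */
      s->write_count = pred ? 1 : 2;
      s->first_pred = pred;
      return 0;

    case 1:
      /* Second write: only the complementary predicate avoids a stop.  */
      s->write_count = 2;
      return (s->first_pred ^ 1) != pred;

    default:
      /* Already written unconditionally or by both predicates.  */
      return 1;
    }
}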
4494 static int
4495 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
4497 int regno = REGNO (reg);
4498 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4500 if (n == 1)
4501 return rws_access_regno (regno, flags, pred);
4502 else
4504 int need_barrier = 0;
4505 while (--n >= 0)
4506 need_barrier |= rws_access_regno (regno + n, flags, pred);
4507 return need_barrier;
4511 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4512 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4514 static void
4515 update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
4517 rtx src = SET_SRC (x);
4519 *pcond = 0;
4521 switch (GET_CODE (src))
4523 case CALL:
4524 return;
4526 case IF_THEN_ELSE:
4527 if (SET_DEST (x) == pc_rtx)
4528 /* X is a conditional branch. */
4529 return;
4530 else
4532 int is_complemented = 0;
4534 /* X is a conditional move. */
4535 rtx cond = XEXP (src, 0);
4536 if (GET_CODE (cond) == EQ)
4537 is_complemented = 1;
4538 cond = XEXP (cond, 0);
4539 if (GET_CODE (cond) != REG
4540 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4541 abort ();
4542 *pcond = cond;
4543 if (XEXP (src, 1) == SET_DEST (x)
4544 || XEXP (src, 2) == SET_DEST (x))
4546 /* X is a conditional move that conditionally writes the
4547 destination. */
4549 /* We need another complement in this case. */
4550 if (XEXP (src, 1) == SET_DEST (x))
4551 is_complemented = ! is_complemented;
4553 *ppred = REGNO (cond);
4554 if (is_complemented)
4555 ++*ppred;
4558 /* ??? If this is a conditional write to the dest, then this
4559 instruction does not actually read one source. This probably
4560 doesn't matter, because that source is also the dest. */
4561 /* ??? Multiple writes to predicate registers are allowed
4562 if they are all AND type compares, or if they are all OR
4563 type compares. We do not generate such instructions
4564 currently. */
4566 /* ... fall through ... */
4568 default:
4569 if (COMPARISON_P (src)
4570 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4571 /* Set pflags->is_fp to 1 so that we know we're dealing
4572 with a floating point comparison when processing the
4573 destination of the SET. */
4574 pflags->is_fp = 1;
4576 /* Discover if this is a parallel comparison. We only handle
4577 and.orcm and or.andcm at present, since we must retain a
4578 strict inverse on the predicate pair. */
4579 else if (GET_CODE (src) == AND)
4580 pflags->is_and = 1;
4581 else if (GET_CODE (src) == IOR)
4582 pflags->is_or = 1;
4584 break;
4588 /* Subroutine of rtx_needs_barrier; this function determines whether the
4589 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4590 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4591 for this insn. */
4593 static int
4594 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
4596 int need_barrier = 0;
4597 rtx dst;
4598 rtx src = SET_SRC (x);
4600 if (GET_CODE (src) == CALL)
4601 /* We don't need to worry about the result registers that
4602 get written by subroutine call. */
4603 return rtx_needs_barrier (src, flags, pred);
4604 else if (SET_DEST (x) == pc_rtx)
4606 /* X is a conditional branch. */
4607 /* ??? This seems redundant, as the caller sets this bit for
4608 all JUMP_INSNs. */
4609 flags.is_branch = 1;
4610 return rtx_needs_barrier (src, flags, pred);
4613 need_barrier = rtx_needs_barrier (src, flags, pred);
4615 /* This instruction unconditionally uses a predicate register. */
4616 if (cond)
4617 need_barrier |= rws_access_reg (cond, flags, 0);
4619 dst = SET_DEST (x);
4620 if (GET_CODE (dst) == ZERO_EXTRACT)
4622 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4623 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4624 dst = XEXP (dst, 0);
4626 return need_barrier;
4629 /* Handle an access to rtx X of type FLAGS using predicate register
4630 PRED. Return 1 if this access creates a dependency with an earlier
4631 instruction in the same group. */
4633 static int
4634 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
4636 int i, j;
4637 int is_complemented = 0;
4638 int need_barrier = 0;
4639 const char *format_ptr;
4640 struct reg_flags new_flags;
4641 rtx cond = 0;
4643 if (! x)
4644 return 0;
4646 new_flags = flags;
4648 switch (GET_CODE (x))
4650 case SET:
4651 update_set_flags (x, &new_flags, &pred, &cond);
4652 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4653 if (GET_CODE (SET_SRC (x)) != CALL)
4655 new_flags.is_write = 1;
4656 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4658 break;
4660 case CALL:
4661 new_flags.is_write = 0;
4662 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4664 /* Avoid multiple register writes, in case this is a pattern with
4665 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4666 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4668 new_flags.is_write = 1;
4669 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4670 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4671 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4673 break;
4675 case COND_EXEC:
4676 /* X is a predicated instruction. */
4678 cond = COND_EXEC_TEST (x);
4679 if (pred)
4680 abort ();
4681 need_barrier = rtx_needs_barrier (cond, flags, 0);
4683 if (GET_CODE (cond) == EQ)
4684 is_complemented = 1;
4685 cond = XEXP (cond, 0);
4686 if (GET_CODE (cond) != REG
4687 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4688 abort ();
4689 pred = REGNO (cond);
4690 if (is_complemented)
4691 ++pred;
4693 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4694 return need_barrier;
4696 case CLOBBER:
4697 case USE:
4698 /* Clobber & use are for earlier compiler-phases only. */
4699 break;
4701 case ASM_OPERANDS:
4702 case ASM_INPUT:
4703 /* We always emit stop bits for traditional asms. We emit stop bits
4704 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4705 if (GET_CODE (x) != ASM_OPERANDS
4706 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4708 /* Avoid writing the register multiple times if we have multiple
4709 asm outputs. This avoids an abort in rws_access_reg. */
4710 if (! rws_insn[REG_VOLATILE].write_count)
4712 new_flags.is_write = 1;
4713 rws_access_regno (REG_VOLATILE, new_flags, pred);
4715 return 1;
4718 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4719 We cannot just fall through here since then we would be confused
4720 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
4721 usage, does not indicate a traditional asm. */
4723 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4724 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4725 need_barrier = 1;
4726 break;
4728 case PARALLEL:
4729 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4731 rtx pat = XVECEXP (x, 0, i);
4732 if (GET_CODE (pat) == SET)
4734 update_set_flags (pat, &new_flags, &pred, &cond);
4735 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4737 else if (GET_CODE (pat) == USE
4738 || GET_CODE (pat) == CALL
4739 || GET_CODE (pat) == ASM_OPERANDS)
4740 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4741 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4742 abort ();
4744 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4746 rtx pat = XVECEXP (x, 0, i);
4747 if (GET_CODE (pat) == SET)
4749 if (GET_CODE (SET_SRC (pat)) != CALL)
4751 new_flags.is_write = 1;
4752 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4753 pred);
4756 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4757 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4759 break;
4761 case SUBREG:
4762 x = SUBREG_REG (x);
4763 /* FALLTHRU */
4764 case REG:
4765 if (REGNO (x) == AR_UNAT_REGNUM)
4767 for (i = 0; i < 64; ++i)
4768 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4770 else
4771 need_barrier = rws_access_reg (x, flags, pred);
4772 break;
4774 case MEM:
4775 /* Find the regs used in memory address computation. */
4776 new_flags.is_write = 0;
4777 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4778 break;
4780 case CONST_INT: case CONST_DOUBLE:
4781 case SYMBOL_REF: case LABEL_REF: case CONST:
4782 break;
4784 /* Operators with side-effects. */
4785 case POST_INC: case POST_DEC:
4786 if (GET_CODE (XEXP (x, 0)) != REG)
4787 abort ();
4789 new_flags.is_write = 0;
4790 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4791 new_flags.is_write = 1;
4792 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4793 break;
4795 case POST_MODIFY:
4796 if (GET_CODE (XEXP (x, 0)) != REG)
4797 abort ();
4799 new_flags.is_write = 0;
4800 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4801 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4802 new_flags.is_write = 1;
4803 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4804 break;
4806 /* Handle common unary and binary ops for efficiency. */
4807 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4808 case MOD: case UDIV: case UMOD: case AND: case IOR:
4809 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4810 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4811 case NE: case EQ: case GE: case GT: case LE:
4812 case LT: case GEU: case GTU: case LEU: case LTU:
4813 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4814 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4815 break;
4817 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4818 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4819 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4820 case SQRT: case FFS: case POPCOUNT:
4821 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4822 break;
4824 case UNSPEC:
4825 switch (XINT (x, 1))
4827 case UNSPEC_LTOFF_DTPMOD:
4828 case UNSPEC_LTOFF_DTPREL:
4829 case UNSPEC_DTPREL:
4830 case UNSPEC_LTOFF_TPREL:
4831 case UNSPEC_TPREL:
4832 case UNSPEC_PRED_REL_MUTEX:
4833 case UNSPEC_PIC_CALL:
4834 case UNSPEC_MF:
4835 case UNSPEC_FETCHADD_ACQ:
4836 case UNSPEC_BSP_VALUE:
4837 case UNSPEC_FLUSHRS:
4838 case UNSPEC_BUNDLE_SELECTOR:
4839 break;
4841 case UNSPEC_GR_SPILL:
4842 case UNSPEC_GR_RESTORE:
4844 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4845 HOST_WIDE_INT bit = (offset >> 3) & 63;
4847 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4848 new_flags.is_write = (XINT (x, 1) == 1);
4849 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4850 new_flags, pred);
4851 break;
4854 case UNSPEC_FR_SPILL:
4855 case UNSPEC_FR_RESTORE:
4856 case UNSPEC_GETF_EXP:
4857 case UNSPEC_SETF_EXP:
4858 case UNSPEC_ADDP4:
4859 case UNSPEC_FR_SQRT_RECIP_APPROX:
4860 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4861 break;
4863 case UNSPEC_FR_RECIP_APPROX:
4864 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4865 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4866 break;
4868 case UNSPEC_CMPXCHG_ACQ:
4869 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4870 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4871 break;
4873 default:
4874 abort ();
4876 break;
4878 case UNSPEC_VOLATILE:
4879 switch (XINT (x, 1))
4881 case UNSPECV_ALLOC:
4882 /* Alloc must always be the first instruction of a group.
4883 We force this by always returning true. */
4884 /* ??? We might get better scheduling if we explicitly check for
4885 input/local/output register dependencies, and modify the
4886 scheduler so that alloc is always reordered to the start of
4887 the current group. We could then eliminate all of the
4888 first_instruction code. */
4889 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4891 new_flags.is_write = 1;
4892 rws_access_regno (REG_AR_CFM, new_flags, pred);
4893 return 1;
4895 case UNSPECV_SET_BSP:
4896 need_barrier = 1;
4897 break;
4899 case UNSPECV_BLOCKAGE:
4900 case UNSPECV_INSN_GROUP_BARRIER:
4901 case UNSPECV_BREAK:
4902 case UNSPECV_PSAC_ALL:
4903 case UNSPECV_PSAC_NORMAL:
4904 return 0;
4906 default:
4907 abort ();
4909 break;
4911 case RETURN:
4912 new_flags.is_write = 0;
4913 need_barrier = rws_access_regno (REG_RP, flags, pred);
4914 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4916 new_flags.is_write = 1;
4917 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4918 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4919 break;
4921 default:
4922 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4923 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4924 switch (format_ptr[i])
4926 case '0': /* unused field */
4927 case 'i': /* integer */
4928 case 'n': /* note */
4929 case 'w': /* wide integer */
4930 case 's': /* pointer to string */
4931 case 'S': /* optional pointer to string */
4932 break;
4934 case 'e':
4935 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4936 need_barrier = 1;
4937 break;
4939 case 'E':
4940 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4941 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4942 need_barrier = 1;
4943 break;
4945 default:
4946 abort ();
4948 break;
4950 return need_barrier;
4953 /* Clear out the state for group_barrier_needed_p at the start of a
4954 sequence of insns. */
4956 static void
4957 init_insn_group_barriers (void)
4959 memset (rws_sum, 0, sizeof (rws_sum));
4960 first_instruction = 1;
4963 /* Given the current state, recorded by previous calls to this function,
4964 determine whether a group barrier (a stop bit) is necessary before INSN.
4965 Return nonzero if so. */
4967 static int
4968 group_barrier_needed_p (rtx insn)
4970 rtx pat;
4971 int need_barrier = 0;
4972 struct reg_flags flags;
4974 memset (&flags, 0, sizeof (flags));
4975 switch (GET_CODE (insn))
4977 case NOTE:
4978 break;
4980 case BARRIER:
4981 /* A barrier doesn't imply an instruction group boundary. */
4982 break;
4984 case CODE_LABEL:
4985 memset (rws_insn, 0, sizeof (rws_insn));
4986 return 1;
4988 case CALL_INSN:
4989 flags.is_branch = 1;
4990 flags.is_sibcall = SIBLING_CALL_P (insn);
4991 memset (rws_insn, 0, sizeof (rws_insn));
4993 /* Don't bundle a call following another call. */
4994 if ((pat = prev_active_insn (insn))
4995 && GET_CODE (pat) == CALL_INSN)
4997 need_barrier = 1;
4998 break;
5001 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5002 break;
5004 case JUMP_INSN:
5005 flags.is_branch = 1;
5007 /* Don't bundle a jump following a call. */
5008 if ((pat = prev_active_insn (insn))
5009 && GET_CODE (pat) == CALL_INSN)
5011 need_barrier = 1;
5012 break;
5014 /* FALLTHRU */
5016 case INSN:
5017 if (GET_CODE (PATTERN (insn)) == USE
5018 || GET_CODE (PATTERN (insn)) == CLOBBER)
5019 /* Don't care about USE and CLOBBER "insns"---those are used to
5020 indicate to the optimizer that it shouldn't get rid of
5021 certain operations. */
5022 break;
5024 pat = PATTERN (insn);
5026 /* Ug. Hack hacks hacked elsewhere. */
5027 switch (recog_memoized (insn))
5029 /* We play dependency tricks with the epilogue in order
5030 to get proper schedules. Undo this for dv analysis. */
5031 case CODE_FOR_epilogue_deallocate_stack:
5032 case CODE_FOR_prologue_allocate_stack:
5033 pat = XVECEXP (pat, 0, 0);
5034 break;
5036 /* The pattern we use for br.cloop confuses the code above.
5037 The second element of the vector is representative. */
5038 case CODE_FOR_doloop_end_internal:
5039 pat = XVECEXP (pat, 0, 1);
5040 break;
5042 /* Doesn't generate code. */
5043 case CODE_FOR_pred_rel_mutex:
5044 case CODE_FOR_prologue_use:
5045 return 0;
5047 default:
5048 break;
5051 memset (rws_insn, 0, sizeof (rws_insn));
5052 need_barrier = rtx_needs_barrier (pat, flags, 0);
5054 /* Check to see if the previous instruction was a volatile
5055 asm. */
5056 if (! need_barrier)
5057 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5058 break;
5060 default:
5061 abort ();
5064 if (first_instruction && INSN_P (insn)
5065 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5066 && GET_CODE (PATTERN (insn)) != USE
5067 && GET_CODE (PATTERN (insn)) != CLOBBER)
5069 need_barrier = 0;
5070 first_instruction = 0;
5073 return need_barrier;
5076 /* Like group_barrier_needed_p, but do not clobber the current state. */
5078 static int
5079 safe_group_barrier_needed_p (rtx insn)
5081 struct reg_write_state rws_saved[NUM_REGS];
5082 int saved_first_instruction;
5083 int t;
5085 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5086 saved_first_instruction = first_instruction;
5088 t = group_barrier_needed_p (insn);
5090 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5091 first_instruction = saved_first_instruction;
5093 return t;
5096 /* Scan the current function and insert stop bits as necessary to
5097 eliminate dependencies. This function assumes that a final
5098 instruction scheduling pass has been run which has already
5099 inserted most of the necessary stop bits. This function only
5100 inserts new ones at basic block boundaries, since these are
5101 invisible to the scheduler. */
5103 static void
5104 emit_insn_group_barriers (FILE *dump)
5106 rtx insn;
5107 rtx last_label = 0;
5108 int insns_since_last_label = 0;
5110 init_insn_group_barriers ();
5112 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5114 if (GET_CODE (insn) == CODE_LABEL)
5116 if (insns_since_last_label)
5117 last_label = insn;
5118 insns_since_last_label = 0;
5120 else if (GET_CODE (insn) == NOTE
5121 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5123 if (insns_since_last_label)
5124 last_label = insn;
5125 insns_since_last_label = 0;
5127 else if (GET_CODE (insn) == INSN
5128 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5129 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5131 init_insn_group_barriers ();
5132 last_label = 0;
5134 else if (INSN_P (insn))
5136 insns_since_last_label = 1;
5138 if (group_barrier_needed_p (insn))
5140 if (last_label)
5142 if (dump)
5143 fprintf (dump, "Emitting stop before label %d\n",
5144 INSN_UID (last_label));
5145 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5146 insn = last_label;
5148 init_insn_group_barriers ();
5149 last_label = 0;
5156 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5157 This function has to emit all necessary group barriers. */
5159 static void
5160 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5162 rtx insn;
5164 init_insn_group_barriers ();
5166 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5168 if (GET_CODE (insn) == BARRIER)
5170 rtx last = prev_active_insn (insn);
5172 if (! last)
5173 continue;
5174 if (GET_CODE (last) == JUMP_INSN
5175 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5176 last = prev_active_insn (last);
5177 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5178 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5180 init_insn_group_barriers ();
5182 else if (INSN_P (insn))
5184 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5185 init_insn_group_barriers ();
5186 else if (group_barrier_needed_p (insn))
5188 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5189 init_insn_group_barriers ();
5190 group_barrier_needed_p (insn);
5197 static int errata_find_address_regs (rtx *, void *);
5198 static void errata_emit_nops (rtx);
5199 static void fixup_errata (void);
5201 /* This structure is used to track some details about the previous insn
5202 groups so we can determine whether it may be necessary to insert NOPs to
5203 work around hardware errata. */
5204 static struct group
5206 HARD_REG_SET p_reg_set;
5207 HARD_REG_SET gr_reg_conditionally_set;
5208 } last_group[2];
5210 /* Index into the last_group array. */
5211 static int group_idx;
5213 /* Called through for_each_rtx; determines if a hard register that was
5214 conditionally set in the previous group is used as an address register.
5215 It ensures that for_each_rtx returns 1 in that case. */
5216 static int
5217 errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
5219 rtx x = *xp;
5220 if (GET_CODE (x) != MEM)
5221 return 0;
5222 x = XEXP (x, 0);
5223 if (GET_CODE (x) == POST_MODIFY)
5224 x = XEXP (x, 0);
5225 if (GET_CODE (x) == REG)
5227 struct group *prev_group = last_group + (group_idx ^ 1);
5228 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5229 REGNO (x)))
5230 return 1;
5231 return -1;
5233 return 0;
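/* Illustrative sketch, not part of this file: the callback above returns 0
   to keep walking, 1 to stop the whole walk (that value is propagated back
   to the caller), and -1 to skip the sub-expressions of the current node.
   A standalone walker over a toy binary tree using the same convention.  */
struct model_node { int value; struct model_node *kid[2]; };

static int
model_walk (struct model_node *n, int (*fn) (struct model_node *))
{
  int i, r;

  if (!n)
    return 0;
  r = fn (n);
  if (r == -1)
    return 0;				/* skip this node's children */
  if (r != 0)
    return r;				/* stop and propagate */
  for (i = 0; i < 2; i++)
    if ((r = model_walk (n->kid[i], fn)) != 0)
      return r;
  return 0;
}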
5236 /* Called for each insn; this function keeps track of the state in
5237 last_group and emits additional NOPs if necessary to work around
5238 an Itanium A/B step erratum. */
5239 static void
5240 errata_emit_nops (rtx insn)
5242 struct group *this_group = last_group + group_idx;
5243 struct group *prev_group = last_group + (group_idx ^ 1);
5244 rtx pat = PATTERN (insn);
5245 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5246 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5247 enum attr_type type;
5248 rtx set = real_pat;
5250 if (GET_CODE (real_pat) == USE
5251 || GET_CODE (real_pat) == CLOBBER
5252 || GET_CODE (real_pat) == ASM_INPUT
5253 || GET_CODE (real_pat) == ADDR_VEC
5254 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5255 || asm_noperands (PATTERN (insn)) >= 0)
5256 return;
5258 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5259 parts of it. */
5261 if (GET_CODE (set) == PARALLEL)
5263 int i;
5264 set = XVECEXP (real_pat, 0, 0);
5265 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5266 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5267 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5269 set = 0;
5270 break;
5274 if (set && GET_CODE (set) != SET)
5275 set = 0;
5277 type = get_attr_type (insn);
5279 if (type == TYPE_F
5280 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5281 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5283 if ((type == TYPE_M || type == TYPE_A) && cond && set
5284 && REG_P (SET_DEST (set))
5285 && GET_CODE (SET_SRC (set)) != PLUS
5286 && GET_CODE (SET_SRC (set)) != MINUS
5287 && (GET_CODE (SET_SRC (set)) != ASHIFT
5288 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5289 && (GET_CODE (SET_SRC (set)) != MEM
5290 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5291 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5293 if (!COMPARISON_P (cond)
5294 || !REG_P (XEXP (cond, 0)))
5295 abort ();
5297 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5298 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5300 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5302 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5303 emit_insn_before (gen_nop (), insn);
5304 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5305 group_idx = 0;
5306 memset (last_group, 0, sizeof last_group);
5310 /* Emit extra nops if they are required to work around hardware errata. */
5312 static void
5313 fixup_errata (void)
5315 rtx insn;
5317 if (! TARGET_B_STEP)
5318 return;
5320 group_idx = 0;
5321 memset (last_group, 0, sizeof last_group);
5323 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5325 if (!INSN_P (insn))
5326 continue;
5328 if (ia64_safe_type (insn) == TYPE_S)
5330 group_idx ^= 1;
5331 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5333 else
5334 errata_emit_nops (insn);
5339 /* Instruction scheduling support. */
5341 #define NR_BUNDLES 10
5343 /* A list of names of all available bundles. */
5345 static const char *bundle_name [NR_BUNDLES] =
5347 ".mii",
5348 ".mmi",
5349 ".mfi",
5350 ".mmf",
5351 #if NR_BUNDLES == 10
5352 ".bbb",
5353 ".mbb",
5354 #endif
5355 ".mib",
5356 ".mmb",
5357 ".mfb",
5358 ".mlx"
5361 /* Nonzero if we should insert stop bits into the schedule. */
5363 int ia64_final_schedule = 0;
5365 /* Codes of the corresponding queried units: */
5367 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5368 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5370 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5371 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5373 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5375 /* The following variable value is an insn group barrier. */
5377 static rtx dfa_stop_insn;
5379 /* The following variable value is the last issued insn. */
5381 static rtx last_scheduled_insn;
5383 /* The following variable value is the size of the DFA state. */
5385 static size_t dfa_state_size;
5387 /* The following variable value is a pointer to a DFA state used as a
5388 temporary variable. */
5390 static state_t temp_dfa_state = NULL;
5392 /* The following variable value is the DFA state after issuing the last
5393 insn. */
5395 static state_t prev_cycle_state = NULL;
5397 /* The following array element values are TRUE if the corresponding
5398 insn requires stop bits to be added before it. */
5400 static char *stops_p;
5402 /* The following variable is used to set up the array mentioned above. */
5404 static int stop_before_p = 0;
5406 /* The following variable value is the length of the arrays `clocks' and
5407 `add_cycles'. */
5409 static int clocks_length;
5411 /* The following array element values are the cycles on which the
5412 corresponding insn will be issued. The array is used only for
5413 Itanium1. */
5415 static int *clocks;
5417 /* The following array element values are the numbers of cycles that should
5418 be added to improve insn scheduling for MM_insns for Itanium1. */
5420 static int *add_cycles;
5422 static rtx ia64_single_set (rtx);
5423 static void ia64_emit_insn_before (rtx, rtx);
5425 /* Map a bundle number to its pseudo-op. */
5427 const char *
5428 get_bundle_name (int b)
5430 return bundle_name[b];
5434 /* Return the maximum number of instructions a cpu can issue. */
5436 static int
5437 ia64_issue_rate (void)
5439 return 6;
5442 /* Helper function - like single_set, but look inside COND_EXEC. */
5444 static rtx
5445 ia64_single_set (rtx insn)
5447 rtx x = PATTERN (insn), ret;
5448 if (GET_CODE (x) == COND_EXEC)
5449 x = COND_EXEC_CODE (x);
5450 if (GET_CODE (x) == SET)
5451 return x;
5453 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5454 Although they are not a classical single set, the second set is there just
5455 to protect it from moving past FP-relative stack accesses. */
5456 switch (recog_memoized (insn))
5458 case CODE_FOR_prologue_allocate_stack:
5459 case CODE_FOR_epilogue_deallocate_stack:
5460 ret = XVECEXP (x, 0, 0);
5461 break;
5463 default:
5464 ret = single_set_2 (insn, x);
5465 break;
5468 return ret;
5471 /* Adjust the cost of a scheduling dependency. Return the new cost of
5472 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5474 static int
5475 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5477 enum attr_itanium_class dep_class;
5478 enum attr_itanium_class insn_class;
5480 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5481 return cost;
5483 insn_class = ia64_safe_itanium_class (insn);
5484 dep_class = ia64_safe_itanium_class (dep_insn);
5485 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5486 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5487 return 0;
5489 return cost;
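/* Illustrative sketch, not part of this file: the rule above in isolation -
   only output (write-after-write) dependencies are adjusted, and they become
   free when either end of the dependency is a store-class insn.  Flags are
   modelled as plain ints.  */
static int
model_adjust_cost (int is_output_dep, int insn_is_store, int dep_is_store,
		   int cost)
{
  if (!is_output_dep)
    return cost;
  return (insn_is_store || dep_is_store) ? 0 : cost;
}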
5492 /* Like emit_insn_before, but skip cycle_display notes.
5493 ??? When cycle display notes are implemented, update this. */
5495 static void
5496 ia64_emit_insn_before (rtx insn, rtx before)
5498 emit_insn_before (insn, before);
5501 /* The following function marks insns that produce addresses for load
5502 and store insns. Such insns will be placed into M slots because this
5503 decreases latency for Itanium1 (see function
5504 `ia64_produce_address_p' and the DFA descriptions). */
5506 static void
5507 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
5509 rtx insn, link, next, next_tail;
5511 next_tail = NEXT_INSN (tail);
5512 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5513 if (INSN_P (insn))
5514 insn->call = 0;
5515 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5516 if (INSN_P (insn)
5517 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5519 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
5521 next = XEXP (link, 0);
5522 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
5523 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
5524 && ia64_st_address_bypass_p (insn, next))
5525 break;
5526 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
5527 || ia64_safe_itanium_class (next)
5528 == ITANIUM_CLASS_FLD)
5529 && ia64_ld_address_bypass_p (insn, next))
5530 break;
5532 insn->call = link != 0;
5536 /* We're beginning a new block. Initialize data structures as necessary. */
5538 static void
5539 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
5540 int sched_verbose ATTRIBUTE_UNUSED,
5541 int max_ready ATTRIBUTE_UNUSED)
5543 #ifdef ENABLE_CHECKING
5544 rtx insn;
5546 if (reload_completed)
5547 for (insn = NEXT_INSN (current_sched_info->prev_head);
5548 insn != current_sched_info->next_tail;
5549 insn = NEXT_INSN (insn))
5550 if (SCHED_GROUP_P (insn))
5551 abort ();
5552 #endif
5553 last_scheduled_insn = NULL_RTX;
5554 init_insn_group_barriers ();
5557 /* We are about to begin issuing insns for this clock cycle.
5558 Override the default sort algorithm to better slot instructions. */
5560 static int
5561 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
5562 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
5563 int reorder_type)
5565 int n_asms;
5566 int n_ready = *pn_ready;
5567 rtx *e_ready = ready + n_ready;
5568 rtx *insnp;
5570 if (sched_verbose)
5571 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
5573 if (reorder_type == 0)
5575 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5576 n_asms = 0;
5577 for (insnp = ready; insnp < e_ready; insnp++)
5578 if (insnp < e_ready)
5580 rtx insn = *insnp;
5581 enum attr_type t = ia64_safe_type (insn);
5582 if (t == TYPE_UNKNOWN)
5584 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5585 || asm_noperands (PATTERN (insn)) >= 0)
5587 rtx lowest = ready[n_asms];
5588 ready[n_asms] = insn;
5589 *insnp = lowest;
5590 n_asms++;
5592 else
5594 rtx highest = ready[n_ready - 1];
5595 ready[n_ready - 1] = insn;
5596 *insnp = highest;
5597 return 1;
5602 if (n_asms < n_ready)
5604 /* Some normal insns to process. Skip the asms. */
5605 ready += n_asms;
5606 n_ready -= n_asms;
5608 else if (n_ready > 0)
5609 return 1;
5612 if (ia64_final_schedule)
5614 int deleted = 0;
5615 int nr_need_stop = 0;
5617 for (insnp = ready; insnp < e_ready; insnp++)
5618 if (safe_group_barrier_needed_p (*insnp))
5619 nr_need_stop++;
5621 if (reorder_type == 1 && n_ready == nr_need_stop)
5622 return 0;
5623 if (reorder_type == 0)
5624 return 1;
5625 insnp = e_ready;
5626 /* Move down everything that needs a stop bit, preserving
5627 relative order. */
5628 while (insnp-- > ready + deleted)
5629 while (insnp >= ready + deleted)
5631 rtx insn = *insnp;
5632 if (! safe_group_barrier_needed_p (insn))
5633 break;
5634 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
5635 *ready = insn;
5636 deleted++;
5638 n_ready -= deleted;
5639 ready += deleted;
5642 return 1;
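/* Illustrative sketch, not part of this file: the reorder loop above is a
   stable partition of the ready array - insns that need a stop bit are slid
   toward index 0, keeping their relative order, while the rest stay toward
   the high end of the array (the end the scheduler issues from first).  A
   standalone model using ints, where a negative value stands for "needs a
   stop bit".  Returns how many elements were moved down.  */
#include <string.h>

static int
model_partition_ready (int *ready, int n_ready)
{
  int deleted = 0;
  int *insnp = ready + n_ready;

  while (insnp-- > ready + deleted)
    while (insnp >= ready + deleted)
      {
	int insn = *insnp;
	if (insn >= 0)
	  break;			/* does not need a stop bit */
	memmove (ready + 1, ready, (insnp - ready) * sizeof (int));
	*ready = insn;
	deleted++;
      }
  return deleted;
}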
5645 /* We are about to begin issuing insns for this clock cycle. Override
5646 the default sort algorithm to better slot instructions. */
5648 static int
5649 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
5650 int clock_var)
5652 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
5653 pn_ready, clock_var, 0);
5656 /* Like ia64_sched_reorder, but called after issuing each insn.
5657 Override the default sort algorithm to better slot instructions. */
5659 static int
5660 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
5661 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
5662 int *pn_ready, int clock_var)
5664 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
5665 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
5666 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
5667 clock_var, 1);
5670 /* We are about to issue INSN. Return the number of insns left on the
5671 ready queue that can be issued this cycle. */
5673 static int
5674 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
5675 int sched_verbose ATTRIBUTE_UNUSED,
5676 rtx insn ATTRIBUTE_UNUSED,
5677 int can_issue_more ATTRIBUTE_UNUSED)
5679 last_scheduled_insn = insn;
5680 memcpy (prev_cycle_state, curr_state, dfa_state_size);
5681 if (reload_completed)
5683 if (group_barrier_needed_p (insn))
5684 abort ();
5685 if (GET_CODE (insn) == CALL_INSN)
5686 init_insn_group_barriers ();
5687 stops_p [INSN_UID (insn)] = stop_before_p;
5688 stop_before_p = 0;
5690 return 1;
5693 /* We are choosing insn from the ready queue. Return nonzero if INSN
5694 can be chosen. */
5696 static int
5697 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
5699 if (insn == NULL_RTX || !INSN_P (insn))
5700 abort ();
5701 return (!reload_completed
5702 || !safe_group_barrier_needed_p (insn));
5705 /* The following variable value is a pseudo-insn used by the DFA insn
5706 scheduler to change the DFA state when the simulated clock is
5707 increased. */
5709 static rtx dfa_pre_cycle_insn;
5711 /* We are about to begin issuing INSN. Return nonzero if we cannot
5712 issue it on the given cycle CLOCK; clear *SORT_P if the ready queue
5713 should not be sorted on the next clock start. */
5715 static int
5716 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
5717 int clock, int *sort_p)
5719 int setup_clocks_p = FALSE;
5721 if (insn == NULL_RTX || !INSN_P (insn))
5722 abort ();
5723 if ((reload_completed && safe_group_barrier_needed_p (insn))
5724 || (last_scheduled_insn
5725 && (GET_CODE (last_scheduled_insn) == CALL_INSN
5726 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
5727 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
5729 init_insn_group_barriers ();
5730 if (verbose && dump)
5731 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
5732 last_clock == clock ? " + cycle advance" : "");
5733 stop_before_p = 1;
5734 if (last_clock == clock)
5736 state_transition (curr_state, dfa_stop_insn);
5737 if (TARGET_EARLY_STOP_BITS)
5738 *sort_p = (last_scheduled_insn == NULL_RTX
5739 || GET_CODE (last_scheduled_insn) != CALL_INSN);
5740 else
5741 *sort_p = 0;
5742 return 1;
5744 else if (reload_completed)
5745 setup_clocks_p = TRUE;
5746 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
5747 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
5748 state_reset (curr_state);
5749 else
5751 memcpy (curr_state, prev_cycle_state, dfa_state_size);
5752 state_transition (curr_state, dfa_stop_insn);
5753 state_transition (curr_state, dfa_pre_cycle_insn);
5754 state_transition (curr_state, NULL);
5757 else if (reload_completed)
5758 setup_clocks_p = TRUE;
5759 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
5760 && GET_CODE (PATTERN (insn)) != ASM_INPUT
5761 && asm_noperands (PATTERN (insn)) < 0)
5763 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
5765 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
5767 rtx link;
5768 int d = -1;
5770 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
5771 if (REG_NOTE_KIND (link) == 0)
5773 enum attr_itanium_class dep_class;
5774 rtx dep_insn = XEXP (link, 0);
5776 dep_class = ia64_safe_itanium_class (dep_insn);
5777 if ((dep_class == ITANIUM_CLASS_MMMUL
5778 || dep_class == ITANIUM_CLASS_MMSHF)
5779 && last_clock - clocks [INSN_UID (dep_insn)] < 4
5780 && (d < 0
5781 || last_clock - clocks [INSN_UID (dep_insn)] < d))
5782 d = last_clock - clocks [INSN_UID (dep_insn)];
5784 if (d >= 0)
5785 add_cycles [INSN_UID (insn)] = 3 - d;
5788 return 0;
5793 /* The following page contains abstract data `bundle states' which are
5794 used for bundling insns (inserting nops and template generation). */
5796 /* The following describes state of insn bundling. */
5798 struct bundle_state
5800 /* Unique bundle state number to identify them in the debugging
5801 output */
5802 int unique_num;
5803 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
5804 /* number of nops before and after the insn */
5805 short before_nops_num, after_nops_num;
5806 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
5807 insn) */
5808 int cost; /* cost of the state in cycles */
5809 int accumulated_insns_num; /* number of all previous insns including
5810 nops; an L insn is counted as 2 insns */
5811 int branch_deviation; /* deviation of previous branches from 3rd slots */
5812 struct bundle_state *next; /* next state with the same insn_num */
5813 struct bundle_state *originator; /* originator (previous insn state) */
5814 /* All bundle states are in the following chain. */
5815 struct bundle_state *allocated_states_chain;
5816 /* The DFA State after issuing the insn and the nops. */
5817 state_t dfa_state;
5820 /* The following array maps an insn number to the corresponding bundle state. */
5822 static struct bundle_state **index_to_bundle_states;
5824 /* The unique number of the next bundle state. */
5826 static int bundle_states_num;
5828 /* All allocated bundle states are in the following chain. */
5830 static struct bundle_state *allocated_bundle_states_chain;
5832 /* All allocated but not used bundle states are in the following
5833 chain. */
5835 static struct bundle_state *free_bundle_state_chain;
5838 /* The following function returns a free bundle state. */
5840 static struct bundle_state *
5841 get_free_bundle_state (void)
5843 struct bundle_state *result;
5845 if (free_bundle_state_chain != NULL)
5847 result = free_bundle_state_chain;
5848 free_bundle_state_chain = result->next;
5850 else
5852 result = xmalloc (sizeof (struct bundle_state));
5853 result->dfa_state = xmalloc (dfa_state_size);
5854 result->allocated_states_chain = allocated_bundle_states_chain;
5855 allocated_bundle_states_chain = result;
5857 result->unique_num = bundle_states_num++;
5858 return result;
5862 /* The following function frees given bundle state. */
5864 static void
5865 free_bundle_state (struct bundle_state *state)
5867 state->next = free_bundle_state_chain;
5868 free_bundle_state_chain = state;
5871 /* Start work with abstract data `bundle states'. */
5873 static void
5874 initiate_bundle_states (void)
5876 bundle_states_num = 0;
5877 free_bundle_state_chain = NULL;
5878 allocated_bundle_states_chain = NULL;
5881 /* Finish work with abstract data `bundle states'. */
5883 static void
5884 finish_bundle_states (void)
5886 struct bundle_state *curr_state, *next_state;
5888 for (curr_state = allocated_bundle_states_chain;
5889 curr_state != NULL;
5890 curr_state = next_state)
5892 next_state = curr_state->allocated_states_chain;
5893 free (curr_state->dfa_state);
5894 free (curr_state);
5898 /* Hash table of the bundle states. The key is dfa_state and insn_num
5899 of the bundle states. */
5901 static htab_t bundle_state_table;
5903 /* The function returns hash of BUNDLE_STATE. */
5905 static unsigned
5906 bundle_state_hash (const void *bundle_state)
5908 const struct bundle_state *state = (struct bundle_state *) bundle_state;
5909 unsigned result, i;
5911 for (result = i = 0; i < dfa_state_size; i++)
5912 result += (((unsigned char *) state->dfa_state) [i]
5913 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
5914 return result + state->insn_num;
5917 /* The function returns nonzero if the bundle state keys are equal. */
5919 static int
5920 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
5922 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
5923 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
5925 return (state1->insn_num == state2->insn_num
5926 && memcmp (state1->dfa_state, state2->dfa_state,
5927 dfa_state_size) == 0);
5930 /* The function inserts the BUNDLE_STATE into the hash table. The
5931 function returns nonzero if the bundle has been inserted into the
5932 table. The table contains the best bundle state with given key. */
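/* A new bundle state is considered better than an existing state with
   the same key if it has a smaller cost, or the same cost and fewer
   accumulated insns (nops included), or the same number of insns and a
   smaller branch deviation; in that case the contents of the table
   entry are replaced below.  */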
5934 static int
5935 insert_bundle_state (struct bundle_state *bundle_state)
5937 void **entry_ptr;
5939 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
5940 if (*entry_ptr == NULL)
5942 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
5943 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
5944 *entry_ptr = (void *) bundle_state;
5945 return TRUE;
5947 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
5948 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
5949 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
5950 > bundle_state->accumulated_insns_num
5951 || (((struct bundle_state *)
5952 *entry_ptr)->accumulated_insns_num
5953 == bundle_state->accumulated_insns_num
5954 && ((struct bundle_state *)
5955 *entry_ptr)->branch_deviation
5956 > bundle_state->branch_deviation))))
5959 struct bundle_state temp;
5961 temp = *(struct bundle_state *) *entry_ptr;
5962 *(struct bundle_state *) *entry_ptr = *bundle_state;
5963 ((struct bundle_state *) *entry_ptr)->next = temp.next;
5964 *bundle_state = temp;
5966 return FALSE;
5969 /* Start work with the hash table. */
5971 static void
5972 initiate_bundle_state_table (void)
5974 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
5975 (htab_del) 0);
5978 /* Finish work with the hash table. */
5980 static void
5981 finish_bundle_state_table (void)
5983 htab_delete (bundle_state_table);
5988 /* The following variable is an insn `nop' used to check bundle states
5989 with different numbers of inserted nops. */
5991 static rtx ia64_nop;
5993 /* The following function tries to issue NOPS_NUM nops for the current
5994 state without advancing the processor cycle. If it fails, the
5995 function returns FALSE and frees the current state. */
5997 static int
5998 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6000 int i;
6002 for (i = 0; i < nops_num; i++)
6003 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6005 free_bundle_state (curr_state);
6006 return FALSE;
6008 return TRUE;
6011 /* The following function tries to issue INSN for the current
6012 state without advancing the processor cycle. If it fails, the
6013 function returns FALSE and frees the current state. */
6015 static int
6016 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6018 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6020 free_bundle_state (curr_state);
6021 return FALSE;
6023 return TRUE;
6026 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6027 starting with ORIGINATOR without advancing the processor cycle. If
6028 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6029 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6030 If successful, the function creates a new bundle state and inserts
6031 it into the hash table and into `index_to_bundle_states'. */
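/* Three cases are distinguished below: a group barrier insn is issued
   without occupying a bundle slot; an insn that does not carry the
   TImode cycle mark is issued within the current simulated cycle; and
   an insn carrying the TImode mark first advances the simulated cycle
   (a dfa_pre_cycle_insn transition plus a cost increment) and is then
   issued.  */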
6033 static void
6034 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6035 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6037 struct bundle_state *curr_state;
6039 curr_state = get_free_bundle_state ();
6040 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6041 curr_state->insn = insn;
6042 curr_state->insn_num = originator->insn_num + 1;
6043 curr_state->cost = originator->cost;
6044 curr_state->originator = originator;
6045 curr_state->before_nops_num = before_nops_num;
6046 curr_state->after_nops_num = 0;
6047 curr_state->accumulated_insns_num
6048 = originator->accumulated_insns_num + before_nops_num;
6049 curr_state->branch_deviation = originator->branch_deviation;
6050 if (insn == NULL_RTX)
6051 abort ();
6052 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6054 if (GET_MODE (insn) == TImode)
6055 abort ();
6056 if (!try_issue_nops (curr_state, before_nops_num))
6057 return;
6058 if (!try_issue_insn (curr_state, insn))
6059 return;
6060 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6061 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6062 && curr_state->accumulated_insns_num % 3 != 0)
6064 free_bundle_state (curr_state);
6065 return;
6068 else if (GET_MODE (insn) != TImode)
6070 if (!try_issue_nops (curr_state, before_nops_num))
6071 return;
6072 if (!try_issue_insn (curr_state, insn))
6073 return;
6074 curr_state->accumulated_insns_num++;
6075 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6076 || asm_noperands (PATTERN (insn)) >= 0)
6077 abort ();
6078 if (ia64_safe_type (insn) == TYPE_L)
6079 curr_state->accumulated_insns_num++;
6081 else
6083 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6084 state_transition (curr_state->dfa_state, NULL);
6085 curr_state->cost++;
6086 if (!try_issue_nops (curr_state, before_nops_num))
6087 return;
6088 if (!try_issue_insn (curr_state, insn))
6089 return;
6090 curr_state->accumulated_insns_num++;
6091 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6092 || asm_noperands (PATTERN (insn)) >= 0)
6094 /* Finish bundle containing asm insn. */
6095 curr_state->after_nops_num
6096 = 3 - curr_state->accumulated_insns_num % 3;
6097 curr_state->accumulated_insns_num
6098 += 3 - curr_state->accumulated_insns_num % 3;
6100 else if (ia64_safe_type (insn) == TYPE_L)
6101 curr_state->accumulated_insns_num++;
6103 if (ia64_safe_type (insn) == TYPE_B)
6104 curr_state->branch_deviation
6105 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6106 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6108 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6110 state_t dfa_state;
6111 struct bundle_state *curr_state1;
6112 struct bundle_state *allocated_states_chain;
6114 curr_state1 = get_free_bundle_state ();
6115 dfa_state = curr_state1->dfa_state;
6116 allocated_states_chain = curr_state1->allocated_states_chain;
6117 *curr_state1 = *curr_state;
6118 curr_state1->dfa_state = dfa_state;
6119 curr_state1->allocated_states_chain = allocated_states_chain;
6120 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6121 dfa_state_size);
6122 curr_state = curr_state1;
6124 if (!try_issue_nops (curr_state,
6125 3 - curr_state->accumulated_insns_num % 3))
6126 return;
6127 curr_state->after_nops_num
6128 = 3 - curr_state->accumulated_insns_num % 3;
6129 curr_state->accumulated_insns_num
6130 += 3 - curr_state->accumulated_insns_num % 3;
6132 if (!insert_bundle_state (curr_state))
6133 free_bundle_state (curr_state);
6134 return;
6137 /* The following function returns the position in the two-bundle
6138 window for the given STATE. */
6140 static int
6141 get_max_pos (state_t state)
6143 if (cpu_unit_reservation_p (state, pos_6))
6144 return 6;
6145 else if (cpu_unit_reservation_p (state, pos_5))
6146 return 5;
6147 else if (cpu_unit_reservation_p (state, pos_4))
6148 return 4;
6149 else if (cpu_unit_reservation_p (state, pos_3))
6150 return 3;
6151 else if (cpu_unit_reservation_p (state, pos_2))
6152 return 2;
6153 else if (cpu_unit_reservation_p (state, pos_1))
6154 return 1;
6155 else
6156 return 0;
6159 /* The function returns the code of a possible template for the given
6160 position and state. The function should be called only with POS
6161 equal to 3 or 6. */
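/* The template codes returned below are: 0 - MII, 1 - MMI, 2 - MFI,
   3 - MMF, 4 - BBB, 5 - MBB, 6 - MIB, 7 - MMB, 8 - MFB, 9 - MLX.  */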
6163 static int
6164 get_template (state_t state, int pos)
6166 switch (pos)
6168 case 3:
6169 if (cpu_unit_reservation_p (state, _0mii_))
6170 return 0;
6171 else if (cpu_unit_reservation_p (state, _0mmi_))
6172 return 1;
6173 else if (cpu_unit_reservation_p (state, _0mfi_))
6174 return 2;
6175 else if (cpu_unit_reservation_p (state, _0mmf_))
6176 return 3;
6177 else if (cpu_unit_reservation_p (state, _0bbb_))
6178 return 4;
6179 else if (cpu_unit_reservation_p (state, _0mbb_))
6180 return 5;
6181 else if (cpu_unit_reservation_p (state, _0mib_))
6182 return 6;
6183 else if (cpu_unit_reservation_p (state, _0mmb_))
6184 return 7;
6185 else if (cpu_unit_reservation_p (state, _0mfb_))
6186 return 8;
6187 else if (cpu_unit_reservation_p (state, _0mlx_))
6188 return 9;
6189 else
6190 abort ();
6191 case 6:
6192 if (cpu_unit_reservation_p (state, _1mii_))
6193 return 0;
6194 else if (cpu_unit_reservation_p (state, _1mmi_))
6195 return 1;
6196 else if (cpu_unit_reservation_p (state, _1mfi_))
6197 return 2;
6198 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6199 return 3;
6200 else if (cpu_unit_reservation_p (state, _1bbb_))
6201 return 4;
6202 else if (cpu_unit_reservation_p (state, _1mbb_))
6203 return 5;
6204 else if (cpu_unit_reservation_p (state, _1mib_))
6205 return 6;
6206 else if (cpu_unit_reservation_p (state, _1mmb_))
6207 return 7;
6208 else if (cpu_unit_reservation_p (state, _1mfb_))
6209 return 8;
6210 else if (cpu_unit_reservation_p (state, _1mlx_))
6211 return 9;
6212 else
6213 abort ();
6214 default:
6215 abort ();
6219 /* The following function returns the first insn important for insn
6220 bundling at or after INSN and before TAIL. */
6222 static rtx
6223 get_next_important_insn (rtx insn, rtx tail)
6225 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6226 if (INSN_P (insn)
6227 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6228 && GET_CODE (PATTERN (insn)) != USE
6229 && GET_CODE (PATTERN (insn)) != CLOBBER)
6230 return insn;
6231 return NULL_RTX;
6234 /* The following function does insn bundling. Bundling means
6235 inserting templates and nop insns to fit insn groups into permitted
6236 templates. Instruction scheduling uses an NDFA (non-deterministic
6237 finite automaton) encoding information about the templates and the
6238 inserted nops. The nondeterminism of the automaton makes it possible
6239 to follow all possible insn sequences very quickly.
6241 Unfortunately it is not possible to get information about the inserted
6242 nop insns and the templates used from the automaton states. The
6243 automaton only says that we can issue an insn, possibly inserting
6244 some nops before it and using some template. Therefore insn
6245 bundling in this function is implemented using a DFA
6246 (deterministic finite automaton). We follow all possible insn
6247 sequences by inserting 0-2 nops (which is what the NDFA describes for
6248 insn scheduling) before/after each insn being bundled. We know the
6249 start of a simulated processor cycle from insn scheduling (an insn
6250 starting a new cycle has TImode).
6252 A naive implementation of insn bundling would create an enormous
6253 number of possible insn sequences satisfying the information about new
6254 cycle ticks taken from insn scheduling. To make the algorithm
6255 practical we use dynamic programming. Each decision (about
6256 inserting nops, and implicitly about previous decisions) is described
6257 by the structure bundle_state (see above). If we generate the same
6258 bundle state (the key is the automaton state after issuing the insns
6259 and nops for it), we reuse the already generated one. As a
6260 consequence we reject some decisions which cannot improve the solution
6261 and reduce the memory needed by the algorithm.
6263 When we reach the end of the EBB (extended basic block), we choose the
6264 best sequence and then, moving backwards through the EBB, insert
6265 templates for the best alternative. The templates are obtained by
6266 querying the automaton state for each insn in the chosen bundle states.
6268 So the algorithm makes two (forward and backward) passes through the
6269 EBB. There is an additional forward pass through the EBB for the
6270 Itanium1 processor. This pass inserts more nops to make the dependency
6271 between a producer insn and MMMUL/MMSHF at least 4 cycles long. */
6273 static void
6274 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6276 struct bundle_state *curr_state, *next_state, *best_state;
6277 rtx insn, next_insn;
6278 int insn_num;
6279 int i, bundle_end_p, only_bundle_end_p, asm_p;
6280 int pos = 0, max_pos, template0, template1;
6281 rtx b;
6282 rtx nop;
6283 enum attr_type type;
6285 insn_num = 0;
6286 /* Count insns in the EBB. */
6287 for (insn = NEXT_INSN (prev_head_insn);
6288 insn && insn != tail;
6289 insn = NEXT_INSN (insn))
6290 if (INSN_P (insn))
6291 insn_num++;
6292 if (insn_num == 0)
6293 return;
6294 bundling_p = 1;
6295 dfa_clean_insn_cache ();
6296 initiate_bundle_state_table ();
6297 index_to_bundle_states = xmalloc ((insn_num + 2)
6298 * sizeof (struct bundle_state *));
6299 /* First (forward) pass -- generation of bundle states. */
6300 curr_state = get_free_bundle_state ();
6301 curr_state->insn = NULL;
6302 curr_state->before_nops_num = 0;
6303 curr_state->after_nops_num = 0;
6304 curr_state->insn_num = 0;
6305 curr_state->cost = 0;
6306 curr_state->accumulated_insns_num = 0;
6307 curr_state->branch_deviation = 0;
6308 curr_state->next = NULL;
6309 curr_state->originator = NULL;
6310 state_reset (curr_state->dfa_state);
6311 index_to_bundle_states [0] = curr_state;
6312 insn_num = 0;
6313 /* Shift the cycle mark if it is put on an insn which could be ignored. */
6314 for (insn = NEXT_INSN (prev_head_insn);
6315 insn != tail;
6316 insn = NEXT_INSN (insn))
6317 if (INSN_P (insn)
6318 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6319 || GET_CODE (PATTERN (insn)) == USE
6320 || GET_CODE (PATTERN (insn)) == CLOBBER)
6321 && GET_MODE (insn) == TImode)
6323 PUT_MODE (insn, VOIDmode);
6324 for (next_insn = NEXT_INSN (insn);
6325 next_insn != tail;
6326 next_insn = NEXT_INSN (next_insn))
6327 if (INSN_P (next_insn)
6328 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6329 && GET_CODE (PATTERN (next_insn)) != USE
6330 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6332 PUT_MODE (next_insn, TImode);
6333 break;
6336 /* Forward pass: generation of bundle states. */
6337 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6338 insn != NULL_RTX;
6339 insn = next_insn)
6341 if (!INSN_P (insn)
6342 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6343 || GET_CODE (PATTERN (insn)) == USE
6344 || GET_CODE (PATTERN (insn)) == CLOBBER)
6345 abort ();
6346 type = ia64_safe_type (insn);
6347 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6348 insn_num++;
6349 index_to_bundle_states [insn_num] = NULL;
6350 for (curr_state = index_to_bundle_states [insn_num - 1];
6351 curr_state != NULL;
6352 curr_state = next_state)
6354 pos = curr_state->accumulated_insns_num % 3;
6355 next_state = curr_state->next;
6356 /* We must fill up the current bundle in order to start a
6357 subsequent asm insn in a new bundle. An asm insn is always
6358 placed in a separate bundle. */
6359 only_bundle_end_p
6360 = (next_insn != NULL_RTX
6361 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6362 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6363 /* We may fill up the current bundle if it is the cycle end
6364 without a group barrier. */
6365 bundle_end_p
6366 = (only_bundle_end_p || next_insn == NULL_RTX
6367 || (GET_MODE (next_insn) == TImode
6368 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6369 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6370 || type == TYPE_S
6371 /* We need to insert 2 nops for cases like M_MII. To
6372 guarantee issuing all insns on the same cycle for
6373 Itanium 1, we need to issue 2 nops after the first M
6374 insn (MnnMII where n is a nop insn). */
6375 || ((type == TYPE_M || type == TYPE_A)
6376 && ia64_tune == PROCESSOR_ITANIUM
6377 && !bundle_end_p && pos == 1))
6378 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6379 only_bundle_end_p);
6380 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6381 only_bundle_end_p);
6382 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6383 only_bundle_end_p);
6385 if (index_to_bundle_states [insn_num] == NULL)
6386 abort ();
6387 for (curr_state = index_to_bundle_states [insn_num];
6388 curr_state != NULL;
6389 curr_state = curr_state->next)
6390 if (verbose >= 2 && dump)
6392 /* This structure is taken from generated code of the
6393 pipeline hazard recognizer (see file insn-attrtab.c).
6394 Please don't forget to change the structure if a new
6395 automaton is added to .md file. */
6396 struct DFA_chip
6398 unsigned short one_automaton_state;
6399 unsigned short oneb_automaton_state;
6400 unsigned short two_automaton_state;
6401 unsigned short twob_automaton_state;
6404 fprintf
6405 (dump,
6406 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6407 curr_state->unique_num,
6408 (curr_state->originator == NULL
6409 ? -1 : curr_state->originator->unique_num),
6410 curr_state->cost,
6411 curr_state->before_nops_num, curr_state->after_nops_num,
6412 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6413 (ia64_tune == PROCESSOR_ITANIUM
6414 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6415 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6416 INSN_UID (insn));
6419 if (index_to_bundle_states [insn_num] == NULL)
6420 /* We should find a solution because the 2nd insn scheduling has
6421 found one. */
6422 abort ();
6423 /* Find a state corresponding to the best insn sequence. */
6424 best_state = NULL;
6425 for (curr_state = index_to_bundle_states [insn_num];
6426 curr_state != NULL;
6427 curr_state = curr_state->next)
6428 /* We are only looking at states whose last bundle is completely
6429 filled. We prefer insn sequences with minimal cost first, then
6430 those with the fewest inserted nops, and finally those with branch
6431 insns placed in the 3rd slots. */
6432 if (curr_state->accumulated_insns_num % 3 == 0
6433 && (best_state == NULL || best_state->cost > curr_state->cost
6434 || (best_state->cost == curr_state->cost
6435 && (curr_state->accumulated_insns_num
6436 < best_state->accumulated_insns_num
6437 || (curr_state->accumulated_insns_num
6438 == best_state->accumulated_insns_num
6439 && curr_state->branch_deviation
6440 < best_state->branch_deviation)))))
6441 best_state = curr_state;
6442 /* Second (backward) pass: adding nops and templates. */
6443 insn_num = best_state->before_nops_num;
6444 template0 = template1 = -1;
6445 for (curr_state = best_state;
6446 curr_state->originator != NULL;
6447 curr_state = curr_state->originator)
6449 insn = curr_state->insn;
6450 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6451 || asm_noperands (PATTERN (insn)) >= 0);
6452 insn_num++;
6453 if (verbose >= 2 && dump)
6455 struct DFA_chip
6457 unsigned short one_automaton_state;
6458 unsigned short oneb_automaton_state;
6459 unsigned short two_automaton_state;
6460 unsigned short twob_automaton_state;
6463 fprintf
6464 (dump,
6465 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6466 curr_state->unique_num,
6467 (curr_state->originator == NULL
6468 ? -1 : curr_state->originator->unique_num),
6469 curr_state->cost,
6470 curr_state->before_nops_num, curr_state->after_nops_num,
6471 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6472 (ia64_tune == PROCESSOR_ITANIUM
6473 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6474 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6475 INSN_UID (insn));
6477 /* Find the position in the current bundle window. The window can
6478 contain at most two bundles. A two-bundle window means that
6479 the processor will make two bundle rotations. */
6480 max_pos = get_max_pos (curr_state->dfa_state);
6481 if (max_pos == 6
6482 /* The following (negative template number) means that the
6483 processor did one bundle rotation. */
6484 || (max_pos == 3 && template0 < 0))
6486 /* We are at the end of the window -- find template(s) for
6487 its bundle(s). */
6488 pos = max_pos;
6489 if (max_pos == 3)
6490 template0 = get_template (curr_state->dfa_state, 3);
6491 else
6493 template1 = get_template (curr_state->dfa_state, 3);
6494 template0 = get_template (curr_state->dfa_state, 6);
6497 if (max_pos > 3 && template1 < 0)
6498 /* This may happen when we have a stop inside a bundle. */
6500 if (pos > 3)
6501 abort ();
6502 template1 = get_template (curr_state->dfa_state, 3);
6503 pos += 3;
6505 if (!asm_p)
6506 /* Emit nops after the current insn. */
6507 for (i = 0; i < curr_state->after_nops_num; i++)
6509 nop = gen_nop ();
6510 emit_insn_after (nop, insn);
6511 pos--;
6512 if (pos < 0)
6513 abort ();
6514 if (pos % 3 == 0)
6516 /* We are at the start of a bundle: emit the template
6517 (it should be defined). */
6518 if (template0 < 0)
6519 abort ();
6520 b = gen_bundle_selector (GEN_INT (template0));
6521 ia64_emit_insn_before (b, nop);
6522 /* If we have a two-bundle window, we make one bundle
6523 rotation. Otherwise template0 will be undefined
6524 (a negative value). */
6525 template0 = template1;
6526 template1 = -1;
6529 /* Move the position backward in the window. A group barrier has
6530 no slot. An asm insn takes a whole bundle. */
6531 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6532 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6533 && asm_noperands (PATTERN (insn)) < 0)
6534 pos--;
6535 /* Long insn takes 2 slots. */
6536 if (ia64_safe_type (insn) == TYPE_L)
6537 pos--;
6538 if (pos < 0)
6539 abort ();
6540 if (pos % 3 == 0
6541 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6542 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6543 && asm_noperands (PATTERN (insn)) < 0)
6545 /* The current insn is at the bundle start: emit the
6546 template. */
6547 if (template0 < 0)
6548 abort ();
6549 b = gen_bundle_selector (GEN_INT (template0));
6550 ia64_emit_insn_before (b, insn);
6551 b = PREV_INSN (insn);
6552 insn = b;
6553 /* See comment above in analogous place for emitting nops
6554 after the insn. */
6555 template0 = template1;
6556 template1 = -1;
6558 /* Emit nops before the current insn. */
6559 for (i = 0; i < curr_state->before_nops_num; i++)
6561 nop = gen_nop ();
6562 ia64_emit_insn_before (nop, insn);
6563 nop = PREV_INSN (insn);
6564 insn = nop;
6565 pos--;
6566 if (pos < 0)
6567 abort ();
6568 if (pos % 3 == 0)
6570 /* See comment above in analogous place for emitting nops
6571 after the insn. */
6572 if (template0 < 0)
6573 abort ();
6574 b = gen_bundle_selector (GEN_INT (template0));
6575 ia64_emit_insn_before (b, insn);
6576 b = PREV_INSN (insn);
6577 insn = b;
6578 template0 = template1;
6579 template1 = -1;
6583 if (ia64_tune == PROCESSOR_ITANIUM)
6584 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
6585 Itanium1 has a strange design: if the distance between an insn
6586 and a dependent MM-insn is less than 4 cycles, we get an
6587 additional 6-cycle stall. So we make the distance at least 4
6588 cycles if it is less. */
6589 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6590 insn != NULL_RTX;
6591 insn = next_insn)
6593 if (!INSN_P (insn)
6594 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6595 || GET_CODE (PATTERN (insn)) == USE
6596 || GET_CODE (PATTERN (insn)) == CLOBBER)
6597 abort ();
6598 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6599 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
6600 /* We found an MM-insn which needs additional cycles. */
6602 rtx last;
6603 int i, j, n;
6604 int pred_stop_p;
6606 /* Now we are searching for the template of the bundle in
6607 which the MM-insn is placed and for the position of the
6608 insn in the bundle (0, 1, 2). We also check whether there
6609 is a stop before the insn. */
6610 last = prev_active_insn (insn);
6611 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
6612 if (pred_stop_p)
6613 last = prev_active_insn (last);
6614 n = 0;
6615 for (;; last = prev_active_insn (last))
6616 if (recog_memoized (last) == CODE_FOR_bundle_selector)
6618 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
6619 if (template0 == 9)
6620 /* The insn is in an MLX bundle. Change the template
6621 to MFI because we will add nops before the
6622 insn. That simplifies the subsequent code a lot. */
6623 PATTERN (last)
6624 = gen_bundle_selector (const2_rtx); /* -> MFI */
6625 break;
6627 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
6628 && (ia64_safe_itanium_class (last)
6629 != ITANIUM_CLASS_IGNORE))
6630 n++;
6631 /* Sanity checks: the stop is not at the bundle start,
6632 there are no more than 3 insns in the bundle,
6633 and the MM-insn is not at the start of a bundle with
6634 template MLX. */
6635 if ((pred_stop_p && n == 0) || n > 2
6636 || (template0 == 9 && n != 0))
6637 abort ();
6638 /* Fill the rest of the original bundle with nops (emitted just before the MM-insn). */
6639 for (j = 3 - n; j > 0; j --)
6640 ia64_emit_insn_before (gen_nop (), insn);
6641 /* This takes into account that we will add N more nops
6642 before the insn later -- see the code below. */
6643 add_cycles [INSN_UID (insn)]--;
6644 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
6645 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6646 insn);
6647 if (pred_stop_p)
6648 add_cycles [INSN_UID (insn)]--;
6649 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
6651 /* Insert "MII;" template. */
6652 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
6653 insn);
6654 ia64_emit_insn_before (gen_nop (), insn);
6655 ia64_emit_insn_before (gen_nop (), insn);
6656 if (i > 1)
6658 /* To decrease code size, we use "MI;I;"
6659 template. */
6660 ia64_emit_insn_before
6661 (gen_insn_group_barrier (GEN_INT (3)), insn);
6662 i--;
6664 ia64_emit_insn_before (gen_nop (), insn);
6665 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6666 insn);
6668 /* Put the MM-insn in the same slot of a bundle with the
6669 same template as the original one. */
6670 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
6671 insn);
6672 /* To put the insn in the same slot, add necessary number
6673 of nops. */
6674 for (j = n; j > 0; j --)
6675 ia64_emit_insn_before (gen_nop (), insn);
6676 /* Put the stop if the original bundle had it. */
6677 if (pred_stop_p)
6678 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6679 insn);
6682 free (index_to_bundle_states);
6683 finish_bundle_state_table ();
6684 bundling_p = 0;
6685 dfa_clean_insn_cache ();
6688 /* The following function is called at the end of scheduling BB or
6689 EBB. After reload, it inserts stop bits and does insn bundling. */
6691 static void
6692 ia64_sched_finish (FILE *dump, int sched_verbose)
6694 if (sched_verbose)
6695 fprintf (dump, "// Finishing schedule.\n");
6696 if (!reload_completed)
6697 return;
6698 if (reload_completed)
6700 final_emit_insn_group_barriers (dump);
6701 bundling (dump, sched_verbose, current_sched_info->prev_head,
6702 current_sched_info->next_tail);
6703 if (sched_verbose && dump)
6704 fprintf (dump, "// finishing %d-%d\n",
6705 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
6706 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
6708 return;
6712 /* The following function inserts stop bits in scheduled BB or EBB. */
6714 static void
6715 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6717 rtx insn;
6718 int need_barrier_p = 0;
6719 rtx prev_insn = NULL_RTX;
6721 init_insn_group_barriers ();
6723 for (insn = NEXT_INSN (current_sched_info->prev_head);
6724 insn != current_sched_info->next_tail;
6725 insn = NEXT_INSN (insn))
6727 if (GET_CODE (insn) == BARRIER)
6729 rtx last = prev_active_insn (insn);
6731 if (! last)
6732 continue;
6733 if (GET_CODE (last) == JUMP_INSN
6734 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6735 last = prev_active_insn (last);
6736 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6737 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6739 init_insn_group_barriers ();
6740 need_barrier_p = 0;
6741 prev_insn = NULL_RTX;
6743 else if (INSN_P (insn))
6745 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6747 init_insn_group_barriers ();
6748 need_barrier_p = 0;
6749 prev_insn = NULL_RTX;
6751 else if (need_barrier_p || group_barrier_needed_p (insn))
6753 if (TARGET_EARLY_STOP_BITS)
6755 rtx last;
6757 for (last = insn;
6758 last != current_sched_info->prev_head;
6759 last = PREV_INSN (last))
6760 if (INSN_P (last) && GET_MODE (last) == TImode
6761 && stops_p [INSN_UID (last)])
6762 break;
6763 if (last == current_sched_info->prev_head)
6764 last = insn;
6765 last = prev_active_insn (last);
6766 if (last
6767 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
6768 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
6769 last);
6770 init_insn_group_barriers ();
6771 for (last = NEXT_INSN (last);
6772 last != insn;
6773 last = NEXT_INSN (last))
6774 if (INSN_P (last))
6775 group_barrier_needed_p (last);
6777 else
6779 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6780 insn);
6781 init_insn_group_barriers ();
6783 group_barrier_needed_p (insn);
6784 prev_insn = NULL_RTX;
6786 else if (recog_memoized (insn) >= 0)
6787 prev_insn = insn;
6788 need_barrier_p = (GET_CODE (insn) == CALL_INSN
6789 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6790 || asm_noperands (PATTERN (insn)) >= 0);
6797 /* The following function returns the lookahead depth used by the
6798 first-cycle multipass DFA insn scheduler. */
6800 static int
6801 ia64_first_cycle_multipass_dfa_lookahead (void)
6803 return (reload_completed ? 6 : 4);
6806 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
6808 static void
6809 ia64_init_dfa_pre_cycle_insn (void)
6811 if (temp_dfa_state == NULL)
6813 dfa_state_size = state_size ();
6814 temp_dfa_state = xmalloc (dfa_state_size);
6815 prev_cycle_state = xmalloc (dfa_state_size);
6817 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
6818 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
6819 recog_memoized (dfa_pre_cycle_insn);
6820 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
6821 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
6822 recog_memoized (dfa_stop_insn);
6825 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
6826 used by the DFA insn scheduler. */
6828 static rtx
6829 ia64_dfa_pre_cycle_insn (void)
6831 return dfa_pre_cycle_insn;
6834 /* The following function returns TRUE if PRODUCER (of type ilog or
6835 ld) produces an address for CONSUMER (of type st or stf). */
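/* As an illustration only (register numbers are arbitrary), in a
   sequence such as

       add r14 = r32, r33   // ilog insn defining r14
       ;;
       st8 [r14] = r35      // store whose address is r14

   the add feeds the address of the store, so this bypass applies.  */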
6838 ia64_st_address_bypass_p (rtx producer, rtx consumer)
6840 rtx dest, reg, mem;
6842 if (producer == NULL_RTX || consumer == NULL_RTX)
6843 abort ();
6844 dest = ia64_single_set (producer);
6845 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
6846 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
6847 abort ();
6848 if (GET_CODE (reg) == SUBREG)
6849 reg = SUBREG_REG (reg);
6850 dest = ia64_single_set (consumer);
6851 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
6852 || GET_CODE (mem) != MEM)
6853 abort ();
6854 return reg_mentioned_p (reg, mem);
6857 /* The following function returns TRUE if PRODUCER (of type ilog or
6858 ld) produces an address for CONSUMER (of type ld or fld). */
6861 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
6863 rtx dest, src, reg, mem;
6865 if (producer == NULL_RTX || consumer == NULL_RTX)
6866 abort ();
6867 dest = ia64_single_set (producer);
6868 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
6869 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
6870 abort ();
6871 if (GET_CODE (reg) == SUBREG)
6872 reg = SUBREG_REG (reg);
6873 src = ia64_single_set (consumer);
6874 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
6875 abort ();
6876 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
6877 mem = XVECEXP (mem, 0, 0);
6878 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
6879 mem = XEXP (mem, 0);
6881 /* Note that LO_SUM is used for GOT loads. */
6882 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
6883 abort ();
6885 return reg_mentioned_p (reg, mem);
6888 /* The following function returns TRUE if INSN produces an address for
6889 a load/store insn. We will place such insns into an M slot because
6890 that decreases their latency. */
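/* The `call' bit is not used as a call flag here: it is set earlier in
   this file (see the `insn->call = link != 0;' assignment next to the
   st/ld address-bypass checks above) when INSN was found to feed the
   address of a dependent load or store.  */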
6893 ia64_produce_address_p (rtx insn)
6895 return insn->call;
6899 /* Emit pseudo-ops for the assembler to describe predicate relations.
6900 At present this assumes that we only consider predicate pairs to
6901 be mutex, and that the assembler can deduce proper values from
6902 straight-line code. */
6904 static void
6905 emit_predicate_relation_info (void)
6907 basic_block bb;
6909 FOR_EACH_BB_REVERSE (bb)
6911 int r;
6912 rtx head = BB_HEAD (bb);
6914 /* We only need such notes at code labels. */
6915 if (GET_CODE (head) != CODE_LABEL)
6916 continue;
6917 if (GET_CODE (NEXT_INSN (head)) == NOTE
6918 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6919 head = NEXT_INSN (head);
6921 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6922 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6924 rtx p = gen_rtx_REG (BImode, r);
6925 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6926 if (head == BB_END (bb))
6927 BB_END (bb) = n;
6928 head = n;
6932 /* Look for conditional calls that do not return, and protect predicate
6933 relations around them. Otherwise the assembler will assume the call
6934 returns, and complain about uses of call-clobbered predicates after
6935 the call. */
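/* The insns emitted around such a call below are expected to expand into
   predicate-safety annotations for the assembler (directives of the
   `.pred.safe_across_calls' kind); the exact spelling is determined by
   the safe_across_calls_all and safe_across_calls_normal patterns in the
   machine description, not by this function.  */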
6936 FOR_EACH_BB_REVERSE (bb)
6938 rtx insn = BB_HEAD (bb);
6940 while (1)
6942 if (GET_CODE (insn) == CALL_INSN
6943 && GET_CODE (PATTERN (insn)) == COND_EXEC
6944 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6946 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6947 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6948 if (BB_HEAD (bb) == insn)
6949 BB_HEAD (bb) = b;
6950 if (BB_END (bb) == insn)
6951 BB_END (bb) = a;
6954 if (insn == BB_END (bb))
6955 break;
6956 insn = NEXT_INSN (insn);
6961 /* Perform machine dependent operations on the rtl chain INSNS. */
6963 static void
6964 ia64_reorg (void)
6966 /* We are freeing block_for_insn in the toplev to keep compatibility
6967 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6968 compute_bb_for_insn ();
6970 /* If optimizing, we'll have split before scheduling. */
6971 if (optimize == 0)
6972 split_all_insns (0);
6974 /* ??? update_life_info_in_dirty_blocks fails to terminate during
6975 non-optimizing bootstrap. */
6976 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
6978 if (ia64_flag_schedule_insns2)
6980 timevar_push (TV_SCHED2);
6981 ia64_final_schedule = 1;
6983 initiate_bundle_states ();
6984 ia64_nop = make_insn_raw (gen_nop ());
6985 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
6986 recog_memoized (ia64_nop);
6987 clocks_length = get_max_uid () + 1;
6988 stops_p = xcalloc (1, clocks_length);
6989 if (ia64_tune == PROCESSOR_ITANIUM)
6991 clocks = xcalloc (clocks_length, sizeof (int));
6992 add_cycles = xcalloc (clocks_length, sizeof (int));
6994 if (ia64_tune == PROCESSOR_ITANIUM2)
6996 pos_1 = get_cpu_unit_code ("2_1");
6997 pos_2 = get_cpu_unit_code ("2_2");
6998 pos_3 = get_cpu_unit_code ("2_3");
6999 pos_4 = get_cpu_unit_code ("2_4");
7000 pos_5 = get_cpu_unit_code ("2_5");
7001 pos_6 = get_cpu_unit_code ("2_6");
7002 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7003 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7004 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7005 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7006 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7007 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7008 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7009 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7010 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7011 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7012 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7013 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7014 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7015 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7016 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7017 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7018 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7019 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7020 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7021 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7023 else
7025 pos_1 = get_cpu_unit_code ("1_1");
7026 pos_2 = get_cpu_unit_code ("1_2");
7027 pos_3 = get_cpu_unit_code ("1_3");
7028 pos_4 = get_cpu_unit_code ("1_4");
7029 pos_5 = get_cpu_unit_code ("1_5");
7030 pos_6 = get_cpu_unit_code ("1_6");
7031 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7032 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7033 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7034 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7035 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7036 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7037 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7038 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7039 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7040 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7041 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7042 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7043 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7044 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7045 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7046 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7047 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7048 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7049 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7050 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7052 schedule_ebbs (dump_file);
7053 finish_bundle_states ();
7054 if (ia64_tune == PROCESSOR_ITANIUM)
7056 free (add_cycles);
7057 free (clocks);
7059 free (stops_p);
7060 emit_insn_group_barriers (dump_file);
7062 ia64_final_schedule = 0;
7063 timevar_pop (TV_SCHED2);
7065 else
7066 emit_all_insn_group_barriers (dump_file);
7068 /* A call must not be the last instruction in a function, so that the
7069 return address is still within the function, so that unwinding works
7070 properly. Note that IA-64 differs from dwarf2 on this point. */
7071 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7073 rtx insn;
7074 int saw_stop = 0;
7076 insn = get_last_insn ();
7077 if (! INSN_P (insn))
7078 insn = prev_active_insn (insn);
7079 /* Skip over insns that expand to nothing. */
7080 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7082 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7083 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7084 saw_stop = 1;
7085 insn = prev_active_insn (insn);
7087 if (GET_CODE (insn) == CALL_INSN)
7089 if (! saw_stop)
7090 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7091 emit_insn (gen_break_f ());
7092 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7096 fixup_errata ();
7097 emit_predicate_relation_info ();
7099 if (ia64_flag_var_tracking)
7101 timevar_push (TV_VAR_TRACKING);
7102 variable_tracking_main ();
7103 timevar_pop (TV_VAR_TRACKING);
7107 /* Return true if REGNO is used by the epilogue. */
7110 ia64_epilogue_uses (int regno)
7112 switch (regno)
7114 case R_GR (1):
7115 /* With a call to a function in another module, we will write a new
7116 value to "gp". After returning from such a call, we need to make
7117 sure the function restores the original gp-value, even if the
7118 function itself does not use the gp anymore. */
7119 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7121 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7122 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7123 /* For functions defined with the syscall_linkage attribute, all
7124 input registers are marked as live at all function exits. This
7125 prevents the register allocator from using the input registers,
7126 which in turn makes it possible to restart a system call after
7127 an interrupt without having to save/restore the input registers.
7128 This also prevents kernel data from leaking to application code. */
7129 return lookup_attribute ("syscall_linkage",
7130 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7132 case R_BR (0):
7133 /* Conditional return patterns can't represent the use of `b0' as
7134 the return address, so we force the value live this way. */
7135 return 1;
7137 case AR_PFS_REGNUM:
7138 /* Likewise for ar.pfs, which is used by br.ret. */
7139 return 1;
7141 default:
7142 return 0;
7146 /* Return true if REGNO is used by the frame unwinder. */
7149 ia64_eh_uses (int regno)
7151 if (! reload_completed)
7152 return 0;
7154 if (current_frame_info.reg_save_b0
7155 && regno == current_frame_info.reg_save_b0)
7156 return 1;
7157 if (current_frame_info.reg_save_pr
7158 && regno == current_frame_info.reg_save_pr)
7159 return 1;
7160 if (current_frame_info.reg_save_ar_pfs
7161 && regno == current_frame_info.reg_save_ar_pfs)
7162 return 1;
7163 if (current_frame_info.reg_save_ar_unat
7164 && regno == current_frame_info.reg_save_ar_unat)
7165 return 1;
7166 if (current_frame_info.reg_save_ar_lc
7167 && regno == current_frame_info.reg_save_ar_lc)
7168 return 1;
7170 return 0;
7173 /* Return true if this goes in small data/bss. */
7175 /* ??? We could also support our own long data here. Generating movl/add/ld8
7176 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7177 code faster because there is one less load. This also includes incomplete
7178 types which can't go in sdata/sbss. */
7180 static bool
7181 ia64_in_small_data_p (tree exp)
7183 if (TARGET_NO_SDATA)
7184 return false;
7186 /* We want to merge strings, so we never consider them small data. */
7187 if (TREE_CODE (exp) == STRING_CST)
7188 return false;
7190 /* Functions are never small data. */
7191 if (TREE_CODE (exp) == FUNCTION_DECL)
7192 return false;
7194 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7196 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7197 if (strcmp (section, ".sdata") == 0
7198 || strcmp (section, ".sbss") == 0)
7199 return true;
7201 else
7203 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7205 /* If this is an incomplete type with size 0, then we can't put it
7206 in sdata because it might be too big when completed. */
7207 if (size > 0 && size <= ia64_section_threshold)
7208 return true;
7211 return false;
7214 /* Output assembly directives for prologue regions. */
7216 /* True if the current basic block is the last block of the function. */
7218 static bool last_block;
7220 /* True if we need a copy_state command at the start of the next block. */
7222 static bool need_copy_state;
7224 /* The function emits unwind directives for the start of an epilogue. */
7226 static void
7227 process_epilogue (void)
7229 /* If this isn't the last block of the function, then we need to label the
7230 current state, and copy it back in at the start of the next block. */
7232 if (!last_block)
7234 fprintf (asm_out_file, "\t.label_state 1\n");
7235 need_copy_state = true;
7238 fprintf (asm_out_file, "\t.restore sp\n");
7241 /* This function processes a SET pattern looking for specific patterns
7242 which result in emitting an assembly directive required for unwinding. */
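/* Directives emitted from here include `.save' (register saved to
   another register), `.savesp' / `.savepsp' (register saved in memory,
   addressed off the stack pointer or the frame pointer), `.fframe' /
   `.vframe' (fixed or variable frame), and the mask forms `.save.g',
   `.save.b', `.save.f' and `.save.gf' -- see the individual cases
   below, plus `.restore sp' via process_epilogue above.  */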
7244 static int
7245 process_set (FILE *asm_out_file, rtx pat)
7247 rtx src = SET_SRC (pat);
7248 rtx dest = SET_DEST (pat);
7249 int src_regno, dest_regno;
7251 /* Look for the ALLOC insn. */
7252 if (GET_CODE (src) == UNSPEC_VOLATILE
7253 && XINT (src, 1) == UNSPECV_ALLOC
7254 && GET_CODE (dest) == REG)
7256 dest_regno = REGNO (dest);
7258 /* If this isn't the final destination for ar.pfs, the alloc
7259 shouldn't have been marked frame related. */
7260 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7261 abort ();
7263 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7264 ia64_dbx_register_number (dest_regno));
7265 return 1;
7268 /* Look for SP = .... */
7269 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7271 if (GET_CODE (src) == PLUS)
7273 rtx op0 = XEXP (src, 0);
7274 rtx op1 = XEXP (src, 1);
7275 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7277 if (INTVAL (op1) < 0)
7278 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7279 -INTVAL (op1));
7280 else
7281 process_epilogue ();
7283 else
7284 abort ();
7286 else if (GET_CODE (src) == REG
7287 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7288 process_epilogue ();
7289 else
7290 abort ();
7292 return 1;
7295 /* Register move we need to look at. */
7296 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7298 src_regno = REGNO (src);
7299 dest_regno = REGNO (dest);
7301 switch (src_regno)
7303 case BR_REG (0):
7304 /* Saving return address pointer. */
7305 if (dest_regno != current_frame_info.reg_save_b0)
7306 abort ();
7307 fprintf (asm_out_file, "\t.save rp, r%d\n",
7308 ia64_dbx_register_number (dest_regno));
7309 return 1;
7311 case PR_REG (0):
7312 if (dest_regno != current_frame_info.reg_save_pr)
7313 abort ();
7314 fprintf (asm_out_file, "\t.save pr, r%d\n",
7315 ia64_dbx_register_number (dest_regno));
7316 return 1;
7318 case AR_UNAT_REGNUM:
7319 if (dest_regno != current_frame_info.reg_save_ar_unat)
7320 abort ();
7321 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7322 ia64_dbx_register_number (dest_regno));
7323 return 1;
7325 case AR_LC_REGNUM:
7326 if (dest_regno != current_frame_info.reg_save_ar_lc)
7327 abort ();
7328 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7329 ia64_dbx_register_number (dest_regno));
7330 return 1;
7332 case STACK_POINTER_REGNUM:
7333 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7334 || ! frame_pointer_needed)
7335 abort ();
7336 fprintf (asm_out_file, "\t.vframe r%d\n",
7337 ia64_dbx_register_number (dest_regno));
7338 return 1;
7340 default:
7341 /* Everything else should indicate being stored to memory. */
7342 abort ();
7346 /* Memory store we need to look at. */
7347 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7349 long off;
7350 rtx base;
7351 const char *saveop;
7353 if (GET_CODE (XEXP (dest, 0)) == REG)
7355 base = XEXP (dest, 0);
7356 off = 0;
7358 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7359 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7361 base = XEXP (XEXP (dest, 0), 0);
7362 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7364 else
7365 abort ();
7367 if (base == hard_frame_pointer_rtx)
7369 saveop = ".savepsp";
7370 off = - off;
7372 else if (base == stack_pointer_rtx)
7373 saveop = ".savesp";
7374 else
7375 abort ();
7377 src_regno = REGNO (src);
7378 switch (src_regno)
7380 case BR_REG (0):
7381 if (current_frame_info.reg_save_b0 != 0)
7382 abort ();
7383 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7384 return 1;
7386 case PR_REG (0):
7387 if (current_frame_info.reg_save_pr != 0)
7388 abort ();
7389 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7390 return 1;
7392 case AR_LC_REGNUM:
7393 if (current_frame_info.reg_save_ar_lc != 0)
7394 abort ();
7395 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7396 return 1;
7398 case AR_PFS_REGNUM:
7399 if (current_frame_info.reg_save_ar_pfs != 0)
7400 abort ();
7401 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7402 return 1;
7404 case AR_UNAT_REGNUM:
7405 if (current_frame_info.reg_save_ar_unat != 0)
7406 abort ();
7407 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7408 return 1;
7410 case GR_REG (4):
7411 case GR_REG (5):
7412 case GR_REG (6):
7413 case GR_REG (7):
7414 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7415 1 << (src_regno - GR_REG (4)));
7416 return 1;
7418 case BR_REG (1):
7419 case BR_REG (2):
7420 case BR_REG (3):
7421 case BR_REG (4):
7422 case BR_REG (5):
7423 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7424 1 << (src_regno - BR_REG (1)));
7425 return 1;
7427 case FR_REG (2):
7428 case FR_REG (3):
7429 case FR_REG (4):
7430 case FR_REG (5):
7431 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7432 1 << (src_regno - FR_REG (2)));
7433 return 1;
7435 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7436 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7437 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7438 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7439 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7440 1 << (src_regno - FR_REG (12)));
7441 return 1;
7443 default:
7444 return 0;
7448 return 0;
7452 /* This function looks at a single insn and emits any directives
7453 required to unwind this insn. */
7454 void
7455 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7457 if (flag_unwind_tables
7458 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7460 rtx pat;
7462 if (GET_CODE (insn) == NOTE
7463 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7465 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7467 /* Restore unwind state from immediately before the epilogue. */
7468 if (need_copy_state)
7470 fprintf (asm_out_file, "\t.body\n");
7471 fprintf (asm_out_file, "\t.copy_state 1\n");
7472 need_copy_state = false;
7476 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7477 return;
7479 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7480 if (pat)
7481 pat = XEXP (pat, 0);
7482 else
7483 pat = PATTERN (insn);
7485 switch (GET_CODE (pat))
7487 case SET:
7488 process_set (asm_out_file, pat);
7489 break;
7491 case PARALLEL:
7493 int par_index;
7494 int limit = XVECLEN (pat, 0);
7495 for (par_index = 0; par_index < limit; par_index++)
7497 rtx x = XVECEXP (pat, 0, par_index);
7498 if (GET_CODE (x) == SET)
7499 process_set (asm_out_file, x);
7501 break;
7504 default:
7505 abort ();
7511 void
7512 ia64_init_builtins (void)
7514 tree psi_type_node = build_pointer_type (integer_type_node);
7515 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7517 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7518 tree si_ftype_psi_si_si
7519 = build_function_type_list (integer_type_node,
7520 psi_type_node, integer_type_node,
7521 integer_type_node, NULL_TREE);
7523 /* __sync_val_compare_and_swap_di */
7524 tree di_ftype_pdi_di_di
7525 = build_function_type_list (long_integer_type_node,
7526 pdi_type_node, long_integer_type_node,
7527 long_integer_type_node, NULL_TREE);
7528 /* __sync_bool_compare_and_swap_di */
7529 tree si_ftype_pdi_di_di
7530 = build_function_type_list (integer_type_node,
7531 pdi_type_node, long_integer_type_node,
7532 long_integer_type_node, NULL_TREE);
7533 /* __sync_synchronize */
7534 tree void_ftype_void
7535 = build_function_type (void_type_node, void_list_node);
7537 /* __sync_lock_test_and_set_si */
7538 tree si_ftype_psi_si
7539 = build_function_type_list (integer_type_node,
7540 psi_type_node, integer_type_node, NULL_TREE);
7542 /* __sync_lock_test_and_set_di */
7543 tree di_ftype_pdi_di
7544 = build_function_type_list (long_integer_type_node,
7545 pdi_type_node, long_integer_type_node,
7546 NULL_TREE);
7548 /* __sync_lock_release_si */
7549 tree void_ftype_psi
7550 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7552 /* __sync_lock_release_di */
7553 tree void_ftype_pdi
7554 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7556 tree fpreg_type;
7557 tree float80_type;
7559 /* The __fpreg type. */
7560 fpreg_type = make_node (REAL_TYPE);
7561 /* ??? The back end should know to load/save __fpreg variables using
7562 the ldf.fill and stf.spill instructions. */
7563 TYPE_PRECISION (fpreg_type) = 80;
7564 layout_type (fpreg_type);
7565 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
7567 /* The __float80 type. */
7568 float80_type = make_node (REAL_TYPE);
7569 TYPE_PRECISION (float80_type) = 80;
7570 layout_type (float80_type);
7571 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
7573 /* The __float128 type. */
7574 if (!TARGET_HPUX)
7576 tree float128_type = make_node (REAL_TYPE);
7577 TYPE_PRECISION (float128_type) = 128;
7578 layout_type (float128_type);
7579 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
7581 else
7582 /* Under HPUX, this is a synonym for "long double". */
7583 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
7584 "__float128");
7586 #define def_builtin(name, type, code) \
7587 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
7588 NULL, NULL_TREE)
7590 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7591 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7592 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7593 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7594 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7595 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7596 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7597 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7599 def_builtin ("__sync_synchronize", void_ftype_void,
7600 IA64_BUILTIN_SYNCHRONIZE);
7602 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7603 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7604 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7605 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7606 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7607 IA64_BUILTIN_LOCK_RELEASE_SI);
7608 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7609 IA64_BUILTIN_LOCK_RELEASE_DI);
7611 def_builtin ("__builtin_ia64_bsp",
7612 build_function_type (ptr_type_node, void_list_node),
7613 IA64_BUILTIN_BSP);
7615 def_builtin ("__builtin_ia64_flushrs",
7616 build_function_type (void_type_node, void_list_node),
7617 IA64_BUILTIN_FLUSHRS);
7619 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7620 IA64_BUILTIN_FETCH_AND_ADD_SI);
7621 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7622 IA64_BUILTIN_FETCH_AND_SUB_SI);
7623 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7624 IA64_BUILTIN_FETCH_AND_OR_SI);
7625 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7626 IA64_BUILTIN_FETCH_AND_AND_SI);
7627 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7628 IA64_BUILTIN_FETCH_AND_XOR_SI);
7629 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7630 IA64_BUILTIN_FETCH_AND_NAND_SI);
7632 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7633 IA64_BUILTIN_ADD_AND_FETCH_SI);
7634 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7635 IA64_BUILTIN_SUB_AND_FETCH_SI);
7636 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7637 IA64_BUILTIN_OR_AND_FETCH_SI);
7638 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7639 IA64_BUILTIN_AND_AND_FETCH_SI);
7640 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7641 IA64_BUILTIN_XOR_AND_FETCH_SI);
7642 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7643 IA64_BUILTIN_NAND_AND_FETCH_SI);
7645 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7646 IA64_BUILTIN_FETCH_AND_ADD_DI);
7647 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7648 IA64_BUILTIN_FETCH_AND_SUB_DI);
7649 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7650 IA64_BUILTIN_FETCH_AND_OR_DI);
7651 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7652 IA64_BUILTIN_FETCH_AND_AND_DI);
7653 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7654 IA64_BUILTIN_FETCH_AND_XOR_DI);
7655 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7656 IA64_BUILTIN_FETCH_AND_NAND_DI);
7658 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7659 IA64_BUILTIN_ADD_AND_FETCH_DI);
7660 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7661 IA64_BUILTIN_SUB_AND_FETCH_DI);
7662 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7663 IA64_BUILTIN_OR_AND_FETCH_DI);
7664 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7665 IA64_BUILTIN_AND_AND_FETCH_DI);
7666 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7667 IA64_BUILTIN_XOR_AND_FETCH_DI);
7668 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7669 IA64_BUILTIN_NAND_AND_FETCH_DI);
7671 #undef def_builtin
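/* A sketch of how user code might invoke the builtins declared above,
   assuming int-sized operands (the _si variants); the expanders further
   down generate the actual insn sequences.  enter_and_count spins until
   the xchg returns zero, then bumps the counter; leave does the
   releasing store of zero.

     static int lock;
     static int counter;

     void
     enter_and_count (void)
     {
       while (__sync_lock_test_and_set_si (&lock, 1) != 0)
         continue;
       __sync_fetch_and_add_si (&counter, 1);
     }

     void
     leave (void)
     {
       __sync_lock_release_si (&lock);
     }
*/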
7674 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7677 tmp = [ptr];
7678 do {
7679 ret = tmp;
7680 ar.ccv = tmp;
7681 tmp <op>= value;
7682 cmpxchgsz.acq tmp = [ptr], tmp
7683 } while (tmp != ret)
7684 */
7686 static rtx
7687 ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
7688 tree arglist, rtx target)
7690 rtx ret, label, tmp, ccv, insn, mem, value;
7691 tree arg0, arg1;
7693 arg0 = TREE_VALUE (arglist);
7694 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7695 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7696 #ifdef POINTERS_EXTEND_UNSIGNED
7697 if (GET_MODE(mem) != Pmode)
7698 mem = convert_memory_address (Pmode, mem);
7699 #endif
7700 value = expand_expr (arg1, NULL_RTX, mode, 0);
7702 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7703 MEM_VOLATILE_P (mem) = 1;
7705 if (target && register_operand (target, mode))
7706 ret = target;
7707 else
7708 ret = gen_reg_rtx (mode);
7710 emit_insn (gen_mf ());
7712 /* Special case for fetchadd instructions. */
7713 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7715 if (mode == SImode)
7716 insn = gen_fetchadd_acq_si (ret, mem, value);
7717 else
7718 insn = gen_fetchadd_acq_di (ret, mem, value);
7719 emit_insn (insn);
7720 return ret;
7723 tmp = gen_reg_rtx (mode);
7724 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7725 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7726 emit_move_insn (tmp, mem);
7728 label = gen_label_rtx ();
7729 emit_label (label);
7730 emit_move_insn (ret, tmp);
7731 convert_move (ccv, tmp, /*unsignedp=*/1);
7733 /* Perform the specific operation.  NAND is special-cased: it is passed
7734 in here as one_cmpl_optab, so complement TMP and then AND it with VALUE. */
7735 if (binoptab == one_cmpl_optab)
7737 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7738 binoptab = and_optab;
7740 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7742 if (mode == SImode)
7743 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7744 else
7745 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7746 emit_insn (insn);
7748 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7750 return ret;
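/* For example (a sketch): __sync_fetch_and_add_si (&x, 1) satisfies
   fetchadd_operand and is emitted as the single instruction

	fetchadd4.acq ret = [x], 1

   whereas __sync_fetch_and_or_si (&x, mask) has no such form and falls
   through to the ar.ccv/cmpxchg4.acq retry loop built above.  */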
7753 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7756 tmp = [ptr];
7757 do {
7758 old = tmp;
7759 ar.ccv = tmp;
7760 ret = tmp <op> value;
7761 cmpxchgsz.acq tmp = [ptr], ret
7762 } while (tmp != old)
7763 */
7765 static rtx
7766 ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
7767 tree arglist, rtx target)
7769 rtx old, label, tmp, ret, ccv, insn, mem, value;
7770 tree arg0, arg1;
7772 arg0 = TREE_VALUE (arglist);
7773 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7774 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7775 #ifdef POINTERS_EXTEND_UNSIGNED
7776 if (GET_MODE(mem) != Pmode)
7777 mem = convert_memory_address (Pmode, mem);
7778 #endif
7780 value = expand_expr (arg1, NULL_RTX, mode, 0);
7782 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7783 MEM_VOLATILE_P (mem) = 1;
7785 if (target && ! register_operand (target, mode))
7786 target = NULL_RTX;
7788 emit_insn (gen_mf ());
7789 tmp = gen_reg_rtx (mode);
7790 old = gen_reg_rtx (mode);
7791 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7792 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7794 emit_move_insn (tmp, mem);
7796 label = gen_label_rtx ();
7797 emit_label (label);
7798 emit_move_insn (old, tmp);
7799 convert_move (ccv, tmp, /*unsignedp=*/1);
7801 /* Perform the specific operation.  NAND is special-cased: it is passed
7802 in here as one_cmpl_optab, so complement TMP and then AND it with VALUE. */
7803 if (binoptab == one_cmpl_optab)
7805 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7806 binoptab = and_optab;
7808 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7810 if (mode == SImode)
7811 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7812 else
7813 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7814 emit_insn (insn);
7816 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7818 return ret;
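/* The only difference from ia64_expand_fetch_and_op above is the value
   returned: this variant returns the post-operation value, so e.g.
   __sync_add_and_fetch_si (&n, 1) behaves like an atomic ++n, while
   __sync_fetch_and_add_si (&n, 1) behaves like an atomic n++ (a sketch
   of the intended semantics, not of the emitted RTL).  */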
7821 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7823 ar.ccv = oldval
7825 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7826 return ret
7828 For bool_ it's the same except return ret == oldval.
7829 */
7831 static rtx
7832 ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
7833 int boolp, tree arglist, rtx target)
7835 tree arg0, arg1, arg2;
7836 rtx mem, old, new, ccv, tmp, insn;
7838 arg0 = TREE_VALUE (arglist);
7839 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7840 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7841 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7842 old = expand_expr (arg1, NULL_RTX, mode, 0);
7843 new = expand_expr (arg2, NULL_RTX, mode, 0);
7845 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7846 MEM_VOLATILE_P (mem) = 1;
7848 if (GET_MODE (old) != mode)
7849 old = convert_to_mode (mode, old, /*unsignedp=*/1);
7850 if (GET_MODE (new) != mode)
7851 new = convert_to_mode (mode, new, /*unsignedp=*/1);
7853 if (! register_operand (old, mode))
7854 old = copy_to_mode_reg (mode, old);
7855 if (! register_operand (new, mode))
7856 new = copy_to_mode_reg (mode, new);
7858 if (! boolp && target && register_operand (target, mode))
7859 tmp = target;
7860 else
7861 tmp = gen_reg_rtx (mode);
7863 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7864 convert_move (ccv, old, /*unsignedp=*/1);
7865 emit_insn (gen_mf ());
7866 if (mode == SImode)
7867 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7868 else
7869 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7870 emit_insn (insn);
7872 if (boolp)
7874 if (! target)
7875 target = gen_reg_rtx (rmode);
7876 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7878 else
7879 return tmp;
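/* A sketch of typical user-level use of the val_ form (assumed int
   operands, so the _si variant):

     int
     atomic_max (int *p, int v)
     {
       int old = *p;
       while (old < v)
         {
           int prev = __sync_val_compare_and_swap_si (p, old, v);
           if (prev == old)
             break;
           old = prev;
         }
       return old;
     }

   The bool_ form would instead test
   __sync_bool_compare_and_swap_si (p, old, v) directly.  */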
7882 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7884 static rtx
7885 ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
7886 rtx target)
7888 tree arg0, arg1;
7889 rtx mem, new, ret, insn;
7891 arg0 = TREE_VALUE (arglist);
7892 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7893 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7894 new = expand_expr (arg1, NULL_RTX, mode, 0);
7896 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7897 MEM_VOLATILE_P (mem) = 1;
7898 if (! register_operand (new, mode))
7899 new = copy_to_mode_reg (mode, new);
7901 if (target && register_operand (target, mode))
7902 ret = target;
7903 else
7904 ret = gen_reg_rtx (mode);
7906 if (mode == SImode)
7907 insn = gen_xchgsi (ret, mem, new);
7908 else
7909 insn = gen_xchgdi (ret, mem, new);
7910 emit_insn (insn);
7912 return ret;
7915 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7917 static rtx
7918 ia64_expand_lock_release (enum machine_mode mode, tree arglist,
7919 rtx target ATTRIBUTE_UNUSED)
7921 tree arg0;
7922 rtx mem;
7924 arg0 = TREE_VALUE (arglist);
7925 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7927 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7928 MEM_VOLATILE_P (mem) = 1;
7930 emit_move_insn (mem, const0_rtx);
7932 return const0_rtx;
7935 rtx
7936 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
7937 enum machine_mode mode ATTRIBUTE_UNUSED,
7938 int ignore ATTRIBUTE_UNUSED)
7940 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7941 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7942 tree arglist = TREE_OPERAND (exp, 1);
7943 enum machine_mode rmode = VOIDmode;
7945 switch (fcode)
7947 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7948 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7949 mode = SImode;
7950 rmode = SImode;
7951 break;
7953 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7954 case IA64_BUILTIN_LOCK_RELEASE_SI:
7955 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7956 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7957 case IA64_BUILTIN_FETCH_AND_OR_SI:
7958 case IA64_BUILTIN_FETCH_AND_AND_SI:
7959 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7960 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7961 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7962 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7963 case IA64_BUILTIN_OR_AND_FETCH_SI:
7964 case IA64_BUILTIN_AND_AND_FETCH_SI:
7965 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7966 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7967 mode = SImode;
7968 break;
7970 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7971 mode = DImode;
7972 rmode = SImode;
7973 break;
7975 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7976 mode = DImode;
7977 rmode = DImode;
7978 break;
7980 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7981 case IA64_BUILTIN_LOCK_RELEASE_DI:
7982 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7983 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7984 case IA64_BUILTIN_FETCH_AND_OR_DI:
7985 case IA64_BUILTIN_FETCH_AND_AND_DI:
7986 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7987 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7988 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7989 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7990 case IA64_BUILTIN_OR_AND_FETCH_DI:
7991 case IA64_BUILTIN_AND_AND_FETCH_DI:
7992 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7993 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7994 mode = DImode;
7995 break;
7997 default:
7998 break;
8001 switch (fcode)
8003 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8004 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8005 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8006 target);
8008 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8009 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8010 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8011 target);
8013 case IA64_BUILTIN_SYNCHRONIZE:
8014 emit_insn (gen_mf ());
8015 return const0_rtx;
8017 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8018 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8019 return ia64_expand_lock_test_and_set (mode, arglist, target);
8021 case IA64_BUILTIN_LOCK_RELEASE_SI:
8022 case IA64_BUILTIN_LOCK_RELEASE_DI:
8023 return ia64_expand_lock_release (mode, arglist, target);
8025 case IA64_BUILTIN_BSP:
8026 if (! target || ! register_operand (target, DImode))
8027 target = gen_reg_rtx (DImode);
8028 emit_insn (gen_bsp_value (target));
8029 #ifdef POINTERS_EXTEND_UNSIGNED
8030 target = convert_memory_address (ptr_mode, target);
8031 #endif
8032 return target;
8034 case IA64_BUILTIN_FLUSHRS:
8035 emit_insn (gen_flushrs ());
8036 return const0_rtx;
8038 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8039 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8040 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8042 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8043 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8044 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8046 case IA64_BUILTIN_FETCH_AND_OR_SI:
8047 case IA64_BUILTIN_FETCH_AND_OR_DI:
8048 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8050 case IA64_BUILTIN_FETCH_AND_AND_SI:
8051 case IA64_BUILTIN_FETCH_AND_AND_DI:
8052 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8054 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8055 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8056 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8058 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8059 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8060 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8062 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8063 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8064 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8066 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8067 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8068 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8070 case IA64_BUILTIN_OR_AND_FETCH_SI:
8071 case IA64_BUILTIN_OR_AND_FETCH_DI:
8072 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8074 case IA64_BUILTIN_AND_AND_FETCH_SI:
8075 case IA64_BUILTIN_AND_AND_FETCH_DI:
8076 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8078 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8079 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8080 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8082 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8083 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8084 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8086 default:
8087 break;
8090 return NULL_RTX;
8093 /* On HP-UX IA64, aggregate parameters are passed in the most
8094 significant bits of the stack slot. */
8096 enum direction
8097 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8099 /* Exception to normal case for structures/unions/etc. */
8101 if (type && AGGREGATE_TYPE_P (type)
8102 && int_size_in_bytes (type) < UNITS_PER_WORD)
8103 return upward;
8105 /* Fall back to the default. */
8106 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
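/* For example (a sketch): for

     struct small { char c[3]; };

   passed by value on HP-UX, int_size_in_bytes is 3, which is less than
   UNITS_PER_WORD, so the bytes occupy the most significant end of the
   word-sized slot (upward padding); aggregates of a word or more fall
   back to the default rule.  */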
8109 /* Linked list of all external functions that are to be emitted by GCC.
8110 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8111 order to avoid putting out names that are never really used. */
8113 struct extern_func_list GTY(())
8115 struct extern_func_list *next;
8116 tree decl;
8119 static GTY(()) struct extern_func_list *extern_func_head;
8121 static void
8122 ia64_hpux_add_extern_decl (tree decl)
8124 struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8126 p->decl = decl;
8127 p->next = extern_func_head;
8128 extern_func_head = p;
8131 /* Print out the list of used global functions. */
8133 static void
8134 ia64_hpux_file_end (void)
8136 struct extern_func_list *p;
8138 for (p = extern_func_head; p; p = p->next)
8140 tree decl = p->decl;
8141 tree id = DECL_ASSEMBLER_NAME (decl);
8143 if (!id)
8144 abort ();
8146 if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8148 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8150 TREE_ASM_WRITTEN (decl) = 1;
8151 (*targetm.asm_out.globalize_label) (asm_out_file, name);
8152 fputs (TYPE_ASM_OP, asm_out_file);
8153 assemble_name (asm_out_file, name);
8154 fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8158 extern_func_head = 0;
8161 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
8162 modes of word_mode and larger. Rename the TFmode libfuncs using the
8163 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
8164 backward compatibility. */
8166 static void
8167 ia64_init_libfuncs (void)
8169 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
8170 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
8171 set_optab_libfunc (smod_optab, SImode, "__modsi3");
8172 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
8174 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8175 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8176 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8177 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8178 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8180 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8181 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8182 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8183 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8184 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8185 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8187 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8188 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8189 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8190 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8192 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8193 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
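/* For example (a sketch): with these entries, a quad-precision extension
   such as

     double d = 1.5;
     __float128 q = (__float128) d;

   is emitted as a library call to _U_Qfcnvff_dbl_to_quad, since TFmode
   has no hardware support.  */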
8196 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8198 static void
8199 ia64_hpux_init_libfuncs (void)
8201 ia64_init_libfuncs ();
8203 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8204 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8205 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8207 /* ia64_expand_compare uses this. */
8208 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8210 /* These should never be used. */
8211 set_optab_libfunc (eq_optab, TFmode, 0);
8212 set_optab_libfunc (ne_optab, TFmode, 0);
8213 set_optab_libfunc (gt_optab, TFmode, 0);
8214 set_optab_libfunc (ge_optab, TFmode, 0);
8215 set_optab_libfunc (lt_optab, TFmode, 0);
8216 set_optab_libfunc (le_optab, TFmode, 0);
8219 /* Rename the division and modulus functions in VMS. */
8221 static void
8222 ia64_vms_init_libfuncs (void)
8224 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8225 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8226 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8227 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8228 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8229 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8230 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8231 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8234 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8235 the HPUX conventions. */
8237 static void
8238 ia64_sysv4_init_libfuncs (void)
8240 ia64_init_libfuncs ();
8242 /* These functions are not part of the HPUX TFmode interface. We
8243 use them instead of _U_Qfcmp, which doesn't work the way we
8244 expect. */
8245 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8246 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8247 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8248 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8249 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8250 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8252 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8253 glibc doesn't have them. */
8256 /* Switch to the section to which we should output X. The only thing
8257 special we do here is to honor small data. */
8259 static void
8260 ia64_select_rtx_section (enum machine_mode mode, rtx x,
8261 unsigned HOST_WIDE_INT align)
8263 if (GET_MODE_SIZE (mode) > 0
8264 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8265 sdata_section ();
8266 else
8267 default_elf_select_rtx_section (mode, x, align);
8270 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8271 Pretend flag_pic is always set. */
8273 static void
8274 ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8276 default_elf_select_section_1 (exp, reloc, align, true);
8279 static void
8280 ia64_rwreloc_unique_section (tree decl, int reloc)
8282 default_unique_section_1 (decl, reloc, true);
8285 static void
8286 ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8287 unsigned HOST_WIDE_INT align)
8289 int save_pic = flag_pic;
8290 flag_pic = 1;
8291 ia64_select_rtx_section (mode, x, align);
8292 flag_pic = save_pic;
8295 static unsigned int
8296 ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
8298 return default_section_type_flags_1 (decl, name, reloc, true);
8301 /* Return true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8302 structure type whose return-value address should be passed in out0,
8303 rather than in r8. */
8305 static bool
8306 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8308 tree ret_type = TREE_TYPE (fntype);
8310 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8311 as the structure return address parameter, if the return value
8312 type has a non-trivial copy constructor or destructor. It is not
8313 clear if this same convention should be used for other
8314 programming languages. Until G++ 3.4, we incorrectly used r8 for
8315 these return values. */
8316 return (abi_version_at_least (2)
8317 && ret_type
8318 && TYPE_MODE (ret_type) == BLKmode
8319 && TREE_ADDRESSABLE (ret_type)
8320 && strcmp (lang_hooks.name, "GNU C++") == 0);
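/* For example (a sketch, G++ with -fabi-version=2 or later):

     struct T { ~T (); };
     T f (void);

   T has a non-trivial destructor, so G++ marks the type TREE_ADDRESSABLE
   and returns it in memory (BLKmode, as checked above); the address of
   f's return slot is therefore passed in out0 rather than in r8.  */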
8323 /* Output the assembler code for a thunk function. THUNK_DECL is the
8324 declaration for the thunk function itself, FUNCTION is the decl for
8325 the target function. DELTA is an immediate constant offset to be
8326 added to THIS. If VCALL_OFFSET is nonzero, the word at
8327 *(*this + vcall_offset) should be added to THIS. */
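/* In C terms the thunk body is roughly (a sketch, not the emitted RTL):

     this = (char *) this + delta;
     if (vcall_offset != 0)
       this = (char *) this
	      + *(ptrdiff_t *) (*(char **) this + vcall_offset);
     return (*function) (this, ...);
*/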
8329 static void
8330 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8331 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8332 tree function)
8334 rtx this, insn, funexp;
8335 unsigned int this_parmno;
8336 unsigned int this_regno;
8338 reload_completed = 1;
8339 epilogue_completed = 1;
8340 no_new_pseudos = 1;
8341 reset_block_changes ();
8343 /* Set things up as ia64_expand_prologue might. */
8344 last_scratch_gr_reg = 15;
8346 memset (&current_frame_info, 0, sizeof (current_frame_info));
8347 current_frame_info.spill_cfa_off = -16;
8348 current_frame_info.n_input_regs = 1;
8349 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8351 /* Mark the end of the (empty) prologue. */
8352 emit_note (NOTE_INSN_PROLOGUE_END);
8354 /* Figure out whether "this" will be the first parameter (the
8355 typical case) or the second parameter (as happens when the
8356 virtual function returns certain class objects). */
8357 this_parmno
8358 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8359 ? 1 : 0);
8360 this_regno = IN_REG (this_parmno);
8361 if (!TARGET_REG_NAMES)
8362 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8364 this = gen_rtx_REG (Pmode, this_regno);
8365 if (TARGET_ILP32)
8367 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8368 REG_POINTER (tmp) = 1;
8369 if (delta && CONST_OK_FOR_I (delta))
8371 emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8372 delta = 0;
8374 else
8375 emit_insn (gen_ptr_extend (this, tmp));
8378 /* Apply the constant offset, if required. */
8379 if (delta)
8381 rtx delta_rtx = GEN_INT (delta);
8383 if (!CONST_OK_FOR_I (delta))
8385 rtx tmp = gen_rtx_REG (Pmode, 2);
8386 emit_move_insn (tmp, delta_rtx);
8387 delta_rtx = tmp;
8389 emit_insn (gen_adddi3 (this, this, delta_rtx));
8392 /* Apply the offset from the vtable, if required. */
8393 if (vcall_offset)
8395 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8396 rtx tmp = gen_rtx_REG (Pmode, 2);
8398 if (TARGET_ILP32)
8400 rtx t = gen_rtx_REG (ptr_mode, 2);
8401 REG_POINTER (t) = 1;
8402 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8403 if (CONST_OK_FOR_I (vcall_offset))
8405 emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8406 vcall_offset_rtx));
8407 vcall_offset = 0;
8409 else
8410 emit_insn (gen_ptr_extend (tmp, t));
8412 else
8413 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8415 if (vcall_offset)
8417 if (!CONST_OK_FOR_J (vcall_offset))
8419 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8420 emit_move_insn (tmp2, vcall_offset_rtx);
8421 vcall_offset_rtx = tmp2;
8423 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8426 if (TARGET_ILP32)
8427 emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8428 gen_rtx_MEM (ptr_mode, tmp));
8429 else
8430 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8432 emit_insn (gen_adddi3 (this, this, tmp));
8435 /* Generate a tail call to the target function. */
8436 if (! TREE_USED (function))
8438 assemble_external (function);
8439 TREE_USED (function) = 1;
8441 funexp = XEXP (DECL_RTL (function), 0);
8442 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8443 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8444 insn = get_last_insn ();
8445 SIBLING_CALL_P (insn) = 1;
8447 /* Code generation for calls relies on splitting. */
8448 reload_completed = 1;
8449 epilogue_completed = 1;
8450 try_split (PATTERN (insn), insn, 0);
8452 emit_barrier ();
8454 /* Run just enough of rest_of_compilation to get the insns emitted.
8455 There's not really enough bulk here to make other passes such as
8456 instruction scheduling worth while. Note that use_thunk calls
8457 assemble_start_function and assemble_end_function. */
8459 insn_locators_initialize ();
8460 emit_all_insn_group_barriers (NULL);
8461 insn = get_insns ();
8462 shorten_branches (insn);
8463 final_start_function (insn, file, 1);
8464 final (insn, file, 1, 0);
8465 final_end_function ();
8467 reload_completed = 0;
8468 epilogue_completed = 0;
8469 no_new_pseudos = 0;
8472 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
8474 static rtx
8475 ia64_struct_value_rtx (tree fntype,
8476 int incoming ATTRIBUTE_UNUSED)
8478 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
8479 return NULL_RTX;
8480 return gen_rtx_REG (Pmode, GR_REG (8));
8483 #include "gt-ia64.h"