1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "real.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "except.h"
42 #include "function.h"
43 #include "ggc.h"
44 #include "basic-block.h"
45 #include "toplev.h"
46 #include "sched-int.h"
47 #include "timevar.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "tm_p.h"
51 #include "hashtab.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
56 /* This is used for communication between ASM_OUTPUT_LABEL and
57 ASM_OUTPUT_LABELREF. */
58 int ia64_asm_output_label = 0;
60 /* Define the information needed to generate branch and scc insns. This is
61 stored from the compare operation. */
62 struct rtx_def * ia64_compare_op0;
63 struct rtx_def * ia64_compare_op1;
65 /* Register names for ia64_expand_prologue. */
66 static const char * const ia64_reg_numbers[96] =
67 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
68 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
69 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
70 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
71 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
72 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
73 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
74 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
75 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
76 "r104","r105","r106","r107","r108","r109","r110","r111",
77 "r112","r113","r114","r115","r116","r117","r118","r119",
78 "r120","r121","r122","r123","r124","r125","r126","r127"};
80 /* ??? These strings could be shared with REGISTER_NAMES. */
81 static const char * const ia64_input_reg_names[8] =
82 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
84 /* ??? These strings could be shared with REGISTER_NAMES. */
85 static const char * const ia64_local_reg_names[80] =
86 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
87 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
88 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
89 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
90 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
91 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
92 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
93 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
94 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
95 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
97 /* ??? These strings could be shared with REGISTER_NAMES. */
98 static const char * const ia64_output_reg_names[8] =
99 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
101 /* String used with the -mfixed-range= option. */
102 const char *ia64_fixed_range_string;
104 /* Determines whether we use adds, addl, or movl to generate our
105 TLS immediate offsets. */
106 int ia64_tls_size = 22;
108 /* String used with the -mtls-size= option. */
109 const char *ia64_tls_size_string;
111 /* Which CPU we are scheduling for. */
112 enum processor_type ia64_tune;
114 /* String used with the -mtune= option. */
115 const char *ia64_tune_string;
117 /* Determines whether we run our final scheduling pass or not. We always
118 avoid the normal second scheduling pass. */
119 static int ia64_flag_schedule_insns2;
121 /* Determines whether we run variable tracking in machine dependent
122 reorganization. */
123 static int ia64_flag_var_tracking;
125 /* Variables which are this size or smaller are put in the sdata/sbss
126 sections. */
128 unsigned int ia64_section_threshold;
130 /* The following variable is used by the DFA insn scheduler. The value is
131 TRUE if we do insn bundling instead of insn scheduling. */
132 int bundling_p = 0;
134 /* Structure to be filled in by ia64_compute_frame_size with register
135 save masks and offsets for the current function. */
137 struct ia64_frame_info
139 HOST_WIDE_INT total_size; /* size of the stack frame, not including
140 the caller's scratch area. */
141 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
142 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
143 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
144 HARD_REG_SET mask; /* mask of saved registers. */
145 unsigned int gr_used_mask; /* mask of registers in use as gr spill
146 registers or long-term scratches. */
147 int n_spilled; /* number of spilled registers. */
148 int reg_fp; /* register for fp. */
149 int reg_save_b0; /* save register for b0. */
150 int reg_save_pr; /* save register for prs. */
151 int reg_save_ar_pfs; /* save register for ar.pfs. */
152 int reg_save_ar_unat; /* save register for ar.unat. */
153 int reg_save_ar_lc; /* save register for ar.lc. */
154 int reg_save_gp; /* save register for gp. */
155 int n_input_regs; /* number of input registers used. */
156 int n_local_regs; /* number of local registers used. */
157 int n_output_regs; /* number of output registers used. */
158 int n_rotate_regs; /* number of rotating registers used. */
160 char need_regstk; /* true if a .regstk directive needed. */
161 char initialized; /* true if the data is finalized. */
164 /* Current frame information calculated by ia64_compute_frame_size. */
165 static struct ia64_frame_info current_frame_info;
167 static int ia64_first_cycle_multipass_dfa_lookahead (void);
168 static void ia64_dependencies_evaluation_hook (rtx, rtx);
169 static void ia64_init_dfa_pre_cycle_insn (void);
170 static rtx ia64_dfa_pre_cycle_insn (void);
171 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
172 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
173 static rtx gen_tls_get_addr (void);
174 static rtx gen_thread_pointer (void);
175 static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
176 static int find_gr_spill (int);
177 static int next_scratch_gr_reg (void);
178 static void mark_reg_gr_used_mask (rtx, void *);
179 static void ia64_compute_frame_size (HOST_WIDE_INT);
180 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
181 static void finish_spill_pointers (void);
182 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
183 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
184 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
185 static rtx gen_movdi_x (rtx, rtx, rtx);
186 static rtx gen_fr_spill_x (rtx, rtx, rtx);
187 static rtx gen_fr_restore_x (rtx, rtx, rtx);
189 static enum machine_mode hfa_element_mode (tree, int);
190 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
191 tree, int *, int);
192 static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
193 tree, bool);
194 static bool ia64_function_ok_for_sibcall (tree, tree);
195 static bool ia64_return_in_memory (tree, tree);
196 static bool ia64_rtx_costs (rtx, int, int, int *);
197 static void fix_range (const char *);
198 static struct machine_function * ia64_init_machine_status (void);
199 static void emit_insn_group_barriers (FILE *);
200 static void emit_all_insn_group_barriers (FILE *);
201 static void final_emit_insn_group_barriers (FILE *);
202 static void emit_predicate_relation_info (void);
203 static void ia64_reorg (void);
204 static bool ia64_in_small_data_p (tree);
205 static void process_epilogue (void);
206 static int process_set (FILE *, rtx);
208 static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
209 static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
210 static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
211 int, tree, rtx);
212 static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
213 static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
214 static bool ia64_assemble_integer (rtx, unsigned int, int);
215 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
216 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
217 static void ia64_output_function_end_prologue (FILE *);
219 static int ia64_issue_rate (void);
220 static int ia64_adjust_cost (rtx, rtx, rtx, int);
221 static void ia64_sched_init (FILE *, int, int);
222 static void ia64_sched_finish (FILE *, int);
223 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
224 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
225 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
226 static int ia64_variable_issue (FILE *, int, rtx, int);
228 static struct bundle_state *get_free_bundle_state (void);
229 static void free_bundle_state (struct bundle_state *);
230 static void initiate_bundle_states (void);
231 static void finish_bundle_states (void);
232 static unsigned bundle_state_hash (const void *);
233 static int bundle_state_eq_p (const void *, const void *);
234 static int insert_bundle_state (struct bundle_state *);
235 static void initiate_bundle_state_table (void);
236 static void finish_bundle_state_table (void);
237 static int try_issue_nops (struct bundle_state *, int);
238 static int try_issue_insn (struct bundle_state *, rtx);
239 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
240 static int get_max_pos (state_t);
241 static int get_template (state_t, int);
243 static rtx get_next_important_insn (rtx, rtx);
244 static void bundling (FILE *, int, rtx, rtx);
246 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
247 HOST_WIDE_INT, tree);
248 static void ia64_file_start (void);
250 static void ia64_select_rtx_section (enum machine_mode, rtx,
251 unsigned HOST_WIDE_INT);
252 static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
253 ATTRIBUTE_UNUSED;
254 static void ia64_rwreloc_unique_section (tree, int)
255 ATTRIBUTE_UNUSED;
256 static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
257 unsigned HOST_WIDE_INT)
258 ATTRIBUTE_UNUSED;
259 static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
260 ATTRIBUTE_UNUSED;
262 static void ia64_hpux_add_extern_decl (tree decl)
263 ATTRIBUTE_UNUSED;
264 static void ia64_hpux_file_end (void)
265 ATTRIBUTE_UNUSED;
266 static void ia64_init_libfuncs (void)
267 ATTRIBUTE_UNUSED;
268 static void ia64_hpux_init_libfuncs (void)
269 ATTRIBUTE_UNUSED;
270 static void ia64_sysv4_init_libfuncs (void)
271 ATTRIBUTE_UNUSED;
272 static void ia64_vms_init_libfuncs (void)
273 ATTRIBUTE_UNUSED;
275 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
276 static void ia64_encode_section_info (tree, rtx, int);
277 static rtx ia64_struct_value_rtx (tree, int);
278 static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
281 /* Table of valid machine attributes. */
282 static const struct attribute_spec ia64_attribute_table[] =
284 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
285 { "syscall_linkage", 0, 0, false, true, true, NULL },
286 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
287 { NULL, 0, 0, false, false, false, NULL }
290 /* Initialize the GCC target structure. */
291 #undef TARGET_ATTRIBUTE_TABLE
292 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
294 #undef TARGET_INIT_BUILTINS
295 #define TARGET_INIT_BUILTINS ia64_init_builtins
297 #undef TARGET_EXPAND_BUILTIN
298 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
300 #undef TARGET_ASM_BYTE_OP
301 #define TARGET_ASM_BYTE_OP "\tdata1\t"
302 #undef TARGET_ASM_ALIGNED_HI_OP
303 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
304 #undef TARGET_ASM_ALIGNED_SI_OP
305 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
306 #undef TARGET_ASM_ALIGNED_DI_OP
307 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
308 #undef TARGET_ASM_UNALIGNED_HI_OP
309 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
310 #undef TARGET_ASM_UNALIGNED_SI_OP
311 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
312 #undef TARGET_ASM_UNALIGNED_DI_OP
313 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
314 #undef TARGET_ASM_INTEGER
315 #define TARGET_ASM_INTEGER ia64_assemble_integer
317 #undef TARGET_ASM_FUNCTION_PROLOGUE
318 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
319 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
320 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
321 #undef TARGET_ASM_FUNCTION_EPILOGUE
322 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
324 #undef TARGET_IN_SMALL_DATA_P
325 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
327 #undef TARGET_SCHED_ADJUST_COST
328 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
329 #undef TARGET_SCHED_ISSUE_RATE
330 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
331 #undef TARGET_SCHED_VARIABLE_ISSUE
332 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
333 #undef TARGET_SCHED_INIT
334 #define TARGET_SCHED_INIT ia64_sched_init
335 #undef TARGET_SCHED_FINISH
336 #define TARGET_SCHED_FINISH ia64_sched_finish
337 #undef TARGET_SCHED_REORDER
338 #define TARGET_SCHED_REORDER ia64_sched_reorder
339 #undef TARGET_SCHED_REORDER2
340 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
342 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
343 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
345 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
346 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
348 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
349 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
350 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
351 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
353 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
354 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
355 ia64_first_cycle_multipass_dfa_lookahead_guard
357 #undef TARGET_SCHED_DFA_NEW_CYCLE
358 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
360 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
361 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
362 #undef TARGET_PASS_BY_REFERENCE
363 #define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
365 #undef TARGET_ASM_OUTPUT_MI_THUNK
366 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
367 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
368 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
370 #undef TARGET_ASM_FILE_START
371 #define TARGET_ASM_FILE_START ia64_file_start
373 #undef TARGET_RTX_COSTS
374 #define TARGET_RTX_COSTS ia64_rtx_costs
375 #undef TARGET_ADDRESS_COST
376 #define TARGET_ADDRESS_COST hook_int_rtx_0
378 #undef TARGET_MACHINE_DEPENDENT_REORG
379 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
381 #undef TARGET_ENCODE_SECTION_INFO
382 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
384 /* ??? ABI doesn't allow us to define this. */
385 #if 0
386 #undef TARGET_PROMOTE_FUNCTION_ARGS
387 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
388 #endif
390 /* ??? ABI doesn't allow us to define this. */
391 #if 0
392 #undef TARGET_PROMOTE_FUNCTION_RETURN
393 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
394 #endif
396 /* ??? Investigate. */
397 #if 0
398 #undef TARGET_PROMOTE_PROTOTYPES
399 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
400 #endif
402 #undef TARGET_STRUCT_VALUE_RTX
403 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
404 #undef TARGET_RETURN_IN_MEMORY
405 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
406 #undef TARGET_SETUP_INCOMING_VARARGS
407 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
408 #undef TARGET_STRICT_ARGUMENT_NAMING
409 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
410 #undef TARGET_MUST_PASS_IN_STACK
411 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
413 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
414 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
416 #undef TARGET_UNWIND_EMIT
417 #define TARGET_UNWIND_EMIT process_for_unwind_directive
419 struct gcc_target targetm = TARGET_INITIALIZER;
421 typedef enum
423 ADDR_AREA_NORMAL, /* normal address area */
424 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
426 ia64_addr_area;
428 static GTY(()) tree small_ident1;
429 static GTY(()) tree small_ident2;
431 static void
432 init_idents (void)
434 if (small_ident1 == 0)
436 small_ident1 = get_identifier ("small");
437 small_ident2 = get_identifier ("__small__");
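/* Example of the "model" attribute handled in this file (illustrative
   declaration, not taken from this source):

     static int counter __attribute__ ((model ("small")));

   which places COUNTER in the small address area so that its address fits
   the 22-bit addl immediate.  */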
441 /* Retrieve the address area that has been chosen for the given decl. */
443 static ia64_addr_area
444 ia64_get_addr_area (tree decl)
446 tree model_attr;
448 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
449 if (model_attr)
451 tree id;
453 init_idents ();
454 id = TREE_VALUE (TREE_VALUE (model_attr));
455 if (id == small_ident1 || id == small_ident2)
456 return ADDR_AREA_SMALL;
458 return ADDR_AREA_NORMAL;
461 static tree
462 ia64_handle_model_attribute (tree *node, tree name, tree args, int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
464 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
465 ia64_addr_area area;
466 tree arg, decl = *node;
468 init_idents ();
469 arg = TREE_VALUE (args);
470 if (arg == small_ident1 || arg == small_ident2)
472 addr_area = ADDR_AREA_SMALL;
474 else
476 warning ("invalid argument of `%s' attribute",
477 IDENTIFIER_POINTER (name));
478 *no_add_attrs = true;
481 switch (TREE_CODE (decl))
483 case VAR_DECL:
484 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
485 == FUNCTION_DECL)
486 && !TREE_STATIC (decl))
488 error ("%Jan address area attribute cannot be specified for "
489 "local variables", decl, decl);
490 *no_add_attrs = true;
492 area = ia64_get_addr_area (decl);
493 if (area != ADDR_AREA_NORMAL && addr_area != area)
495 error ("%Jaddress area of '%s' conflicts with previous "
496 "declaration", decl, decl);
497 *no_add_attrs = true;
499 break;
501 case FUNCTION_DECL:
502 error ("%Jaddress area attribute cannot be specified for functions",
503 decl, decl);
504 *no_add_attrs = true;
505 break;
507 default:
508 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
509 *no_add_attrs = true;
510 break;
513 return NULL_TREE;
516 static void
517 ia64_encode_addr_area (tree decl, rtx symbol)
519 int flags;
521 flags = SYMBOL_REF_FLAGS (symbol);
522 switch (ia64_get_addr_area (decl))
524 case ADDR_AREA_NORMAL: break;
525 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
526 default: abort ();
528 SYMBOL_REF_FLAGS (symbol) = flags;
531 static void
532 ia64_encode_section_info (tree decl, rtx rtl, int first)
534 default_encode_section_info (decl, rtl, first);
536 /* Careful not to prod global register variables. */
537 if (TREE_CODE (decl) == VAR_DECL
538 && GET_CODE (DECL_RTL (decl)) == MEM
539 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
540 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
541 ia64_encode_addr_area (decl, XEXP (rtl, 0));
544 /* Return 1 if the operands of a move are ok. */
547 ia64_move_ok (rtx dst, rtx src)
549 /* If we're under init_recog_no_volatile, we'll not be able to use
550 memory_operand. So check the code directly and don't worry about
551 the validity of the underlying address, which should have been
552 checked elsewhere anyway. */
553 if (GET_CODE (dst) != MEM)
554 return 1;
555 if (GET_CODE (src) == MEM)
556 return 0;
557 if (register_operand (src, VOIDmode))
558 return 1;
560 /* Otherwise, this must be a constant, and it must be either 0, 0.0, or 1.0. */
561 if (INTEGRAL_MODE_P (GET_MODE (dst)))
562 return src == const0_rtx;
563 else
564 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
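/* Return nonzero when exactly one of OP1 and OP2 satisfies basereg_operand.  */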
568 addp4_optimize_ok (rtx op1, rtx op2)
570 return (basereg_operand (op1, GET_MODE(op1)) !=
571 basereg_operand (op2, GET_MODE(op2)));
574 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
575 Return the length of the field, or <= 0 on failure. */
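/* For example, a mask of 0xff00 with a shift count of 8 describes an 8-bit
   field: (0xff00 >> 8) + 1 == 0x100 and exact_log2 (0x100) == 8.  A mask of
   0xf0f0 with the same shift fails, because the shifted value is not a solid
   block of ones and exact_log2 returns a negative value.  */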
578 ia64_depz_field_mask (rtx rop, rtx rshift)
580 unsigned HOST_WIDE_INT op = INTVAL (rop);
581 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
583 /* Get rid of the zero bits we're shifting in. */
584 op >>= shift;
586 /* We must now have a solid block of 1's at bit 0. */
587 return exact_log2 (op + 1);
590 /* Expand a symbolic constant load. */
592 void
593 ia64_expand_load_address (rtx dest, rtx src)
595 if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (src))
596 abort ();
597 if (GET_CODE (dest) != REG)
598 abort ();
600 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
601 having to pointer-extend the value afterward. Other forms of address
602 computation below are also more natural to compute as 64-bit quantities.
603 If we've been given an SImode destination register, change it. */
604 if (GET_MODE (dest) != Pmode)
605 dest = gen_rtx_REG (Pmode, REGNO (dest));
607 if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
609 emit_insn (gen_rtx_SET (VOIDmode, dest, src));
610 return;
612 else if (TARGET_AUTO_PIC)
614 emit_insn (gen_load_gprel64 (dest, src));
615 return;
617 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
619 emit_insn (gen_load_fptr (dest, src));
620 return;
622 else if (sdata_symbolic_operand (src, VOIDmode))
624 emit_insn (gen_load_gprel (dest, src));
625 return;
628 if (GET_CODE (src) == CONST
629 && GET_CODE (XEXP (src, 0)) == PLUS
630 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
631 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x3fff) != 0)
633 rtx sym = XEXP (XEXP (src, 0), 0);
634 HOST_WIDE_INT ofs, hi, lo;
636 /* Split the offset into a sign extended 14-bit low part
637 and a complementary high part. */
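/* For instance, an offset of 0x2000 has low bits set, so it is split as
   lo = -0x2000 and hi = 0x4000; the recursive call below loads SYM+HI and
   the trailing add folds LO back in.  */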
638 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
639 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
640 hi = ofs - lo;
642 ia64_expand_load_address (dest, plus_constant (sym, hi));
643 emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
645 else
647 rtx tmp;
649 tmp = gen_rtx_HIGH (Pmode, src);
650 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
651 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
653 tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
654 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
658 static GTY(()) rtx gen_tls_tga;
659 static rtx
660 gen_tls_get_addr (void)
662 if (!gen_tls_tga)
663 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
664 return gen_tls_tga;
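/* The IA-64 software conventions dedicate r13 to the thread pointer.  */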
667 static GTY(()) rtx thread_pointer_rtx;
668 static rtx
669 gen_thread_pointer (void)
671 if (!thread_pointer_rtx)
672 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
673 return thread_pointer_rtx;
676 static rtx
677 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
679 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
680 rtx orig_op0 = op0;
682 switch (tls_kind)
684 case TLS_MODEL_GLOBAL_DYNAMIC:
685 start_sequence ();
687 tga_op1 = gen_reg_rtx (Pmode);
688 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
689 tga_op1 = gen_const_mem (Pmode, tga_op1);
691 tga_op2 = gen_reg_rtx (Pmode);
692 emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
693 tga_op2 = gen_const_mem (Pmode, tga_op2);
695 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
696 LCT_CONST, Pmode, 2, tga_op1,
697 Pmode, tga_op2, Pmode);
699 insns = get_insns ();
700 end_sequence ();
702 if (GET_MODE (op0) != Pmode)
703 op0 = tga_ret;
704 emit_libcall_block (insns, op0, tga_ret, op1);
705 break;
707 case TLS_MODEL_LOCAL_DYNAMIC:
708 /* ??? This isn't the completely proper way to do local-dynamic.
709 If the call to __tls_get_addr is used only by a single symbol,
710 then we should (somehow) move the dtprel to the second arg
711 to avoid the extra add. */
712 start_sequence ();
714 tga_op1 = gen_reg_rtx (Pmode);
715 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
716 tga_op1 = gen_const_mem (Pmode, tga_op1);
718 tga_op2 = const0_rtx;
720 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
721 LCT_CONST, Pmode, 2, tga_op1,
722 Pmode, tga_op2, Pmode);
724 insns = get_insns ();
725 end_sequence ();
727 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
728 UNSPEC_LD_BASE);
729 tmp = gen_reg_rtx (Pmode);
730 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
732 if (!register_operand (op0, Pmode))
733 op0 = gen_reg_rtx (Pmode);
734 if (TARGET_TLS64)
736 emit_insn (gen_load_dtprel (op0, op1));
737 emit_insn (gen_adddi3 (op0, tmp, op0));
739 else
740 emit_insn (gen_add_dtprel (op0, tmp, op1));
741 break;
743 case TLS_MODEL_INITIAL_EXEC:
744 tmp = gen_reg_rtx (Pmode);
745 emit_insn (gen_load_ltoff_tprel (tmp, op1));
746 tmp = gen_const_mem (Pmode, tmp);
747 tmp = force_reg (Pmode, tmp);
749 if (!register_operand (op0, Pmode))
750 op0 = gen_reg_rtx (Pmode);
751 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
752 break;
754 case TLS_MODEL_LOCAL_EXEC:
755 if (!register_operand (op0, Pmode))
756 op0 = gen_reg_rtx (Pmode);
757 if (TARGET_TLS64)
759 emit_insn (gen_load_tprel (op0, op1));
760 emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
762 else
763 emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
764 break;
766 default:
767 abort ();
770 if (orig_op0 == op0)
771 return NULL_RTX;
772 if (GET_MODE (orig_op0) == Pmode)
773 return op0;
774 return gen_lowpart (GET_MODE (orig_op0), op0);
778 ia64_expand_move (rtx op0, rtx op1)
780 enum machine_mode mode = GET_MODE (op0);
782 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
783 op1 = force_reg (mode, op1);
785 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
787 enum tls_model tls_kind;
788 if (GET_CODE (op1) == SYMBOL_REF
789 && (tls_kind = SYMBOL_REF_TLS_MODEL (op1)))
790 return ia64_expand_tls_address (tls_kind, op0, op1);
792 if (!TARGET_NO_PIC && reload_completed)
794 ia64_expand_load_address (op0, op1);
795 return NULL_RTX;
799 return op1;
802 /* Split a move from OP1 to OP0 conditional on COND. */
804 void
805 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
807 rtx insn, first = get_last_insn ();
809 emit_move_insn (op0, op1);
811 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
812 if (INSN_P (insn))
813 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
814 PATTERN (insn));
817 /* Split a post-reload TImode or TFmode reference into two DImode
818 components. This is made extra difficult by the fact that we do
819 not get any scratch registers to work with, because reload cannot
820 be prevented from giving us a scratch that overlaps the register
821 pair involved. So instead, when addressing memory, we tweak the
822 pointer register up and back down with POST_INCs. Or up and not
823 back down when we can get away with it.
825 REVERSED is true when the loads must be done in reversed order
826 (high word first) for correctness. DEAD is true when the pointer
827 dies with the second insn we generate and therefore the second
828 address must not carry a postmodify.
830 May return an insn which is to be emitted after the moves. */
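/* As an illustration, a TImode load from (mem (reg p)) is normally split
   into a DImode load using a POST_INC of P followed by a DImode load using
   a POST_DEC of P, which leaves P unchanged unless it is known to be dead.  */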
832 static rtx
833 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
835 rtx fixup = 0;
837 switch (GET_CODE (in))
839 case REG:
840 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
841 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
842 break;
844 case CONST_INT:
845 case CONST_DOUBLE:
846 /* Cannot occur reversed. */
847 if (reversed) abort ();
849 if (GET_MODE (in) != TFmode)
850 split_double (in, &out[0], &out[1]);
851 else
852 /* split_double does not understand how to split a TFmode
853 quantity into a pair of DImode constants. */
855 REAL_VALUE_TYPE r;
856 unsigned HOST_WIDE_INT p[2];
857 long l[4]; /* TFmode is 128 bits */
859 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
860 real_to_target (l, &r, TFmode);
862 if (FLOAT_WORDS_BIG_ENDIAN)
864 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
865 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
867 else
869 p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
870 p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
872 out[0] = GEN_INT (p[0]);
873 out[1] = GEN_INT (p[1]);
875 break;
877 case MEM:
879 rtx base = XEXP (in, 0);
880 rtx offset;
882 switch (GET_CODE (base))
884 case REG:
885 if (!reversed)
887 out[0] = adjust_automodify_address
888 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
889 out[1] = adjust_automodify_address
890 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
892 else
894 /* Reversal requires a pre-increment, which can only
895 be done as a separate insn. */
896 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
897 out[0] = adjust_automodify_address
898 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
899 out[1] = adjust_address (in, DImode, 0);
901 break;
903 case POST_INC:
904 if (reversed || dead) abort ();
905 /* Just do the increment in two steps. */
906 out[0] = adjust_automodify_address (in, DImode, 0, 0);
907 out[1] = adjust_automodify_address (in, DImode, 0, 8);
908 break;
910 case POST_DEC:
911 if (reversed || dead) abort ();
912 /* Add 8, subtract 24. */
913 base = XEXP (base, 0);
914 out[0] = adjust_automodify_address
915 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
916 out[1] = adjust_automodify_address
917 (in, DImode,
918 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
920 break;
922 case POST_MODIFY:
923 if (reversed || dead) abort ();
924 /* Extract and adjust the modification. This case is
925 trickier than the others, because we might have an
926 index register, or we might have a combined offset that
927 doesn't fit a signed 9-bit displacement field. We can
928 assume the incoming expression is already legitimate. */
929 offset = XEXP (base, 1);
930 base = XEXP (base, 0);
932 out[0] = adjust_automodify_address
933 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
935 if (GET_CODE (XEXP (offset, 1)) == REG)
937 /* Can't adjust the postmodify to match. Emit the
938 original, then a separate addition insn. */
939 out[1] = adjust_automodify_address (in, DImode, 0, 8);
940 fixup = gen_adddi3 (base, base, GEN_INT (-8));
942 else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
943 abort ();
944 else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
946 /* Again the postmodify cannot be made to match, but
947 in this case it's more efficient to get rid of the
948 postmodify entirely and fix up with an add insn. */
949 out[1] = adjust_automodify_address (in, DImode, base, 8);
950 fixup = gen_adddi3 (base, base,
951 GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
953 else
955 /* Combined offset still fits in the displacement field.
956 (We cannot overflow it at the high end.) */
957 out[1] = adjust_automodify_address
958 (in, DImode,
959 gen_rtx_POST_MODIFY (Pmode, base,
960 gen_rtx_PLUS (Pmode, base,
961 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
964 break;
966 default:
967 abort ();
969 break;
972 default:
973 abort ();
976 return fixup;
979 /* Split a TImode or TFmode move instruction after reload.
980 This is used by *movtf_internal and *movti_internal. */
981 void
982 ia64_split_tmode_move (rtx operands[])
984 rtx in[2], out[2], insn;
985 rtx fixup[2];
986 bool dead = false;
987 bool reversed = false;
989 /* It is possible for reload to decide to overwrite a pointer with
990 the value it points to. In that case we have to do the loads in
991 the appropriate order so that the pointer is not destroyed too
992 early. Also we must not generate a postmodify for that second
993 load, or rws_access_regno will abort. */
994 if (GET_CODE (operands[1]) == MEM
995 && reg_overlap_mentioned_p (operands[0], operands[1]))
997 rtx base = XEXP (operands[1], 0);
998 while (GET_CODE (base) != REG)
999 base = XEXP (base, 0);
1001 if (REGNO (base) == REGNO (operands[0]))
1002 reversed = true;
1003 dead = true;
1005 /* Another reason to do the moves in reversed order is if the first
1006 element of the target register pair is also the second element of
1007 the source register pair. */
1008 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1009 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1010 reversed = true;
1012 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1013 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
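/* Attach a REG_INC note for the base register whenever EXP is a memory
   reference with an automodified address, so that later passes see the
   side effect on that register.  */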
1015 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1016 if (GET_CODE (EXP) == MEM \
1017 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1018 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1019 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1020 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1021 XEXP (XEXP (EXP, 0), 0), \
1022 REG_NOTES (INSN))
1024 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1025 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1026 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1028 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1029 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1030 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1032 if (fixup[0])
1033 emit_insn (fixup[0]);
1034 if (fixup[1])
1035 emit_insn (fixup[1]);
1037 #undef MAYBE_ADD_REG_INC_NOTE
1040 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1041 through memory plus an extra GR scratch register. Except that you can
1042 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1043 SECONDARY_RELOAD_CLASS, but not both.
1045 We got into problems in the first place by allowing a construct like
1046 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1047 This solution attempts to prevent this situation from occurring. When
1048 we see something like the above, we spill the inner register to memory. */
1051 spill_xfmode_operand (rtx in, int force)
1053 if (GET_CODE (in) == SUBREG
1054 && GET_MODE (SUBREG_REG (in)) == TImode
1055 && GET_CODE (SUBREG_REG (in)) == REG)
1057 rtx memt = assign_stack_temp (TImode, 16, 0);
1058 emit_move_insn (memt, SUBREG_REG (in));
1059 return adjust_address (memt, XFmode, 0);
1061 else if (force && GET_CODE (in) == REG)
1063 rtx memx = assign_stack_temp (XFmode, 16, 0);
1064 emit_move_insn (memx, in);
1065 return memx;
1067 else
1068 return in;
1071 /* Emit comparison instruction if necessary, returning the expression
1072 that holds the compare result in the proper mode. */
1074 static GTY(()) rtx cmptf_libfunc;
1077 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1079 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1080 rtx cmp;
1082 /* If we have a BImode input, then we already have a compare result, and
1083 do not need to emit another comparison. */
1084 if (GET_MODE (op0) == BImode)
1086 if ((code == NE || code == EQ) && op1 == const0_rtx)
1087 cmp = op0;
1088 else
1089 abort ();
1091 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1092 magic number as its third argument, which indicates what to do.
1093 The return value is an integer to be compared against zero. */
1094 else if (GET_MODE (op0) == TFmode)
1096 enum qfcmp_magic {
1097 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1098 QCMP_UNORD = 2,
1099 QCMP_EQ = 4,
1100 QCMP_LT = 8,
1101 QCMP_GT = 16
1102 } magic;
1103 enum rtx_code ncode;
1104 rtx ret, insns;
1105 if (!cmptf_libfunc || GET_MODE (op1) != TFmode)
1106 abort ();
1107 switch (code)
1109 /* 1 = equal, 0 = not equal. Equality operators do
1110 not raise FP_INVALID when given an SNaN operand. */
1111 case EQ: magic = QCMP_EQ; ncode = NE; break;
1112 case NE: magic = QCMP_EQ; ncode = EQ; break;
1113 /* isunordered() from C99. */
1114 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1115 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1116 /* Relational operators raise FP_INVALID when given
1117 an SNaN operand. */
1118 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1119 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1120 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1121 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1122 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1123 Expanders for buneq etc. would have to be added to ia64.md
1124 for this to be useful. */
1125 default: abort ();
1128 start_sequence ();
1130 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1131 op0, TFmode, op1, TFmode,
1132 GEN_INT (magic), DImode);
1133 cmp = gen_reg_rtx (BImode);
1134 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1135 gen_rtx_fmt_ee (ncode, BImode,
1136 ret, const0_rtx)));
1138 insns = get_insns ();
1139 end_sequence ();
1141 emit_libcall_block (insns, cmp, cmp,
1142 gen_rtx_fmt_ee (code, BImode, op0, op1));
1143 code = NE;
1145 else
1147 cmp = gen_reg_rtx (BImode);
1148 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1149 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1150 code = NE;
1153 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1156 /* Emit the appropriate sequence for a call. */
1158 void
1159 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1160 int sibcall_p)
1162 rtx insn, b0;
1164 addr = XEXP (addr, 0);
1165 addr = convert_memory_address (DImode, addr);
1166 b0 = gen_rtx_REG (DImode, R_BR (0));
1168 /* ??? Should do this for functions known to bind local too. */
1169 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1171 if (sibcall_p)
1172 insn = gen_sibcall_nogp (addr);
1173 else if (! retval)
1174 insn = gen_call_nogp (addr, b0);
1175 else
1176 insn = gen_call_value_nogp (retval, addr, b0);
1177 insn = emit_call_insn (insn);
1179 else
1181 if (sibcall_p)
1182 insn = gen_sibcall_gp (addr);
1183 else if (! retval)
1184 insn = gen_call_gp (addr, b0);
1185 else
1186 insn = gen_call_value_gp (retval, addr, b0);
1187 insn = emit_call_insn (insn);
1189 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1192 if (sibcall_p)
1193 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
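/* Restore the GP after a call that may have changed it, either from the
   general register chosen by ia64_compute_frame_size or from its slot in
   the spill area on the stack.  */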
1196 void
1197 ia64_reload_gp (void)
1199 rtx tmp;
1201 if (current_frame_info.reg_save_gp)
1202 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1203 else
1205 HOST_WIDE_INT offset;
1207 offset = (current_frame_info.spill_cfa_off
1208 + current_frame_info.spill_size);
1209 if (frame_pointer_needed)
1211 tmp = hard_frame_pointer_rtx;
1212 offset = -offset;
1214 else
1216 tmp = stack_pointer_rtx;
1217 offset = current_frame_info.total_size - offset;
1220 if (CONST_OK_FOR_I (offset))
1221 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1222 tmp, GEN_INT (offset)));
1223 else
1225 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1226 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1227 pic_offset_table_rtx, tmp));
1230 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1233 emit_move_insn (pic_offset_table_rtx, tmp);
1236 void
1237 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1238 rtx scratch_b, int noreturn_p, int sibcall_p)
1240 rtx insn;
1241 bool is_desc = false;
1243 /* If we find we're calling through a register, then we're actually
1244 calling through a descriptor, so load up the values. */
1245 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1247 rtx tmp;
1248 bool addr_dead_p;
1250 /* ??? We are currently constrained to *not* use peep2, because
1251 we can legitimately change the global lifetime of the GP
1252 (in the form of killing where previously live). This is
1253 because a call through a descriptor doesn't use the previous
1254 value of the GP, while a direct call does, and we do not
1255 commit to either form until the split here.
1257 That said, this means that we lack precise life info for
1258 whether ADDR is dead after this call. This is not terribly
1259 important, since we can fix things up essentially for free
1260 with the POST_DEC below, but it's nice to not use it when we
1261 can immediately tell it's not necessary. */
1262 addr_dead_p = ((noreturn_p || sibcall_p
1263 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1264 REGNO (addr)))
1265 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1267 /* Load the code address into scratch_b. */
1268 tmp = gen_rtx_POST_INC (Pmode, addr);
1269 tmp = gen_rtx_MEM (Pmode, tmp);
1270 emit_move_insn (scratch_r, tmp);
1271 emit_move_insn (scratch_b, scratch_r);
1273 /* Load the GP address. If ADDR is not dead here, then we must
1274 revert the change made above via the POST_INCREMENT. */
1275 if (!addr_dead_p)
1276 tmp = gen_rtx_POST_DEC (Pmode, addr);
1277 else
1278 tmp = addr;
1279 tmp = gen_rtx_MEM (Pmode, tmp);
1280 emit_move_insn (pic_offset_table_rtx, tmp);
1282 is_desc = true;
1283 addr = scratch_b;
1286 if (sibcall_p)
1287 insn = gen_sibcall_nogp (addr);
1288 else if (retval)
1289 insn = gen_call_value_nogp (retval, addr, retaddr);
1290 else
1291 insn = gen_call_nogp (addr, retaddr);
1292 emit_call_insn (insn);
1294 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1295 ia64_reload_gp ();
1298 /* Begin the assembly file. */
1300 static void
1301 ia64_file_start (void)
1303 default_file_start ();
1304 emit_safe_across_calls ();
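/* Emit a ".pred.safe_across_calls" directive listing the maximal ranges of
   predicate registers that are preserved across calls; with the default
   call_used_regs this is typically "p1-p5,p16-p63".  */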
1307 void
1308 emit_safe_across_calls (void)
1310 unsigned int rs, re;
1311 int out_state;
1313 rs = 1;
1314 out_state = 0;
1315 while (1)
1317 while (rs < 64 && call_used_regs[PR_REG (rs)])
1318 rs++;
1319 if (rs >= 64)
1320 break;
1321 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1322 continue;
1323 if (out_state == 0)
1325 fputs ("\t.pred.safe_across_calls ", asm_out_file);
1326 out_state = 1;
1328 else
1329 fputc (',', asm_out_file);
1330 if (re == rs + 1)
1331 fprintf (asm_out_file, "p%u", rs);
1332 else
1333 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
1334 rs = re + 1;
1336 if (out_state)
1337 fputc ('\n', asm_out_file);
1340 /* Helper function for ia64_compute_frame_size: find an appropriate general
1341 register to spill some special register to.  Registers in GR0 to GR31 already
1342 allocated by this routine are recorded in current_frame_info.gr_used_mask.
1343 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1345 static int
1346 find_gr_spill (int try_locals)
1348 int regno;
1350 /* If this is a leaf function, first try an otherwise unused
1351 call-clobbered register. */
1352 if (current_function_is_leaf)
1354 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1355 if (! regs_ever_live[regno]
1356 && call_used_regs[regno]
1357 && ! fixed_regs[regno]
1358 && ! global_regs[regno]
1359 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1361 current_frame_info.gr_used_mask |= 1 << regno;
1362 return regno;
1366 if (try_locals)
1368 regno = current_frame_info.n_local_regs;
1369 /* If there is a frame pointer, then we can't use loc79, because
1370 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1371 reg_name switching code in ia64_expand_prologue. */
1372 if (regno < (80 - frame_pointer_needed))
1374 current_frame_info.n_local_regs = regno + 1;
1375 return LOC_REG (0) + regno;
1379 /* Failed to find a general register to spill to. Must use stack. */
1380 return 0;
1383 /* In order to make for nice schedules, we try to allocate every temporary
1384 to a different register. We must of course stay away from call-saved,
1385 fixed, and global registers. We must also stay away from registers
1386 allocated in current_frame_info.gr_used_mask, since those include regs
1387 used all through the prologue.
1389 Any register allocated here must be used immediately. The idea is to
1390 aid scheduling, not to solve data flow problems. */
1392 static int last_scratch_gr_reg;
1394 static int
1395 next_scratch_gr_reg (void)
1397 int i, regno;
1399 for (i = 0; i < 32; ++i)
1401 regno = (last_scratch_gr_reg + i + 1) & 31;
1402 if (call_used_regs[regno]
1403 && ! fixed_regs[regno]
1404 && ! global_regs[regno]
1405 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1407 last_scratch_gr_reg = regno;
1408 return regno;
1412 /* There must be _something_ available. */
1413 abort ();
1416 /* Helper function for ia64_compute_frame_size, called through
1417 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1419 static void
1420 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
1422 unsigned int regno = REGNO (reg);
1423 if (regno < 32)
1425 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1426 for (i = 0; i < n; ++i)
1427 current_frame_info.gr_used_mask |= 1 << (regno + i);
1431 /* Compute register save masks, offsets and sizes for the current function
1432 and record them in current_frame_info.  SIZE is the number of bytes of space
1433 needed for local variables. */
1435 static void
1436 ia64_compute_frame_size (HOST_WIDE_INT size)
1438 HOST_WIDE_INT total_size;
1439 HOST_WIDE_INT spill_size = 0;
1440 HOST_WIDE_INT extra_spill_size = 0;
1441 HOST_WIDE_INT pretend_args_size;
1442 HARD_REG_SET mask;
1443 int n_spilled = 0;
1444 int spilled_gr_p = 0;
1445 int spilled_fr_p = 0;
1446 unsigned int regno;
1447 int i;
1449 if (current_frame_info.initialized)
1450 return;
1452 memset (&current_frame_info, 0, sizeof current_frame_info);
1453 CLEAR_HARD_REG_SET (mask);
1455 /* Don't allocate scratches to the return register. */
1456 diddle_return_value (mark_reg_gr_used_mask, NULL);
1458 /* Don't allocate scratches to the EH scratch registers. */
1459 if (cfun->machine->ia64_eh_epilogue_sp)
1460 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1461 if (cfun->machine->ia64_eh_epilogue_bsp)
1462 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1464 /* Find the size of the register stack frame. We have only 80 local
1465 registers, because we reserve 8 for the inputs and 8 for the
1466 outputs. */
1468 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1469 since we'll be adjusting that down later. */
1470 regno = LOC_REG (78) + ! frame_pointer_needed;
1471 for (; regno >= LOC_REG (0); regno--)
1472 if (regs_ever_live[regno])
1473 break;
1474 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1476 /* For functions marked with the syscall_linkage attribute, we must mark
1477 all eight input registers as in use, so that locals aren't visible to
1478 the caller. */
1480 if (cfun->machine->n_varargs > 0
1481 || lookup_attribute ("syscall_linkage",
1482 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1483 current_frame_info.n_input_regs = 8;
1484 else
1486 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1487 if (regs_ever_live[regno])
1488 break;
1489 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1492 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1493 if (regs_ever_live[regno])
1494 break;
1495 i = regno - OUT_REG (0) + 1;
1497 /* When -p profiling, we need one output register for the mcount argument.
1498 Likewise for -a profiling for the bb_init_func argument. For -ax
1499 profiling, we need two output registers for the two bb_init_trace_func
1500 arguments. */
1501 if (current_function_profile)
1502 i = MAX (i, 1);
1503 current_frame_info.n_output_regs = i;
1505 /* ??? No rotating register support yet. */
1506 current_frame_info.n_rotate_regs = 0;
1508 /* Discover which registers need spilling, and how much room that
1509 will take. Begin with floating point and general registers,
1510 which will always wind up on the stack. */
1512 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1513 if (regs_ever_live[regno] && ! call_used_regs[regno])
1515 SET_HARD_REG_BIT (mask, regno);
1516 spill_size += 16;
1517 n_spilled += 1;
1518 spilled_fr_p = 1;
1521 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1522 if (regs_ever_live[regno] && ! call_used_regs[regno])
1524 SET_HARD_REG_BIT (mask, regno);
1525 spill_size += 8;
1526 n_spilled += 1;
1527 spilled_gr_p = 1;
1530 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1531 if (regs_ever_live[regno] && ! call_used_regs[regno])
1533 SET_HARD_REG_BIT (mask, regno);
1534 spill_size += 8;
1535 n_spilled += 1;
1538 /* Now come all special registers that might get saved in other
1539 general registers. */
1541 if (frame_pointer_needed)
1543 current_frame_info.reg_fp = find_gr_spill (1);
1544 /* If we did not get a register, then we take LOC79. This is guaranteed
1545 to be free, even if regs_ever_live is already set, because this is
1546 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1547 as we don't count loc79 above. */
1548 if (current_frame_info.reg_fp == 0)
1550 current_frame_info.reg_fp = LOC_REG (79);
1551 current_frame_info.n_local_regs++;
1555 if (! current_function_is_leaf)
1557 /* Emit a save of BR0 if we call other functions. Do this even
1558 if this function doesn't return, as EH depends on this to be
1559 able to unwind the stack. */
1560 SET_HARD_REG_BIT (mask, BR_REG (0));
1562 current_frame_info.reg_save_b0 = find_gr_spill (1);
1563 if (current_frame_info.reg_save_b0 == 0)
1565 spill_size += 8;
1566 n_spilled += 1;
1569 /* Similarly for ar.pfs. */
1570 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1571 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1572 if (current_frame_info.reg_save_ar_pfs == 0)
1574 extra_spill_size += 8;
1575 n_spilled += 1;
1578 /* Similarly for gp. Note that if we're calling setjmp, the stacked
1579 registers are clobbered, so we fall back to the stack. */
1580 current_frame_info.reg_save_gp
1581 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
1582 if (current_frame_info.reg_save_gp == 0)
1584 SET_HARD_REG_BIT (mask, GR_REG (1));
1585 spill_size += 8;
1586 n_spilled += 1;
1589 else
1591 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1593 SET_HARD_REG_BIT (mask, BR_REG (0));
1594 spill_size += 8;
1595 n_spilled += 1;
1598 if (regs_ever_live[AR_PFS_REGNUM])
1600 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1601 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1602 if (current_frame_info.reg_save_ar_pfs == 0)
1604 extra_spill_size += 8;
1605 n_spilled += 1;
1610 /* Unwind descriptor hackery: things are most efficient if we allocate
1611 consecutive GR save registers for RP, PFS, FP in that order. However,
1612 it is absolutely critical that FP get the only hard register that's
1613 guaranteed to be free, so we allocated it first. If all three did
1614 happen to be allocated hard regs, and are consecutive, rearrange them
1615 into the preferred order now. */
1616 if (current_frame_info.reg_fp != 0
1617 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1618 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1620 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1621 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1622 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1625 /* See if we need to store the predicate register block. */
1626 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1627 if (regs_ever_live[regno] && ! call_used_regs[regno])
1628 break;
1629 if (regno <= PR_REG (63))
1631 SET_HARD_REG_BIT (mask, PR_REG (0));
1632 current_frame_info.reg_save_pr = find_gr_spill (1);
1633 if (current_frame_info.reg_save_pr == 0)
1635 extra_spill_size += 8;
1636 n_spilled += 1;
1639 /* ??? Mark them all as used so that register renaming and such
1640 are free to use them. */
1641 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1642 regs_ever_live[regno] = 1;
1645 /* If we're forced to use st8.spill, we're forced to save and restore
1646 ar.unat as well. The check for existing liveness allows inline asm
1647 to touch ar.unat. */
1648 if (spilled_gr_p || cfun->machine->n_varargs
1649 || regs_ever_live[AR_UNAT_REGNUM])
1651 regs_ever_live[AR_UNAT_REGNUM] = 1;
1652 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1653 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1654 if (current_frame_info.reg_save_ar_unat == 0)
1656 extra_spill_size += 8;
1657 n_spilled += 1;
1661 if (regs_ever_live[AR_LC_REGNUM])
1663 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1664 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1665 if (current_frame_info.reg_save_ar_lc == 0)
1667 extra_spill_size += 8;
1668 n_spilled += 1;
1672 /* If we have an odd number of words of pretend arguments written to
1673 the stack, then the FR save area will be unaligned. We round the
1674 size of this area up to keep things 16 byte aligned. */
1675 if (spilled_fr_p)
1676 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1677 else
1678 pretend_args_size = current_function_pretend_args_size;
1680 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1681 + current_function_outgoing_args_size);
1682 total_size = IA64_STACK_ALIGN (total_size);
1684 /* We always use the 16-byte scratch area provided by the caller, but
1685 if we are a leaf function, there's no one to which we need to provide
1686 a scratch area. */
1687 if (current_function_is_leaf)
1688 total_size = MAX (0, total_size - 16);
1690 current_frame_info.total_size = total_size;
1691 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1692 current_frame_info.spill_size = spill_size;
1693 current_frame_info.extra_spill_size = extra_spill_size;
1694 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1695 current_frame_info.n_spilled = n_spilled;
1696 current_frame_info.initialized = reload_completed;
1699 /* Compute the initial difference between the specified pair of registers. */
1701 HOST_WIDE_INT
1702 ia64_initial_elimination_offset (int from, int to)
1704 HOST_WIDE_INT offset;
1706 ia64_compute_frame_size (get_frame_size ());
1707 switch (from)
1709 case FRAME_POINTER_REGNUM:
1710 if (to == HARD_FRAME_POINTER_REGNUM)
1712 if (current_function_is_leaf)
1713 offset = -current_frame_info.total_size;
1714 else
1715 offset = -(current_frame_info.total_size
1716 - current_function_outgoing_args_size - 16);
1718 else if (to == STACK_POINTER_REGNUM)
1720 if (current_function_is_leaf)
1721 offset = 0;
1722 else
1723 offset = 16 + current_function_outgoing_args_size;
1725 else
1726 abort ();
1727 break;
1729 case ARG_POINTER_REGNUM:
1730 /* Arguments start above the 16 byte save area, unless stdarg,
1731 in which case we store through the 16 byte save area. */
1732 if (to == HARD_FRAME_POINTER_REGNUM)
1733 offset = 16 - current_function_pretend_args_size;
1734 else if (to == STACK_POINTER_REGNUM)
1735 offset = (current_frame_info.total_size
1736 + 16 - current_function_pretend_args_size);
1737 else
1738 abort ();
1739 break;
1741 default:
1742 abort ();
1745 return offset;
1748 /* If there are more than a trivial number of register spills, we use
1749 two interleaved iterators so that we can get two memory references
1750 per insn group.
1752 In order to simplify things in the prologue and epilogue expanders,
1753 we use helper functions to fix up the memory references after the
1754 fact with the appropriate offsets to a POST_MODIFY memory mode.
1755 The following data structure tracks the state of the two iterators
1756 while insns are being emitted. */
1758 struct spill_fill_data
1760 rtx init_after; /* point at which to emit initializations */
1761 rtx init_reg[2]; /* initial base register */
1762 rtx iter_reg[2]; /* the iterator registers */
1763 rtx *prev_addr[2]; /* address of last memory use */
1764 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
1765 HOST_WIDE_INT prev_off[2]; /* last offset */
1766 int n_iter; /* number of iterators in use */
1767 int next_iter; /* next iterator to use */
1768 unsigned int save_gr_used_mask;
1771 static struct spill_fill_data spill_fill_data;
1773 static void
1774 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
1776 int i;
1778 spill_fill_data.init_after = get_last_insn ();
1779 spill_fill_data.init_reg[0] = init_reg;
1780 spill_fill_data.init_reg[1] = init_reg;
1781 spill_fill_data.prev_addr[0] = NULL;
1782 spill_fill_data.prev_addr[1] = NULL;
1783 spill_fill_data.prev_insn[0] = NULL;
1784 spill_fill_data.prev_insn[1] = NULL;
1785 spill_fill_data.prev_off[0] = cfa_off;
1786 spill_fill_data.prev_off[1] = cfa_off;
1787 spill_fill_data.next_iter = 0;
1788 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1790 spill_fill_data.n_iter = 1 + (n_spills > 2);
1791 for (i = 0; i < spill_fill_data.n_iter; ++i)
1793 int regno = next_scratch_gr_reg ();
1794 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1795 current_frame_info.gr_used_mask |= 1 << regno;
1799 static void
1800 finish_spill_pointers (void)
1802 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1805 static rtx
1806 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
1808 int iter = spill_fill_data.next_iter;
1809 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1810 rtx disp_rtx = GEN_INT (disp);
1811 rtx mem;
1813 if (spill_fill_data.prev_addr[iter])
1815 if (CONST_OK_FOR_N (disp))
1817 *spill_fill_data.prev_addr[iter]
1818 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1819 gen_rtx_PLUS (DImode,
1820 spill_fill_data.iter_reg[iter],
1821 disp_rtx));
1822 REG_NOTES (spill_fill_data.prev_insn[iter])
1823 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
1824 REG_NOTES (spill_fill_data.prev_insn[iter]));
1826 else
1828 /* ??? Could use register post_modify for loads. */
1829 if (! CONST_OK_FOR_I (disp))
1831 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1832 emit_move_insn (tmp, disp_rtx);
1833 disp_rtx = tmp;
1835 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1836 spill_fill_data.iter_reg[iter], disp_rtx));
1839 /* Micro-optimization: if we've created a frame pointer, it's at
1840 CFA 0, which may allow the real iterator to be initialized lower,
1841 slightly increasing parallelism. Also, if there are few saves
1842 it may eliminate the iterator entirely. */
1843 else if (disp == 0
1844 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1845 && frame_pointer_needed)
1847 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1848 set_mem_alias_set (mem, get_varargs_alias_set ());
1849 return mem;
1851 else
1853 rtx seq, insn;
1855 if (disp == 0)
1856 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1857 spill_fill_data.init_reg[iter]);
1858 else
1860 start_sequence ();
1862 if (! CONST_OK_FOR_I (disp))
1864 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1865 emit_move_insn (tmp, disp_rtx);
1866 disp_rtx = tmp;
1869 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1870 spill_fill_data.init_reg[iter],
1871 disp_rtx));
1873 seq = get_insns ();
1874 end_sequence ();
1877 /* Careful for being the first insn in a sequence. */
1878 if (spill_fill_data.init_after)
1879 insn = emit_insn_after (seq, spill_fill_data.init_after);
1880 else
1882 rtx first = get_insns ();
1883 if (first)
1884 insn = emit_insn_before (seq, first);
1885 else
1886 insn = emit_insn (seq);
1888 spill_fill_data.init_after = insn;
1890 /* If DISP is 0, we may or may not have a further adjustment
1891 afterward. If we do, then the load/store insn may be modified
1892 to be a post-modify. If we don't, then this copy may be
1893 eliminated by copyprop_hardreg_forward, which makes this
1894 insn garbage, which runs afoul of the sanity check in
1895 propagate_one_insn. So mark this insn as legal to delete. */
1896 if (disp == 0)
1897 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1898 REG_NOTES (insn));
1901 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1903 /* ??? Not all of the spills are for varargs, but some of them are.
1904 The rest of the spills belong in an alias set of their own. But
1905 it doesn't actually hurt to include them here. */
1906 set_mem_alias_set (mem, get_varargs_alias_set ());
1908 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1909 spill_fill_data.prev_off[iter] = cfa_off;
1911 if (++iter >= spill_fill_data.n_iter)
1912 iter = 0;
1913 spill_fill_data.next_iter = iter;
1915 return mem;
1918 static void
1919 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
1920 rtx frame_reg)
1922 int iter = spill_fill_data.next_iter;
1923 rtx mem, insn;
1925 mem = spill_restore_mem (reg, cfa_off);
1926 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1927 spill_fill_data.prev_insn[iter] = insn;
1929 if (frame_reg)
1931 rtx base;
1932 HOST_WIDE_INT off;
1934 RTX_FRAME_RELATED_P (insn) = 1;
1936 /* Don't even pretend that the unwind code can intuit its way
1937 through a pair of interleaved post_modify iterators. Just
1938 provide the correct answer. */
1940 if (frame_pointer_needed)
1942 base = hard_frame_pointer_rtx;
1943 off = - cfa_off;
1945 else
1947 base = stack_pointer_rtx;
1948 off = current_frame_info.total_size - cfa_off;
1951 REG_NOTES (insn)
1952 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1953 gen_rtx_SET (VOIDmode,
1954 gen_rtx_MEM (GET_MODE (reg),
1955 plus_constant (base, off)),
1956 frame_reg),
1957 REG_NOTES (insn));
1961 static void
1962 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
1964 int iter = spill_fill_data.next_iter;
1965 rtx insn;
1967 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1968 GEN_INT (cfa_off)));
1969 spill_fill_data.prev_insn[iter] = insn;
1972 /* Wrapper functions that discard the CONST_INT spill offset.  These
1973 exist so that we can give gr_spill/gr_fill the offset they need and
1974 use a consistent function interface. */
1976 static rtx
1977 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
1979 return gen_movdi (dest, src);
1982 static rtx
1983 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
1985 return gen_fr_spill (dest, src);
1988 static rtx
1989 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
1991 return gen_fr_restore (dest, src);
1994 /* Called after register allocation to add any instructions needed for the
1995 prologue.  Using a prologue insn is preferred over putting all of the
1996 instructions in output_function_prologue (), since it allows the scheduler
1997 to intermix instructions with the saves of the caller-saved registers.  In
1998 some cases, it might be necessary to emit a barrier instruction as the last
1999 insn to prevent such scheduling.
2001 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2002 so that the debug info generation code can handle them properly.
2004 The register save area is laid out like so:
2005 cfa+16
2006 [ varargs spill area ]
2007 [ fr register spill area ]
2008 [ br register spill area ]
2009 [ ar register spill area ]
2010 [ pr register spill area ]
2011 [ gr register spill area ] */
2013 /* ??? We get inefficient code when the frame size is larger than can fit
2014 in an adds instruction.  */
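/* A sketch of the cfa_off bookkeeping used below: cfa_off holds the current
   slot's offset from the CFA.  It is set to -16 before the varargs spills
   into the caller's scratch area, reset to the bottom of the register save
   area (spill_cfa_off + spill_size + extra_spill_size), and then decreases
   by 8 per GR/BR/AR/PR slot and by 16 per FR slot.  The abort () checks
   verify that the running offset matches the layout computed by
   ia64_compute_frame_size.  */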
2016 void
2017 ia64_expand_prologue (void)
2019 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2020 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2021 rtx reg, alt_reg;
2023 ia64_compute_frame_size (get_frame_size ());
2024 last_scratch_gr_reg = 15;
2026 /* If there is no epilogue, then we don't need some prologue insns.
2027 We need to avoid emitting the dead prologue insns, because flow
2028 will complain about them. */
2029 if (optimize)
2031 edge e;
2033 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2034 if ((e->flags & EDGE_FAKE) == 0
2035 && (e->flags & EDGE_FALLTHRU) != 0)
2036 break;
2037 epilogue_p = (e != NULL);
2039 else
2040 epilogue_p = 1;
2042 /* Set the local, input, and output register names. We need to do this
2043 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2044 half. If we use in/loc/out register names, then we get assembler errors
2045 in crtn.S because there is no alloc insn or regstk directive in there. */
2046 if (! TARGET_REG_NAMES)
2048 int inputs = current_frame_info.n_input_regs;
2049 int locals = current_frame_info.n_local_regs;
2050 int outputs = current_frame_info.n_output_regs;
2052 for (i = 0; i < inputs; i++)
2053 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2054 for (i = 0; i < locals; i++)
2055 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2056 for (i = 0; i < outputs; i++)
2057 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2060 /* Set the frame pointer register name. The regnum is logically loc79,
2061 but of course we'll not have allocated that many locals. Rather than
2062 worrying about renumbering the existing rtxs, we adjust the name. */
2063 /* ??? This code means that we can never use one local register when
2064 there is a frame pointer. loc79 gets wasted in this case, as it is
2065 renamed to a register that will never be used. See also the try_locals
2066 code in find_gr_spill. */
2067 if (current_frame_info.reg_fp)
2069 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2070 reg_names[HARD_FRAME_POINTER_REGNUM]
2071 = reg_names[current_frame_info.reg_fp];
2072 reg_names[current_frame_info.reg_fp] = tmp;
2075 /* We don't need an alloc instruction if we've used no outputs or locals. */
2076 if (current_frame_info.n_local_regs == 0
2077 && current_frame_info.n_output_regs == 0
2078 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2079 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2081 /* If there is no alloc, but there are input registers used, then we
2082 need a .regstk directive. */
2083 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2084 ar_pfs_save_reg = NULL_RTX;
2086 else
2088 current_frame_info.need_regstk = 0;
2090 if (current_frame_info.reg_save_ar_pfs)
2091 regno = current_frame_info.reg_save_ar_pfs;
2092 else
2093 regno = next_scratch_gr_reg ();
2094 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2096 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2097 GEN_INT (current_frame_info.n_input_regs),
2098 GEN_INT (current_frame_info.n_local_regs),
2099 GEN_INT (current_frame_info.n_output_regs),
2100 GEN_INT (current_frame_info.n_rotate_regs)));
2101 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2104 /* Set up frame pointer, stack pointer, and spill iterators. */
2106 n_varargs = cfun->machine->n_varargs;
2107 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2108 stack_pointer_rtx, 0);
2110 if (frame_pointer_needed)
2112 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2113 RTX_FRAME_RELATED_P (insn) = 1;
2116 if (current_frame_info.total_size != 0)
2118 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2119 rtx offset;
2121 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2122 offset = frame_size_rtx;
2123 else
2125 regno = next_scratch_gr_reg ();
2126 offset = gen_rtx_REG (DImode, regno);
2127 emit_move_insn (offset, frame_size_rtx);
2130 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2131 stack_pointer_rtx, offset));
2133 if (! frame_pointer_needed)
2135 RTX_FRAME_RELATED_P (insn) = 1;
2136 if (GET_CODE (offset) != CONST_INT)
2138 REG_NOTES (insn)
2139 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2140 gen_rtx_SET (VOIDmode,
2141 stack_pointer_rtx,
2142 gen_rtx_PLUS (DImode,
2143 stack_pointer_rtx,
2144 frame_size_rtx)),
2145 REG_NOTES (insn));
2149 /* ??? At this point we must generate a magic insn that appears to
2150 modify the stack pointer, the frame pointer, and all spill
2151 iterators. This would allow the most scheduling freedom. For
2152 now, just hard stop. */
2153 emit_insn (gen_blockage ());
2156 /* Must copy out ar.unat before doing any integer spills. */
2157 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2159 if (current_frame_info.reg_save_ar_unat)
2160 ar_unat_save_reg
2161 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2162 else
2164 alt_regno = next_scratch_gr_reg ();
2165 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2166 current_frame_info.gr_used_mask |= 1 << alt_regno;
2169 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2170 insn = emit_move_insn (ar_unat_save_reg, reg);
2171 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2173 /* Even if we're not going to generate an epilogue, we still
2174 need to save the register so that EH works. */
2175 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2176 emit_insn (gen_prologue_use (ar_unat_save_reg));
2178 else
2179 ar_unat_save_reg = NULL_RTX;
2181 /* Spill all varargs registers. Do this before spilling any GR registers,
2182 since we want the UNAT bits for the GR registers to override the UNAT
2183 bits from varargs, which we don't care about. */
2185 cfa_off = -16;
2186 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2188 reg = gen_rtx_REG (DImode, regno);
2189 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2192 /* Locate the bottom of the register save area. */
2193 cfa_off = (current_frame_info.spill_cfa_off
2194 + current_frame_info.spill_size
2195 + current_frame_info.extra_spill_size);
2197 /* Save the predicate register block either in a register or in memory. */
2198 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2200 reg = gen_rtx_REG (DImode, PR_REG (0));
2201 if (current_frame_info.reg_save_pr != 0)
2203 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2204 insn = emit_move_insn (alt_reg, reg);
2206 /* ??? Denote pr spill/fill by a DImode move that modifies all
2207 64 hard registers. */
2208 RTX_FRAME_RELATED_P (insn) = 1;
2209 REG_NOTES (insn)
2210 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2211 gen_rtx_SET (VOIDmode, alt_reg, reg),
2212 REG_NOTES (insn));
2214 /* Even if we're not going to generate an epilogue, we still
2215 need to save the register so that EH works. */
2216 if (! epilogue_p)
2217 emit_insn (gen_prologue_use (alt_reg));
2219 else
2221 alt_regno = next_scratch_gr_reg ();
2222 alt_reg = gen_rtx_REG (DImode, alt_regno);
2223 insn = emit_move_insn (alt_reg, reg);
2224 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2225 cfa_off -= 8;
2229 /* Handle AR regs in numerical order. All of them get special handling. */
2230 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2231 && current_frame_info.reg_save_ar_unat == 0)
2233 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2234 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2235 cfa_off -= 8;
2238 /* The alloc insn already copied ar.pfs into a general register. The
2239 only thing we have to do now is copy that register to a stack slot
2240 if we'd not allocated a local register for the job. */
2241 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2242 && current_frame_info.reg_save_ar_pfs == 0)
2244 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2245 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2246 cfa_off -= 8;
2249 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2251 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2252 if (current_frame_info.reg_save_ar_lc != 0)
2254 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2255 insn = emit_move_insn (alt_reg, reg);
2256 RTX_FRAME_RELATED_P (insn) = 1;
2258 /* Even if we're not going to generate an epilogue, we still
2259 need to save the register so that EH works. */
2260 if (! epilogue_p)
2261 emit_insn (gen_prologue_use (alt_reg));
2263 else
2265 alt_regno = next_scratch_gr_reg ();
2266 alt_reg = gen_rtx_REG (DImode, alt_regno);
2267 emit_move_insn (alt_reg, reg);
2268 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2269 cfa_off -= 8;
2273 if (current_frame_info.reg_save_gp)
2275 insn = emit_move_insn (gen_rtx_REG (DImode,
2276 current_frame_info.reg_save_gp),
2277 pic_offset_table_rtx);
2278 /* We don't know for sure yet if this is actually needed, since
2279 we've not split the PIC call patterns. If all of the calls
2280 are indirect, and not followed by any uses of the gp, then
2281 this save is dead. Allow it to go away. */
2282 REG_NOTES (insn)
2283 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2286 /* We should now be at the base of the gr/br/fr spill area. */
2287 if (cfa_off != (current_frame_info.spill_cfa_off
2288 + current_frame_info.spill_size))
2289 abort ();
2291 /* Spill all general registers. */
2292 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2293 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2295 reg = gen_rtx_REG (DImode, regno);
2296 do_spill (gen_gr_spill, reg, cfa_off, reg);
2297 cfa_off -= 8;
2300 /* Handle BR0 specially -- it may be getting stored permanently in
2301 some GR register. */
2302 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2304 reg = gen_rtx_REG (DImode, BR_REG (0));
2305 if (current_frame_info.reg_save_b0 != 0)
2307 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2308 insn = emit_move_insn (alt_reg, reg);
2309 RTX_FRAME_RELATED_P (insn) = 1;
2311 /* Even if we're not going to generate an epilogue, we still
2312 need to save the register so that EH works. */
2313 if (! epilogue_p)
2314 emit_insn (gen_prologue_use (alt_reg));
2316 else
2318 alt_regno = next_scratch_gr_reg ();
2319 alt_reg = gen_rtx_REG (DImode, alt_regno);
2320 emit_move_insn (alt_reg, reg);
2321 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2322 cfa_off -= 8;
2326 /* Spill the rest of the BR registers. */
2327 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2328 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2330 alt_regno = next_scratch_gr_reg ();
2331 alt_reg = gen_rtx_REG (DImode, alt_regno);
2332 reg = gen_rtx_REG (DImode, regno);
2333 emit_move_insn (alt_reg, reg);
2334 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2335 cfa_off -= 8;
2338 /* Align the frame and spill all FR registers. */
2339 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2340 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2342 if (cfa_off & 15)
2343 abort ();
2344 reg = gen_rtx_REG (XFmode, regno);
2345 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2346 cfa_off -= 16;
2349 if (cfa_off != current_frame_info.spill_cfa_off)
2350 abort ();
2352 finish_spill_pointers ();
2355 /* Called after register allocation to add any instructions needed for the
2356 epilogue.  Using an epilogue insn is preferred over putting all of the
2357 instructions in output_function_epilogue (), since it allows the scheduler
2358 to intermix instructions with the restores of the caller-saved registers.
2359 In some cases, it might be necessary to emit a barrier instruction as the
2360 last insn to prevent such scheduling.  */
2362 void
2363 ia64_expand_epilogue (int sibcall_p)
2365 rtx insn, reg, alt_reg, ar_unat_save_reg;
2366 int regno, alt_regno, cfa_off;
2368 ia64_compute_frame_size (get_frame_size ());
2370 /* If there is a frame pointer, then we use it instead of the stack
2371 pointer, so that the stack pointer does not need to be valid when
2372 the epilogue starts. See EXIT_IGNORE_STACK. */
2373 if (frame_pointer_needed)
2374 setup_spill_pointers (current_frame_info.n_spilled,
2375 hard_frame_pointer_rtx, 0);
2376 else
2377 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2378 current_frame_info.total_size);
2380 if (current_frame_info.total_size != 0)
2382 /* ??? At this point we must generate a magic insn that appears to
2383 modify the spill iterators and the frame pointer. This would
2384 allow the most scheduling freedom. For now, just hard stop. */
2385 emit_insn (gen_blockage ());
2388 /* Locate the bottom of the register save area. */
2389 cfa_off = (current_frame_info.spill_cfa_off
2390 + current_frame_info.spill_size
2391 + current_frame_info.extra_spill_size);
2393 /* Restore the predicate registers. */
2394 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2396 if (current_frame_info.reg_save_pr != 0)
2397 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2398 else
2400 alt_regno = next_scratch_gr_reg ();
2401 alt_reg = gen_rtx_REG (DImode, alt_regno);
2402 do_restore (gen_movdi_x, alt_reg, cfa_off);
2403 cfa_off -= 8;
2405 reg = gen_rtx_REG (DImode, PR_REG (0));
2406 emit_move_insn (reg, alt_reg);
2409 /* Restore the application registers. */
2411 /* Load the saved unat from the stack, but do not restore it until
2412 after the GRs have been restored. */
2413 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2415 if (current_frame_info.reg_save_ar_unat != 0)
2416 ar_unat_save_reg
2417 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2418 else
2420 alt_regno = next_scratch_gr_reg ();
2421 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2422 current_frame_info.gr_used_mask |= 1 << alt_regno;
2423 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2424 cfa_off -= 8;
2427 else
2428 ar_unat_save_reg = NULL_RTX;
2430 if (current_frame_info.reg_save_ar_pfs != 0)
2432 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2433 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2434 emit_move_insn (reg, alt_reg);
2436 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2438 alt_regno = next_scratch_gr_reg ();
2439 alt_reg = gen_rtx_REG (DImode, alt_regno);
2440 do_restore (gen_movdi_x, alt_reg, cfa_off);
2441 cfa_off -= 8;
2442 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2443 emit_move_insn (reg, alt_reg);
2446 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2448 if (current_frame_info.reg_save_ar_lc != 0)
2449 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2450 else
2452 alt_regno = next_scratch_gr_reg ();
2453 alt_reg = gen_rtx_REG (DImode, alt_regno);
2454 do_restore (gen_movdi_x, alt_reg, cfa_off);
2455 cfa_off -= 8;
2457 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2458 emit_move_insn (reg, alt_reg);
2461 /* We should now be at the base of the gr/br/fr spill area. */
2462 if (cfa_off != (current_frame_info.spill_cfa_off
2463 + current_frame_info.spill_size))
2464 abort ();
2466 /* The GP may be stored on the stack in the prologue, but it's
2467 never restored in the epilogue. Skip the stack slot. */
2468 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2469 cfa_off -= 8;
2471 /* Restore all general registers. */
2472 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2473 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2475 reg = gen_rtx_REG (DImode, regno);
2476 do_restore (gen_gr_restore, reg, cfa_off);
2477 cfa_off -= 8;
2480 /* Restore the branch registers. Handle B0 specially, as it may
2481 have gotten stored in some GR register. */
2482 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2484 if (current_frame_info.reg_save_b0 != 0)
2485 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2486 else
2488 alt_regno = next_scratch_gr_reg ();
2489 alt_reg = gen_rtx_REG (DImode, alt_regno);
2490 do_restore (gen_movdi_x, alt_reg, cfa_off);
2491 cfa_off -= 8;
2493 reg = gen_rtx_REG (DImode, BR_REG (0));
2494 emit_move_insn (reg, alt_reg);
2497 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2498 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2500 alt_regno = next_scratch_gr_reg ();
2501 alt_reg = gen_rtx_REG (DImode, alt_regno);
2502 do_restore (gen_movdi_x, alt_reg, cfa_off);
2503 cfa_off -= 8;
2504 reg = gen_rtx_REG (DImode, regno);
2505 emit_move_insn (reg, alt_reg);
2508 /* Restore floating point registers. */
2509 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2510 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2512 if (cfa_off & 15)
2513 abort ();
2514 reg = gen_rtx_REG (XFmode, regno);
2515 do_restore (gen_fr_restore_x, reg, cfa_off);
2516 cfa_off -= 16;
2519 /* Restore ar.unat for real. */
2520 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2522 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2523 emit_move_insn (reg, ar_unat_save_reg);
2526 if (cfa_off != current_frame_info.spill_cfa_off)
2527 abort ();
2529 finish_spill_pointers ();
2531 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2533 /* ??? At this point we must generate a magic insn that appears to
2534 modify the spill iterators, the stack pointer, and the frame
2535 pointer. This would allow the most scheduling freedom. For now,
2536 just hard stop. */
2537 emit_insn (gen_blockage ());
2540 if (cfun->machine->ia64_eh_epilogue_sp)
2541 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2542 else if (frame_pointer_needed)
2544 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2545 RTX_FRAME_RELATED_P (insn) = 1;
2547 else if (current_frame_info.total_size)
2549 rtx offset, frame_size_rtx;
2551 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2552 if (CONST_OK_FOR_I (current_frame_info.total_size))
2553 offset = frame_size_rtx;
2554 else
2556 regno = next_scratch_gr_reg ();
2557 offset = gen_rtx_REG (DImode, regno);
2558 emit_move_insn (offset, frame_size_rtx);
2561 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2562 offset));
2564 RTX_FRAME_RELATED_P (insn) = 1;
2565 if (GET_CODE (offset) != CONST_INT)
2567 REG_NOTES (insn)
2568 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2569 gen_rtx_SET (VOIDmode,
2570 stack_pointer_rtx,
2571 gen_rtx_PLUS (DImode,
2572 stack_pointer_rtx,
2573 frame_size_rtx)),
2574 REG_NOTES (insn));
2578 if (cfun->machine->ia64_eh_epilogue_bsp)
2579 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2581 if (! sibcall_p)
2582 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2583 else
2585 int fp = GR_REG (2);
2586 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
2587 first available call-clobbered register.  If there was a frame pointer
2588 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2589 so we have to make sure we're using the string "r2" when emitting
2590 the register name for the assembler.  */
2591 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2592 fp = HARD_FRAME_POINTER_REGNUM;
2594 /* We must emit an alloc to force the input registers to become output
2595 registers. Otherwise, if the callee tries to pass its parameters
2596 through to another call without an intervening alloc, then these
2597 values get lost. */
2598 /* ??? We don't need to preserve all input registers. We only need to
2599 preserve those input registers used as arguments to the sibling call.
2600 It is unclear how to compute that number here. */
2601 if (current_frame_info.n_input_regs != 0)
2602 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2603 const0_rtx, const0_rtx,
2604 GEN_INT (current_frame_info.n_input_regs),
2605 const0_rtx));
2609 /* Return 1 if br.ret can do all the work required to return from a
2610 function. */
2613 int ia64_direct_return (void)
2615 if (reload_completed && ! frame_pointer_needed)
2617 ia64_compute_frame_size (get_frame_size ());
2619 return (current_frame_info.total_size == 0
2620 && current_frame_info.n_spilled == 0
2621 && current_frame_info.reg_save_b0 == 0
2622 && current_frame_info.reg_save_pr == 0
2623 && current_frame_info.reg_save_ar_pfs == 0
2624 && current_frame_info.reg_save_ar_unat == 0
2625 && current_frame_info.reg_save_ar_lc == 0);
2627 return 0;
2630 /* Return the magic cookie that we use to hold the return address
2631 during early compilation. */
2634 rtx ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
2636 if (count != 0)
2637 return NULL;
2638 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
2641 /* Split this value after reload, now that we know where the return
2642 address is saved. */
2644 void
2645 ia64_split_return_addr_rtx (rtx dest)
2647 rtx src;
2649 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2651 if (current_frame_info.reg_save_b0 != 0)
2652 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2653 else
2655 HOST_WIDE_INT off;
2656 unsigned int regno;
2658 /* Compute offset from CFA for BR0. */
2659 /* ??? Must be kept in sync with ia64_expand_prologue. */
2660 off = (current_frame_info.spill_cfa_off
2661 + current_frame_info.spill_size);
2662 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2663 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2664 off -= 8;
2666 /* Convert CFA offset to a register based offset. */
2667 if (frame_pointer_needed)
2668 src = hard_frame_pointer_rtx;
2669 else
2671 src = stack_pointer_rtx;
2672 off += current_frame_info.total_size;
2675 /* Load address into scratch register. */
2676 if (CONST_OK_FOR_I (off))
2677 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
2678 else
2680 emit_move_insn (dest, GEN_INT (off));
2681 emit_insn (gen_adddi3 (dest, src, dest));
2684 src = gen_rtx_MEM (Pmode, dest);
2687 else
2688 src = gen_rtx_REG (DImode, BR_REG (0));
2690 emit_move_insn (dest, src);
2694 int ia64_hard_regno_rename_ok (int from, int to)
2696 /* Don't clobber any of the registers we reserved for the prologue. */
2697 if (to == current_frame_info.reg_fp
2698 || to == current_frame_info.reg_save_b0
2699 || to == current_frame_info.reg_save_pr
2700 || to == current_frame_info.reg_save_ar_pfs
2701 || to == current_frame_info.reg_save_ar_unat
2702 || to == current_frame_info.reg_save_ar_lc)
2703 return 0;
2705 if (from == current_frame_info.reg_fp
2706 || from == current_frame_info.reg_save_b0
2707 || from == current_frame_info.reg_save_pr
2708 || from == current_frame_info.reg_save_ar_pfs
2709 || from == current_frame_info.reg_save_ar_unat
2710 || from == current_frame_info.reg_save_ar_lc)
2711 return 0;
2713 /* Don't use output registers outside the register frame. */
2714 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2715 return 0;
2717 /* Retain even/oddness on predicate register pairs. */
2718 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2719 return (from & 1) == (to & 1);
2721 return 1;
2724 /* Target hook for assembling integer objects. Handle word-sized
2725 aligned objects and detect the cases when @fptr is needed. */
2727 static bool
2728 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
2730 if (size == POINTER_SIZE / BITS_PER_UNIT
2731 && aligned_p
2732 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2733 && GET_CODE (x) == SYMBOL_REF
2734 && SYMBOL_REF_FUNCTION_P (x))
2736 if (POINTER_SIZE == 32)
2737 fputs ("\tdata4\t@fptr(", asm_out_file);
2738 else
2739 fputs ("\tdata8\t@fptr(", asm_out_file);
2740 output_addr_const (asm_out_file, x);
2741 fputs (")\n", asm_out_file);
2742 return true;
2744 return default_assemble_integer (x, size, aligned_p);
2747 /* Emit the function prologue. */
2749 static void
2750 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
2752 int mask, grsave, grsave_prev;
2754 if (current_frame_info.need_regstk)
2755 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2756 current_frame_info.n_input_regs,
2757 current_frame_info.n_local_regs,
2758 current_frame_info.n_output_regs,
2759 current_frame_info.n_rotate_regs);
2761 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2762 return;
2764 /* Emit the .prologue directive. */
2766 mask = 0;
2767 grsave = grsave_prev = 0;
2768 if (current_frame_info.reg_save_b0 != 0)
2770 mask |= 8;
2771 grsave = grsave_prev = current_frame_info.reg_save_b0;
2773 if (current_frame_info.reg_save_ar_pfs != 0
2774 && (grsave_prev == 0
2775 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2777 mask |= 4;
2778 if (grsave_prev == 0)
2779 grsave = current_frame_info.reg_save_ar_pfs;
2780 grsave_prev = current_frame_info.reg_save_ar_pfs;
2782 if (current_frame_info.reg_fp != 0
2783 && (grsave_prev == 0
2784 || current_frame_info.reg_fp == grsave_prev + 1))
2786 mask |= 2;
2787 if (grsave_prev == 0)
2788 grsave = HARD_FRAME_POINTER_REGNUM;
2789 grsave_prev = current_frame_info.reg_fp;
2791 if (current_frame_info.reg_save_pr != 0
2792 && (grsave_prev == 0
2793 || current_frame_info.reg_save_pr == grsave_prev + 1))
2795 mask |= 1;
2796 if (grsave_prev == 0)
2797 grsave = current_frame_info.reg_save_pr;
2800 if (mask && TARGET_GNU_AS)
2801 fprintf (file, "\t.prologue %d, %d\n", mask,
2802 ia64_dbx_register_number (grsave));
2803 else
2804 fputs ("\t.prologue\n", file);
2806 /* Emit a .spill directive, if necessary, to relocate the base of
2807 the register spill area. */
2808 if (current_frame_info.spill_cfa_off != -16)
2809 fprintf (file, "\t.spill %ld\n",
2810 (long) (current_frame_info.spill_cfa_off
2811 + current_frame_info.spill_size));
2814 /* Emit the .body directive at the scheduled end of the prologue. */
2816 static void
2817 ia64_output_function_end_prologue (FILE *file)
2819 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2820 return;
2822 fputs ("\t.body\n", file);
2825 /* Emit the function epilogue. */
2827 static void
2828 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
2829 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
2831 int i;
2833 if (current_frame_info.reg_fp)
2835 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2836 reg_names[HARD_FRAME_POINTER_REGNUM]
2837 = reg_names[current_frame_info.reg_fp];
2838 reg_names[current_frame_info.reg_fp] = tmp;
2840 if (! TARGET_REG_NAMES)
2842 for (i = 0; i < current_frame_info.n_input_regs; i++)
2843 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2844 for (i = 0; i < current_frame_info.n_local_regs; i++)
2845 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2846 for (i = 0; i < current_frame_info.n_output_regs; i++)
2847 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2850 current_frame_info.initialized = 0;
2854 int ia64_dbx_register_number (int regno)
2856 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2857 from its home at loc79 to something inside the register frame. We
2858 must perform the same renumbering here for the debug info. */
2859 if (current_frame_info.reg_fp)
2861 if (regno == HARD_FRAME_POINTER_REGNUM)
2862 regno = current_frame_info.reg_fp;
2863 else if (regno == current_frame_info.reg_fp)
2864 regno = HARD_FRAME_POINTER_REGNUM;
2867 if (IN_REGNO_P (regno))
2868 return 32 + regno - IN_REG (0);
2869 else if (LOC_REGNO_P (regno))
2870 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2871 else if (OUT_REGNO_P (regno))
2872 return (32 + current_frame_info.n_input_regs
2873 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2874 else
2875 return regno;
2878 void
2879 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
2881 rtx addr_reg, eight = GEN_INT (8);
2883 /* The Intel assembler requires that the global __ia64_trampoline symbol
2884 be declared explicitly */
2885 if (!TARGET_GNU_AS)
2887 static bool declared_ia64_trampoline = false;
2889 if (!declared_ia64_trampoline)
2891 declared_ia64_trampoline = true;
2892 (*targetm.asm_out.globalize_label) (asm_out_file,
2893 "__ia64_trampoline");
2897 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
2898 addr = convert_memory_address (Pmode, addr);
2899 fnaddr = convert_memory_address (Pmode, fnaddr);
2900 static_chain = convert_memory_address (Pmode, static_chain);
2902 /* Load up our iterator. */
2903 addr_reg = gen_reg_rtx (Pmode);
2904 emit_move_insn (addr_reg, addr);
2906 /* The first two words are the fake descriptor:
2907 __ia64_trampoline, ADDR+16. */
2908 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2909 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2910 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2912 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2913 copy_to_reg (plus_constant (addr, 16)));
2914 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2916 /* The third word is the target descriptor. */
2917 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2918 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2920 /* The fourth word is the static chain. */
2921 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2924 /* Do any needed setup for a variadic function. CUM has not been updated
2925 for the last named argument which has type TYPE and mode MODE.
2927 We generate the actual spill instructions during prologue generation. */
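/* Example, assuming MAX_ARGUMENT_SLOTS is 8: for a variadic function with
   two named integer arguments (one slot each), next_cum.words is 2, so
   n = 6, *pretend_size becomes 48 bytes, and the prologue later spills the
   last six incoming GR argument registers.  */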
2929 static void
2930 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2931 tree type, int * pretend_size,
2932 int second_time ATTRIBUTE_UNUSED)
2934 CUMULATIVE_ARGS next_cum = *cum;
2936 /* Skip the current argument. */
2937 ia64_function_arg_advance (&next_cum, mode, type, 1);
2939 if (next_cum.words < MAX_ARGUMENT_SLOTS)
2941 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
2942 *pretend_size = n * UNITS_PER_WORD;
2943 cfun->machine->n_varargs = n;
2947 /* Check whether TYPE is a homogeneous floating point aggregate.  If
2948 it is, return the mode of the floating point type that appears
2949 in all leaves.  If it is not, return VOIDmode.
2951 An aggregate is a homogeneous floating point aggregate if all
2952 fields/elements in it have the same floating point type (e.g.,
2953 SFmode).  128-bit quad-precision floats are excluded. */
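/* Illustrative examples: for struct { float x, y, z; } this returns SFmode;
   for struct { float x; double y; } the element types differ and it returns
   VOIDmode; a TFmode (quad-precision) field likewise yields VOIDmode.  */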
2955 static enum machine_mode
2956 hfa_element_mode (tree type, int nested)
2958 enum machine_mode element_mode = VOIDmode;
2959 enum machine_mode mode;
2960 enum tree_code code = TREE_CODE (type);
2961 int know_element_mode = 0;
2962 tree t;
2964 switch (code)
2966 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2967 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2968 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2969 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2970 case FUNCTION_TYPE:
2971 return VOIDmode;
2973 /* Fortran complex types are supposed to be HFAs, so we need to handle
2974 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2975 types though. */
2976 case COMPLEX_TYPE:
2977 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
2978 && TYPE_MODE (type) != TCmode)
2979 return GET_MODE_INNER (TYPE_MODE (type));
2980 else
2981 return VOIDmode;
2983 case REAL_TYPE:
2984 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2985 mode if this is contained within an aggregate. */
2986 if (nested && TYPE_MODE (type) != TFmode)
2987 return TYPE_MODE (type);
2988 else
2989 return VOIDmode;
2991 case ARRAY_TYPE:
2992 return hfa_element_mode (TREE_TYPE (type), 1);
2994 case RECORD_TYPE:
2995 case UNION_TYPE:
2996 case QUAL_UNION_TYPE:
2997 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2999 if (TREE_CODE (t) != FIELD_DECL)
3000 continue;
3002 mode = hfa_element_mode (TREE_TYPE (t), 1);
3003 if (know_element_mode)
3005 if (mode != element_mode)
3006 return VOIDmode;
3008 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3009 return VOIDmode;
3010 else
3012 know_element_mode = 1;
3013 element_mode = mode;
3016 return element_mode;
3018 default:
3019 /* If we reach here, we probably have some front-end specific type
3020 that the backend doesn't know about. This can happen via the
3021 aggregate_value_p call in init_function_start. All we can do is
3022 ignore unknown tree types. */
3023 return VOIDmode;
3026 return VOIDmode;
3029 /* Return the number of words required to hold a quantity of TYPE and MODE
3030 when passed as an argument. */
3031 static int
3032 ia64_function_arg_words (tree type, enum machine_mode mode)
3034 int words;
3036 if (mode == BLKmode)
3037 words = int_size_in_bytes (type);
3038 else
3039 words = GET_MODE_SIZE (mode);
3041 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
3044 /* Return the number of registers that should be skipped so the current
3045 argument (described by TYPE and WORDS) will be properly aligned.
3047 Integer and float arguments larger than 8 bytes start at the next
3048 even boundary. Aggregates larger than 8 bytes start at the next
3049 even boundary if the aggregate has 16 byte alignment. Note that
3050 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3051 but are still to be aligned in registers.
3053 ??? The ABI does not specify how to handle aggregates with
3054 alignment from 9 to 15 bytes, or greater than 16. We handle them
3055 all as if they had 16 byte alignment. Such aggregates can occur
3056 only if gcc extensions are used. */
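/* For example, if cum->words is odd and the next argument is a 16-byte
   integer (or an aggregate with 16-byte alignment), this returns 1 so the
   argument starts on an even slot boundary; an 8-byte scalar in the same
   position needs no padding and gets 0.  */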
3057 static int
3058 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3060 if ((cum->words & 1) == 0)
3061 return 0;
3063 if (type
3064 && TREE_CODE (type) != INTEGER_TYPE
3065 && TREE_CODE (type) != REAL_TYPE)
3066 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3067 else
3068 return words > 1;
3071 /* Return rtx for register where argument is passed, or zero if it is passed
3072 on the stack. */
3073 /* ??? 128-bit quad-precision floats are always passed in general
3074 registers. */
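/* One worked case (illustrative): a named, prototyped argument of type
   struct { float x, y, z; } with all argument slots free comes back from
   the function below as a PARALLEL of three SFmode FR argument registers
   covering byte offsets 0, 4 and 8.  */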
3077 rtx ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3078 int named, int incoming)
3080 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3081 int words = ia64_function_arg_words (type, mode);
3082 int offset = ia64_function_arg_offset (cum, type, words);
3083 enum machine_mode hfa_mode = VOIDmode;
3085 /* If all argument slots are used, then it must go on the stack. */
3086 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3087 return 0;
3089 /* Check for and handle homogeneous FP aggregates. */
3090 if (type)
3091 hfa_mode = hfa_element_mode (type, 0);
3093 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3094 and unprototyped hfas are passed specially. */
3095 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3097 rtx loc[16];
3098 int i = 0;
3099 int fp_regs = cum->fp_regs;
3100 int int_regs = cum->words + offset;
3101 int hfa_size = GET_MODE_SIZE (hfa_mode);
3102 int byte_size;
3103 int args_byte_size;
3105 /* If prototyped, pass it in FR regs then GR regs.
3106 If not prototyped, pass it in both FR and GR regs.
3108 If this is an SFmode aggregate, then it is possible to run out of
3109 FR regs while GR regs are still left. In that case, we pass the
3110 remaining part in the GR regs. */
3112 /* Fill the FP regs. We do this always. We stop if we reach the end
3113 of the argument, the last FP register, or the last argument slot. */
3115 byte_size = ((mode == BLKmode)
3116 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3117 args_byte_size = int_regs * UNITS_PER_WORD;
3118 offset = 0;
3119 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3120 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3122 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3123 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3124 + fp_regs)),
3125 GEN_INT (offset));
3126 offset += hfa_size;
3127 args_byte_size += hfa_size;
3128 fp_regs++;
3131 /* If no prototype, then the whole thing must go in GR regs. */
3132 if (! cum->prototype)
3133 offset = 0;
3134 /* If this is an SFmode aggregate, then we might have some left over
3135 that needs to go in GR regs. */
3136 else if (byte_size != offset)
3137 int_regs += offset / UNITS_PER_WORD;
3139 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3141 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3143 enum machine_mode gr_mode = DImode;
3144 unsigned int gr_size;
3146 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3147 then this goes in a GR reg left adjusted/little endian, right
3148 adjusted/big endian. */
3149 /* ??? Currently this is handled wrong, because 4-byte hunks are
3150 always right adjusted/little endian. */
3151 if (offset & 0x4)
3152 gr_mode = SImode;
3153 /* If we have an even 4 byte hunk because the aggregate is a
3154 multiple of 4 bytes in size, then this goes in a GR reg right
3155 adjusted/little endian. */
3156 else if (byte_size - offset == 4)
3157 gr_mode = SImode;
3159 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3160 gen_rtx_REG (gr_mode, (basereg
3161 + int_regs)),
3162 GEN_INT (offset));
3164 gr_size = GET_MODE_SIZE (gr_mode);
3165 offset += gr_size;
3166 if (gr_size == UNITS_PER_WORD
3167 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3168 int_regs++;
3169 else if (gr_size > UNITS_PER_WORD)
3170 int_regs += gr_size / UNITS_PER_WORD;
3173 /* If we ended up using just one location, just return that one loc, but
3174 change the mode back to the argument mode. However, we can't do this
3175 when hfa_mode is XFmode and mode is TImode. In that case, we would
3176 return a TImode reference to an FP reg, but FP regs can't hold TImode.
3177 We need the PARALLEL to make this work. This can happen for a union
3178 containing a single __float80 member. */
3179 if (i == 1 && ! (hfa_mode == XFmode && mode == TImode))
3180 return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
3181 else
3182 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3185 /* Integral types and aggregates go in general registers.  If we have run
3186 out of FR registers, then FP values must also go in general registers.
3187 This can happen when we have an SFmode HFA.  */
3188 else if (mode == TFmode || mode == TCmode
3189 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3191 int byte_size = ((mode == BLKmode)
3192 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3193 if (BYTES_BIG_ENDIAN
3194 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3195 && byte_size < UNITS_PER_WORD
3196 && byte_size > 0)
3198 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3199 gen_rtx_REG (DImode,
3200 (basereg + cum->words
3201 + offset)),
3202 const0_rtx);
3203 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3205 else
3206 return gen_rtx_REG (mode, basereg + cum->words + offset);
3210 /* If there is a prototype, then FP values go in a FR register when
3211 named, and in a GR register when unnamed. */
3212 else if (cum->prototype)
3214 if (named)
3215 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3216 /* In big-endian mode, an anonymous SFmode value must be represented
3217 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3218 the value into the high half of the general register. */
3219 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3220 return gen_rtx_PARALLEL (mode,
3221 gen_rtvec (1,
3222 gen_rtx_EXPR_LIST (VOIDmode,
3223 gen_rtx_REG (DImode, basereg + cum->words + offset),
3224 const0_rtx)));
3225 else
3226 return gen_rtx_REG (mode, basereg + cum->words + offset);
3228 /* If there is no prototype, then FP values go in both FR and GR
3229 registers. */
3230 else
3232 /* See comment above. */
3233 enum machine_mode inner_mode =
3234 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3236 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3237 gen_rtx_REG (mode, (FR_ARG_FIRST
3238 + cum->fp_regs)),
3239 const0_rtx);
3240 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3241 gen_rtx_REG (inner_mode,
3242 (basereg + cum->words
3243 + offset)),
3244 const0_rtx);
3246 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3250 /* Return the number of words, at the beginning of the argument, that must
3251 be put in registers.  0 if the argument is entirely in registers or
3252 entirely in memory. */
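/* For example, with six argument slots already used (cum->words == 6) and
   no alignment padding, a 24-byte aggregate returns 2 here: two words are
   passed in the last two argument registers and the remaining word goes on
   the stack.  */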
3255 int ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3256 tree type, int named ATTRIBUTE_UNUSED)
3258 int words = ia64_function_arg_words (type, mode);
3259 int offset = ia64_function_arg_offset (cum, type, words);
3261 /* If all argument slots are used, then it must go on the stack. */
3262 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3263 return 0;
3265 /* It doesn't matter whether the argument goes in FR or GR regs. If
3266 it fits within the 8 argument slots, then it goes entirely in
3267 registers. If it extends past the last argument slot, then the rest
3268 goes on the stack. */
3270 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3271 return 0;
3273 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3276 /* Update CUM to point after this argument. This is patterned after
3277 ia64_function_arg. */
3279 void
3280 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3281 tree type, int named)
3283 int words = ia64_function_arg_words (type, mode);
3284 int offset = ia64_function_arg_offset (cum, type, words);
3285 enum machine_mode hfa_mode = VOIDmode;
3287 /* If all arg slots are already full, then there is nothing to do. */
3288 if (cum->words >= MAX_ARGUMENT_SLOTS)
3289 return;
3291 cum->words += words + offset;
3293 /* Check for and handle homogeneous FP aggregates. */
3294 if (type)
3295 hfa_mode = hfa_element_mode (type, 0);
3297 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3298 and unprototyped hfas are passed specially. */
3299 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3301 int fp_regs = cum->fp_regs;
3302 /* This is the original value of cum->words + offset. */
3303 int int_regs = cum->words - words;
3304 int hfa_size = GET_MODE_SIZE (hfa_mode);
3305 int byte_size;
3306 int args_byte_size;
3308 /* If prototyped, pass it in FR regs then GR regs.
3309 If not prototyped, pass it in both FR and GR regs.
3311 If this is an SFmode aggregate, then it is possible to run out of
3312 FR regs while GR regs are still left. In that case, we pass the
3313 remaining part in the GR regs. */
3315 /* Fill the FP regs. We do this always. We stop if we reach the end
3316 of the argument, the last FP register, or the last argument slot. */
3318 byte_size = ((mode == BLKmode)
3319 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3320 args_byte_size = int_regs * UNITS_PER_WORD;
3321 offset = 0;
3322 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3323 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3325 offset += hfa_size;
3326 args_byte_size += hfa_size;
3327 fp_regs++;
3330 cum->fp_regs = fp_regs;
3333 /* Integral types and aggregates go in general registers.  If we have run
3334 out of FR registers, then FP values must also go in general registers.
3335 This can happen when we have an SFmode HFA.  */
3336 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3337 cum->int_regs = cum->words;
3339 /* If there is a prototype, then FP values go in a FR register when
3340 named, and in a GR register when unnamed. */
3341 else if (cum->prototype)
3343 if (! named)
3344 cum->int_regs = cum->words;
3345 else
3346 /* ??? Complex types should not reach here. */
3347 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3349 /* If there is no prototype, then FP values go in both FR and GR
3350 registers. */
3351 else
3353 /* ??? Complex types should not reach here. */
3354 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3355 cum->int_regs = cum->words;
3359 /* Variable sized types are passed by reference. */
3360 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3362 static bool
3363 ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3364 enum machine_mode mode ATTRIBUTE_UNUSED,
3365 tree type, bool named ATTRIBUTE_UNUSED)
3367 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3370 /* True if it is OK to do sibling call optimization for the specified
3371 call expression EXP. DECL will be the called function, or NULL if
3372 this is an indirect call. */
3373 static bool
3374 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3376 /* We must always return with our current GP. This means we can
3377 only sibcall to functions defined in the current module. */
3378 return decl && (*targetm.binds_local_p) (decl);
3382 /* Implement va_arg. */
3384 static tree
3385 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3387 /* Variable sized types are passed by reference. */
3388 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
3390 tree ptrtype = build_pointer_type (type);
3391 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
3392 return build_fold_indirect_ref (addr);
3395 /* Aggregate arguments with alignment larger than 8 bytes start at
3396 the next even boundary. Integer and floating point arguments
3397 do so if they are larger than 8 bytes, whether or not they are
3398 also aligned larger than 8 bytes. */
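/* A minimal sketch of the rounding below, assuming UNITS_PER_WORD == 8: the
   va_list pointer is advanced to (valist + 15) & -16, i.e. rounded up to the
   next 16-byte boundary, before the standard va_arg code reads the
   argument.  */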
3399 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3400 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3402 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3403 build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1, 0));
3404 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3405 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD, -1));
3406 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3407 gimplify_and_add (t, pre_p);
3410 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3413 /* Return 1 if the function return value is returned in memory.  Return 0
3414 if it is in a register. */
3416 static bool
3417 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
3419 enum machine_mode mode;
3420 enum machine_mode hfa_mode;
3421 HOST_WIDE_INT byte_size;
3423 mode = TYPE_MODE (valtype);
3424 byte_size = GET_MODE_SIZE (mode);
3425 if (mode == BLKmode)
3427 byte_size = int_size_in_bytes (valtype);
3428 if (byte_size < 0)
3429 return true;
3432 /* HFAs with up to 8 elements are returned in the FP argument registers. */
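/* Illustrative cases: a struct of four doubles (byte_size / hfa_size == 4)
   is returned in FP registers, while a struct of ten floats gives
   byte_size / hfa_size == 10 > MAX_ARGUMENT_SLOTS and is returned in
   memory.  */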
3434 hfa_mode = hfa_element_mode (valtype, 0);
3435 if (hfa_mode != VOIDmode)
3437 int hfa_size = GET_MODE_SIZE (hfa_mode);
3439 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3440 return true;
3441 else
3442 return false;
3444 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3445 return true;
3446 else
3447 return false;
3450 /* Return rtx for register that holds the function return value. */
3453 rtx ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
3455 enum machine_mode mode;
3456 enum machine_mode hfa_mode;
3458 mode = TYPE_MODE (valtype);
3459 hfa_mode = hfa_element_mode (valtype, 0);
3461 if (hfa_mode != VOIDmode)
3463 rtx loc[8];
3464 int i;
3465 int hfa_size;
3466 int byte_size;
3467 int offset;
3469 hfa_size = GET_MODE_SIZE (hfa_mode);
3470 byte_size = ((mode == BLKmode)
3471 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3472 offset = 0;
3473 for (i = 0; offset < byte_size; i++)
3475 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3476 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3477 GEN_INT (offset));
3478 offset += hfa_size;
3481 if (i == 1)
3482 return XEXP (loc[0], 0);
3483 else
3484 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3486 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
3487 return gen_rtx_REG (mode, FR_ARG_FIRST);
3488 else
3490 if (BYTES_BIG_ENDIAN
3491 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3493 rtx loc[8];
3494 int offset;
3495 int bytesize;
3496 int i;
3498 offset = 0;
3499 bytesize = int_size_in_bytes (valtype);
3500 for (i = 0; offset < bytesize; i++)
3502 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3503 gen_rtx_REG (DImode,
3504 GR_RET_FIRST + i),
3505 GEN_INT (offset));
3506 offset += UNITS_PER_WORD;
3508 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3510 else
3511 return gen_rtx_REG (mode, GR_RET_FIRST);
3515 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3516 We need to emit DTP-relative relocations. */
3518 void
3519 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
3521 if (size != 8)
3522 abort ();
3523 fputs ("\tdata8.ua\t@dtprel(", file);
3524 output_addr_const (file, x);
3525 fputs (")", file);
3528 /* Print a memory address as an operand to reference that memory location. */
3530 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3531 also call this from ia64_print_operand for memory addresses. */
3533 void
3534 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
3535 rtx address ATTRIBUTE_UNUSED)
3539 /* Print an operand to an assembler instruction.
3540 C Swap and print a comparison operator.
3541 D Print an FP comparison operator.
3542 E Print 32 - constant, for SImode shifts as extract.
3543 e Print 64 - constant, for DImode rotates.
3544 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3545 a floating point register emitted normally.
3546 I Invert a predicate register by adding 1.
3547 J Select the proper predicate register for a condition.
3548 j Select the inverse predicate register for a condition.
3549 O Append .acq for volatile load.
3550 P Postincrement of a MEM.
3551 Q Append .rel for volatile store.
3552 S Shift amount for shladd instruction.
3553 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3554 for Intel assembler.
3555 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3556 for Intel assembler.
3557 r Print register name, or constant 0 as r0. HP compatibility for
3558 Linux kernel. */
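/* For example: with code 'P' and a DImode MEM whose address is a POST_INC,
   the function below prints ", 8"; with code 'O' and a volatile MEM it
   appends ".acq".  */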
3559 void
3560 ia64_print_operand (FILE * file, rtx x, int code)
3562 const char *str;
3564 switch (code)
3566 case 0:
3567 /* Handled below. */
3568 break;
3570 case 'C':
3572 enum rtx_code c = swap_condition (GET_CODE (x));
3573 fputs (GET_RTX_NAME (c), file);
3574 return;
3577 case 'D':
3578 switch (GET_CODE (x))
3580 case NE:
3581 str = "neq";
3582 break;
3583 case UNORDERED:
3584 str = "unord";
3585 break;
3586 case ORDERED:
3587 str = "ord";
3588 break;
3589 default:
3590 str = GET_RTX_NAME (GET_CODE (x));
3591 break;
3593 fputs (str, file);
3594 return;
3596 case 'E':
3597 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3598 return;
3600 case 'e':
3601 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3602 return;
3604 case 'F':
3605 if (x == CONST0_RTX (GET_MODE (x)))
3606 str = reg_names [FR_REG (0)];
3607 else if (x == CONST1_RTX (GET_MODE (x)))
3608 str = reg_names [FR_REG (1)];
3609 else if (GET_CODE (x) == REG)
3610 str = reg_names [REGNO (x)];
3611 else
3612 abort ();
3613 fputs (str, file);
3614 return;
3616 case 'I':
3617 fputs (reg_names [REGNO (x) + 1], file);
3618 return;
3620 case 'J':
3621 case 'j':
3623 unsigned int regno = REGNO (XEXP (x, 0));
3624 if (GET_CODE (x) == EQ)
3625 regno += 1;
3626 if (code == 'j')
3627 regno ^= 1;
3628 fputs (reg_names [regno], file);
3630 return;
3632 case 'O':
3633 if (MEM_VOLATILE_P (x))
3634 fputs(".acq", file);
3635 return;
3637 case 'P':
3639 HOST_WIDE_INT value;
3641 switch (GET_CODE (XEXP (x, 0)))
3643 default:
3644 return;
3646 case POST_MODIFY:
3647 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3648 if (GET_CODE (x) == CONST_INT)
3649 value = INTVAL (x);
3650 else if (GET_CODE (x) == REG)
3652 fprintf (file, ", %s", reg_names[REGNO (x)]);
3653 return;
3655 else
3656 abort ();
3657 break;
3659 case POST_INC:
3660 value = GET_MODE_SIZE (GET_MODE (x));
3661 break;
3663 case POST_DEC:
3664 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3665 break;
3668 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
3669 return;
3672 case 'Q':
3673 if (MEM_VOLATILE_P (x))
3674 fputs(".rel", file);
3675 return;
3677 case 'S':
3678 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3679 return;
3681 case 'T':
3682 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3684 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3685 return;
3687 break;
3689 case 'U':
3690 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3692 const char *prefix = "0x";
3693 if (INTVAL (x) & 0x80000000)
3695 fprintf (file, "0xffffffff");
3696 prefix = "";
3698 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3699 return;
3701 break;
3703 case 'r':
3704 /* If this operand is the constant zero, write it as register zero.
3705 Any register, zero, or CONST_INT value is OK here. */
3706 if (GET_CODE (x) == REG)
3707 fputs (reg_names[REGNO (x)], file);
3708 else if (x == CONST0_RTX (GET_MODE (x)))
3709 fputs ("r0", file);
3710 else if (GET_CODE (x) == CONST_INT)
3711 output_addr_const (file, x);
3712 else
3713 output_operand_lossage ("invalid %%r value");
3714 return;
3716 case '+':
3718 const char *which;
3720 /* For conditional branches, returns or calls, substitute
3721 sptk, dptk, dpnt, or spnt for %s. */
3722 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3723 if (x)
3725 int pred_val = INTVAL (XEXP (x, 0));
3727 /* Guess top and bottom 10% statically predicted. */
3728 if (pred_val < REG_BR_PROB_BASE / 50)
3729 which = ".spnt";
3730 else if (pred_val < REG_BR_PROB_BASE / 2)
3731 which = ".dpnt";
3732 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3733 which = ".dptk";
3734 else
3735 which = ".sptk";
3737 else if (GET_CODE (current_output_insn) == CALL_INSN)
3738 which = ".sptk";
3739 else
3740 which = ".dptk";
3742 fputs (which, file);
3743 return;
3746 case ',':
3747 x = current_insn_predicate;
3748 if (x)
3750 unsigned int regno = REGNO (XEXP (x, 0));
3751 if (GET_CODE (x) == EQ)
3752 regno += 1;
3753 fprintf (file, "(%s) ", reg_names [regno]);
3755 return;
3757 default:
3758 output_operand_lossage ("ia64_print_operand: unknown code");
3759 return;
3762 switch (GET_CODE (x))
3764 /* This happens for the spill/restore instructions. */
3765 case POST_INC:
3766 case POST_DEC:
3767 case POST_MODIFY:
3768 x = XEXP (x, 0);
3769 /* ... fall through ... */
3771 case REG:
3772 fputs (reg_names [REGNO (x)], file);
3773 break;
3775 case MEM:
3777 rtx addr = XEXP (x, 0);
3778 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
3779 addr = XEXP (addr, 0);
3780 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3781 break;
3784 default:
3785 output_addr_const (file, x);
3786 break;
3789 return;
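/* A few concrete examples of the operand codes handled above, for
   reference: '%C' on (lt ...) prints the swapped code "gt"; '%D' prints
   "neq" for NE and "unord" for UNORDERED; '%O' and '%Q' append ".acq"
   and ".rel" only when the MEM is volatile; '%r' prints const0_rtx as
   "r0".  */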
3792 /* Compute a (partial) cost for rtx X. Return true if the complete
3793 cost has been computed, and false if subexpressions should be
3794 scanned. In either case, *TOTAL contains the cost result. */
3795 /* ??? This is incomplete. */
3797 static bool
3798 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
3800 switch (code)
3802 case CONST_INT:
3803 switch (outer_code)
3805 case SET:
3806 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
3807 return true;
3808 case PLUS:
3809 if (CONST_OK_FOR_I (INTVAL (x)))
3810 *total = 0;
3811 else if (CONST_OK_FOR_J (INTVAL (x)))
3812 *total = 1;
3813 else
3814 *total = COSTS_N_INSNS (1);
3815 return true;
3816 default:
3817 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
3818 *total = 0;
3819 else
3820 *total = COSTS_N_INSNS (1);
3821 return true;
3824 case CONST_DOUBLE:
3825 *total = COSTS_N_INSNS (1);
3826 return true;
3828 case CONST:
3829 case SYMBOL_REF:
3830 case LABEL_REF:
3831 *total = COSTS_N_INSNS (3);
3832 return true;
3834 case MULT:
3835 /* For multiplies wider than HImode, we have to go to the FPU,
3836 which normally involves copies. Plus there's the latency
3837 of the multiply itself, and the latency of the instructions to
3838 transfer integer regs to FP regs. */
3839 /* ??? Check for FP mode. */
3840 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
3841 *total = COSTS_N_INSNS (10);
3842 else
3843 *total = COSTS_N_INSNS (2);
3844 return true;
3846 case PLUS:
3847 case MINUS:
3848 case ASHIFT:
3849 case ASHIFTRT:
3850 case LSHIFTRT:
3851 *total = COSTS_N_INSNS (1);
3852 return true;
3854 case DIV:
3855 case UDIV:
3856 case MOD:
3857 case UMOD:
3858 /* We make divide expensive, so that divide-by-constant will be
3859 optimized to a multiply. */
3860 *total = COSTS_N_INSNS (60);
3861 return true;
3863 default:
3864 return false;
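/* Illustrative readings of the table above: a CONST_INT satisfying
   CONST_OK_FOR_J is free when it is merely being SET into a register; a
   SYMBOL_REF is charged three insns' worth (presumably reflecting the
   cost of materializing the address); and every divide is priced at 60
   insns so that division by a constant becomes a multiply instead.  */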
3868 /* Calculate the cost of moving data from a register in class FROM to
3869 one in class TO, using MODE. */
3871 int
3872 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
3873 enum reg_class to)
3875 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3876 if (to == ADDL_REGS)
3877 to = GR_REGS;
3878 if (from == ADDL_REGS)
3879 from = GR_REGS;
3881 /* All costs are symmetric, so reduce cases by putting the
3882 lower number class as the destination. */
3883 if (from < to)
3885 enum reg_class tmp = to;
3886 to = from, from = tmp;
3889 /* Moving from FR<->GR in XFmode must be more expensive than 2,
3890 so that we get secondary memory reloads. Between FR_REGS,
3891 we have to make this at least as expensive as MEMORY_MOVE_COST
3892 to avoid spectacularly poor register class preferencing. */
3893 if (mode == XFmode)
3895 if (to != GR_REGS || from != GR_REGS)
3896 return MEMORY_MOVE_COST (mode, to, 0);
3897 else
3898 return 3;
3901 switch (to)
3903 case PR_REGS:
3904 /* Moving between PR registers takes two insns. */
3905 if (from == PR_REGS)
3906 return 3;
3907 /* Moving between PR and anything but GR is impossible. */
3908 if (from != GR_REGS)
3909 return MEMORY_MOVE_COST (mode, to, 0);
3910 break;
3912 case BR_REGS:
3913 /* Moving between BR and anything but GR is impossible. */
3914 if (from != GR_REGS && from != GR_AND_BR_REGS)
3915 return MEMORY_MOVE_COST (mode, to, 0);
3916 break;
3918 case AR_I_REGS:
3919 case AR_M_REGS:
3920 /* Moving between AR and anything but GR is impossible. */
3921 if (from != GR_REGS)
3922 return MEMORY_MOVE_COST (mode, to, 0);
3923 break;
3925 case GR_REGS:
3926 case FR_REGS:
3927 case GR_AND_FR_REGS:
3928 case GR_AND_BR_REGS:
3929 case ALL_REGS:
3930 break;
3932 default:
3933 abort ();
3936 return 2;
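/* So, for example, a DImode GR<->GR or GR<->FR move costs 2, a PR<->PR
   copy costs 3 (it takes two insns, as noted above), and an XFmode move
   between FR and GR is priced at MEMORY_MOVE_COST so that reload routes
   it through memory.  */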
3939 /* This function returns the register class required for a secondary
3940 register when copying between one of the registers in CLASS, and X,
3941 using MODE. A return value of NO_REGS means that no secondary register
3942 is required. */
3944 enum reg_class
3945 ia64_secondary_reload_class (enum reg_class class,
3946 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3948 int regno = -1;
3950 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3951 regno = true_regnum (x);
3953 switch (class)
3955 case BR_REGS:
3956 case AR_M_REGS:
3957 case AR_I_REGS:
3958 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3959 interaction. We end up with two pseudos with overlapping lifetimes
3960 both of which are equiv to the same constant, and both of which need
3961 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3962 changes depending on the path length, which means the qty_first_reg
3963 check in make_regs_eqv can give different answers at different times.
3964 At some point I'll probably need a reload_indi pattern to handle
3965 this.
3967 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3968 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3969 non-general registers for good measure. */
3970 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3971 return GR_REGS;
3973 /* This is needed if a pseudo used as a call_operand gets spilled to a
3974 stack slot. */
3975 if (GET_CODE (x) == MEM)
3976 return GR_REGS;
3977 break;
3979 case FR_REGS:
3980 /* Need to go through general registers to get to other class regs. */
3981 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3982 return GR_REGS;
3984 /* This can happen when a paradoxical subreg is an operand to the
3985 muldi3 pattern. */
3986 /* ??? This shouldn't be necessary after instruction scheduling is
3987 enabled, because paradoxical subregs are not accepted by
3988 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3989 stop the paradoxical subreg stupidity in the *_operand functions
3990 in recog.c. */
3991 if (GET_CODE (x) == MEM
3992 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3993 || GET_MODE (x) == QImode))
3994 return GR_REGS;
3996 /* This can happen because of the ior/and/etc patterns that accept FP
3997 registers as operands. If the third operand is a constant, then it
3998 needs to be reloaded into a FP register. */
3999 if (GET_CODE (x) == CONST_INT)
4000 return GR_REGS;
4002 /* This can happen because of register elimination in a muldi3 insn.
4003 E.g. `26107 * (unsigned long)&u'. */
4004 if (GET_CODE (x) == PLUS)
4005 return GR_REGS;
4006 break;
4008 case PR_REGS:
4009 /* ??? This happens if we cse/gcse a BImode value across a call,
4010 and the function has a nonlocal goto. This is because global
4011 does not allocate call crossing pseudos to hard registers when
4012 current_function_has_nonlocal_goto is true. This is relatively
4013 common for C++ programs that use exceptions. To reproduce,
4014 return NO_REGS and compile libstdc++. */
4015 if (GET_CODE (x) == MEM)
4016 return GR_REGS;
4018 /* This can happen when we take a BImode subreg of a DImode value,
4019 and that DImode value winds up in some non-GR register. */
4020 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4021 return GR_REGS;
4022 break;
4024 default:
4025 break;
4028 return NO_REGS;
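/* Two common cases from above, spelled out: a spilled pseudo (a MEM)
   being reloaded into a branch register needs a GR_REGS scratch, since
   BR registers are only reachable from general registers; likewise a
   CONST_INT headed for an FR register goes through a GR first.  */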
4032 /* Emit text to declare externally defined variables and functions, because
4033 the Intel assembler does not support undefined externals. */
4035 void
4036 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4038 int save_referenced;
4040 /* GNU as does not need anything here, but the HP linker does need
4041 something for external functions. */
4043 if (TARGET_GNU_AS
4044 && (!TARGET_HPUX_LD
4045 || TREE_CODE (decl) != FUNCTION_DECL
4046 || strstr (name, "__builtin_") == name))
4047 return;
4049 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4050 the linker when we do this, so we need to be careful not to do this for
4051 builtin functions which have no library equivalent. Unfortunately, we
4052 can't tell here whether or not a function will actually be called by
4053 expand_expr, so we pull in library functions even if we may not need
4054 them later. */
4055 if (! strcmp (name, "__builtin_next_arg")
4056 || ! strcmp (name, "alloca")
4057 || ! strcmp (name, "__builtin_constant_p")
4058 || ! strcmp (name, "__builtin_args_info"))
4059 return;
4061 if (TARGET_HPUX_LD)
4062 ia64_hpux_add_extern_decl (decl);
4063 else
4065 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4066 restore it. */
4067 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4068 if (TREE_CODE (decl) == FUNCTION_DECL)
4069 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4070 (*targetm.asm_out.globalize_label) (file, name);
4071 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4075 /* Parse the -mfixed-range= option string. */
4077 static void
4078 fix_range (const char *const_str)
4080 int i, first, last;
4081 char *str, *dash, *comma;
4083 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4084 REG2 are either register names or register numbers. The effect
4085 of this option is to mark the registers in the range from REG1 to
4086 REG2 as ``fixed'' so they won't be used by the compiler. This is
4087 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4089 i = strlen (const_str);
4090 str = (char *) alloca (i + 1);
4091 memcpy (str, const_str, i + 1);
4093 while (1)
4095 dash = strchr (str, '-');
4096 if (!dash)
4098 warning ("value of -mfixed-range must have form REG1-REG2");
4099 return;
4101 *dash = '\0';
4103 comma = strchr (dash + 1, ',');
4104 if (comma)
4105 *comma = '\0';
4107 first = decode_reg_name (str);
4108 if (first < 0)
4110 warning ("unknown register name: %s", str);
4111 return;
4114 last = decode_reg_name (dash + 1);
4115 if (last < 0)
4117 warning ("unknown register name: %s", dash + 1);
4118 return;
4121 *dash = '-';
4123 if (first > last)
4125 warning ("%s-%s is an empty range", str, dash + 1);
4126 return;
4129 for (i = first; i <= last; ++i)
4130 fixed_regs[i] = call_used_regs[i] = 1;
4132 if (!comma)
4133 break;
4135 *comma = ',';
4136 str = comma + 1;
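/* For example, -mfixed-range=f32-f127 (the case mentioned above) marks
   f32 through f127 as fixed and call-used so the compiler never
   allocates them; several such ranges may be given, separated by
   commas.  */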
4140 static struct machine_function *
4141 ia64_init_machine_status (void)
4143 return ggc_alloc_cleared (sizeof (struct machine_function));
4146 /* Handle TARGET_OPTIONS switches. */
4148 void
4149 ia64_override_options (void)
4151 static struct pta
4153 const char *const name; /* processor name or nickname. */
4154 const enum processor_type processor;
4156 const processor_alias_table[] =
4158 {"itanium", PROCESSOR_ITANIUM},
4159 {"itanium1", PROCESSOR_ITANIUM},
4160 {"merced", PROCESSOR_ITANIUM},
4161 {"itanium2", PROCESSOR_ITANIUM2},
4162 {"mckinley", PROCESSOR_ITANIUM2},
4165 int const pta_size = ARRAY_SIZE (processor_alias_table);
4166 int i;
4168 if (TARGET_AUTO_PIC)
4169 target_flags |= MASK_CONST_GP;
4171 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4173 if ((target_flags_explicit & MASK_INLINE_FLOAT_DIV_LAT)
4174 && (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR))
4176 warning ("cannot optimize floating point division for both latency and throughput");
4177 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4179 else
4181 if (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR)
4182 target_flags &= ~MASK_INLINE_FLOAT_DIV_LAT;
4183 else
4184 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4188 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4190 if ((target_flags_explicit & MASK_INLINE_INT_DIV_LAT)
4191 && (target_flags_explicit & MASK_INLINE_INT_DIV_THR))
4193 warning ("cannot optimize integer division for both latency and throughput");
4194 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4196 else
4198 if (target_flags_explicit & MASK_INLINE_INT_DIV_THR)
4199 target_flags &= ~MASK_INLINE_INT_DIV_LAT;
4200 else
4201 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4205 if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
4207 if ((target_flags_explicit & MASK_INLINE_SQRT_LAT)
4208 && (target_flags_explicit & MASK_INLINE_SQRT_THR))
4210 warning ("cannot optimize square root for both latency and throughput");
4211 target_flags &= ~MASK_INLINE_SQRT_THR;
4213 else
4215 if (target_flags_explicit & MASK_INLINE_SQRT_THR)
4216 target_flags &= ~MASK_INLINE_SQRT_LAT;
4217 else
4218 target_flags &= ~MASK_INLINE_SQRT_THR;
4222 if (TARGET_INLINE_SQRT_LAT)
4224 warning ("not yet implemented: latency-optimized inline square root");
4225 target_flags &= ~MASK_INLINE_SQRT_LAT;
4228 if (ia64_fixed_range_string)
4229 fix_range (ia64_fixed_range_string);
4231 if (ia64_tls_size_string)
4233 char *end;
4234 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4235 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4236 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4237 else
4238 ia64_tls_size = tmp;
4241 if (!ia64_tune_string)
4242 ia64_tune_string = "itanium2";
4244 for (i = 0; i < pta_size; i++)
4245 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4247 ia64_tune = processor_alias_table[i].processor;
4248 break;
4251 if (i == pta_size)
4252 error ("bad value (%s) for -tune= switch", ia64_tune_string);
4254 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4255 flag_schedule_insns_after_reload = 0;
4257 /* Variable tracking should be run after all optimizations which change the order
4258 of insns. It also needs a valid CFG. */
4259 ia64_flag_var_tracking = flag_var_tracking;
4260 flag_var_tracking = 0;
4262 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4264 init_machine_status = ia64_init_machine_status;
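/* To summarize the switch handling above: -mtune= accepts itanium,
   itanium1, merced, itanium2 and mckinley (defaulting to itanium2),
   -mtls-size= accepts only 14, 22 or 64, and both second-pass scheduling
   and variable tracking are suppressed here so the backend can run them
   itself during machine-dependent reorg.  */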
4267 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4268 static enum attr_type ia64_safe_type (rtx);
4270 static enum attr_itanium_class
4271 ia64_safe_itanium_class (rtx insn)
4273 if (recog_memoized (insn) >= 0)
4274 return get_attr_itanium_class (insn);
4275 else
4276 return ITANIUM_CLASS_UNKNOWN;
4279 static enum attr_type
4280 ia64_safe_type (rtx insn)
4282 if (recog_memoized (insn) >= 0)
4283 return get_attr_type (insn);
4284 else
4285 return TYPE_UNKNOWN;
4288 /* The following collection of routines emit instruction group stop bits as
4289 necessary to avoid dependencies. */
4291 /* Need to track some additional registers as far as serialization is
4292 concerned so we can properly handle br.call and br.ret. We could
4293 make these registers visible to gcc, but since these registers are
4294 never explicitly used in gcc generated code, it seems wasteful to
4295 do so (plus it would make the call and return patterns needlessly
4296 complex). */
4297 #define REG_RP (BR_REG (0))
4298 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4299 /* This is used for volatile asms which may require a stop bit immediately
4300 before and after them. */
4301 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4302 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4303 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4305 /* For each register, we keep track of how it has been written in the
4306 current instruction group.
4308 If a register is written unconditionally (no qualifying predicate),
4309 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4311 If a register is written if its qualifying predicate P is true, we
4312 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4313 may be written again by the complement of P (P^1) and when this happens,
4314 WRITE_COUNT gets set to 2.
4316 The result of this is that whenever an insn attempts to write a register
4317 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4319 If a predicate register is written by a floating-point insn, we set
4320 WRITTEN_BY_FP to true.
4322 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4323 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
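/* Example: a write to r14 under p6 sets WRITE_COUNT to 1; a second write
   under p7 is tolerated if p6/p7 are a complementary pair (the code
   below assumes P and P+1 are complementary for even P) but bumps
   WRITE_COUNT to 2, after which any further write in the same group
   requires a stop bit, as does a second write under an unrelated
   predicate or an unconditional one.  */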
4325 struct reg_write_state
4327 unsigned int write_count : 2;
4328 unsigned int first_pred : 16;
4329 unsigned int written_by_fp : 1;
4330 unsigned int written_by_and : 1;
4331 unsigned int written_by_or : 1;
4334 /* Cumulative info for the current instruction group. */
4335 struct reg_write_state rws_sum[NUM_REGS];
4336 /* Info for the current instruction. This gets copied to rws_sum after a
4337 stop bit is emitted. */
4338 struct reg_write_state rws_insn[NUM_REGS];
4340 /* Indicates whether this is the first instruction after a stop bit,
4341 in which case we don't need another stop bit. Without this, we hit
4342 the abort in ia64_variable_issue when scheduling an alloc. */
4343 static int first_instruction;
4345 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4346 RTL for one instruction. */
4347 struct reg_flags
4349 unsigned int is_write : 1; /* Is register being written? */
4350 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4351 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4352 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4353 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4354 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4357 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4358 static int rws_access_regno (int, struct reg_flags, int);
4359 static int rws_access_reg (rtx, struct reg_flags, int);
4360 static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4361 static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4362 static int rtx_needs_barrier (rtx, struct reg_flags, int);
4363 static void init_insn_group_barriers (void);
4364 static int group_barrier_needed_p (rtx);
4365 static int safe_group_barrier_needed_p (rtx);
4367 /* Update *RWS for REGNO, which is being written by the current instruction,
4368 with predicate PRED, and associated register flags in FLAGS. */
4370 static void
4371 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
4373 if (pred)
4374 rws[regno].write_count++;
4375 else
4376 rws[regno].write_count = 2;
4377 rws[regno].written_by_fp |= flags.is_fp;
4378 /* ??? Not tracking and/or across differing predicates. */
4379 rws[regno].written_by_and = flags.is_and;
4380 rws[regno].written_by_or = flags.is_or;
4381 rws[regno].first_pred = pred;
4384 /* Handle an access to register REGNO of type FLAGS using predicate register
4385 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4386 a dependency with an earlier instruction in the same group. */
4388 static int
4389 rws_access_regno (int regno, struct reg_flags flags, int pred)
4391 int need_barrier = 0;
4393 if (regno >= NUM_REGS)
4394 abort ();
4396 if (! PR_REGNO_P (regno))
4397 flags.is_and = flags.is_or = 0;
4399 if (flags.is_write)
4401 int write_count;
4403 /* One insn writes same reg multiple times? */
4404 if (rws_insn[regno].write_count > 0)
4405 abort ();
4407 /* Update info for current instruction. */
4408 rws_update (rws_insn, regno, flags, pred);
4409 write_count = rws_sum[regno].write_count;
4411 switch (write_count)
4413 case 0:
4414 /* The register has not been written yet. */
4415 rws_update (rws_sum, regno, flags, pred);
4416 break;
4418 case 1:
4419 /* The register has been written via a predicate. If this is
4420 not a complementary predicate, then we need a barrier. */
4421 /* ??? This assumes that P and P+1 are always complementary
4422 predicates for P even. */
4423 if (flags.is_and && rws_sum[regno].written_by_and)
4425 else if (flags.is_or && rws_sum[regno].written_by_or)
4427 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4428 need_barrier = 1;
4429 rws_update (rws_sum, regno, flags, pred);
4430 break;
4432 case 2:
4433 /* The register has been unconditionally written already. We
4434 need a barrier. */
4435 if (flags.is_and && rws_sum[regno].written_by_and)
4437 else if (flags.is_or && rws_sum[regno].written_by_or)
4439 else
4440 need_barrier = 1;
4441 rws_sum[regno].written_by_and = flags.is_and;
4442 rws_sum[regno].written_by_or = flags.is_or;
4443 break;
4445 default:
4446 abort ();
4449 else
4451 if (flags.is_branch)
4453 /* Branches have several RAW exceptions that allow us to avoid
4454 barriers. */
4456 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4457 /* RAW dependencies on branch regs are permissible as long
4458 as the writer is a non-branch instruction. Since we
4459 never generate code that uses a branch register written
4460 by a branch instruction, handling this case is
4461 easy. */
4462 return 0;
4464 if (REGNO_REG_CLASS (regno) == PR_REGS
4465 && ! rws_sum[regno].written_by_fp)
4466 /* The predicates of a branch are available within the
4467 same insn group as long as the predicate was written by
4468 something other than a floating-point instruction. */
4469 return 0;
4472 if (flags.is_and && rws_sum[regno].written_by_and)
4473 return 0;
4474 if (flags.is_or && rws_sum[regno].written_by_or)
4475 return 0;
4477 switch (rws_sum[regno].write_count)
4479 case 0:
4480 /* The register has not been written yet. */
4481 break;
4483 case 1:
4484 /* The register has been written via a predicate. If this is
4485 not a complementary predicate, then we need a barrier. */
4486 /* ??? This assumes that P and P+1 are always complementary
4487 predicates for P even. */
4488 if ((rws_sum[regno].first_pred ^ 1) != pred)
4489 need_barrier = 1;
4490 break;
4492 case 2:
4493 /* The register has been unconditionally written already. We
4494 need a barrier. */
4495 need_barrier = 1;
4496 break;
4498 default:
4499 abort ();
4503 return need_barrier;
4506 static int
4507 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
4509 int regno = REGNO (reg);
4510 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4512 if (n == 1)
4513 return rws_access_regno (regno, flags, pred);
4514 else
4516 int need_barrier = 0;
4517 while (--n >= 0)
4518 need_barrier |= rws_access_regno (regno + n, flags, pred);
4519 return need_barrier;
4523 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4524 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4526 static void
4527 update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
4529 rtx src = SET_SRC (x);
4531 *pcond = 0;
4533 switch (GET_CODE (src))
4535 case CALL:
4536 return;
4538 case IF_THEN_ELSE:
4539 if (SET_DEST (x) == pc_rtx)
4540 /* X is a conditional branch. */
4541 return;
4542 else
4544 int is_complemented = 0;
4546 /* X is a conditional move. */
4547 rtx cond = XEXP (src, 0);
4548 if (GET_CODE (cond) == EQ)
4549 is_complemented = 1;
4550 cond = XEXP (cond, 0);
4551 if (GET_CODE (cond) != REG
4552 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4553 abort ();
4554 *pcond = cond;
4555 if (XEXP (src, 1) == SET_DEST (x)
4556 || XEXP (src, 2) == SET_DEST (x))
4558 /* X is a conditional move that conditionally writes the
4559 destination. */
4561 /* We need another complement in this case. */
4562 if (XEXP (src, 1) == SET_DEST (x))
4563 is_complemented = ! is_complemented;
4565 *ppred = REGNO (cond);
4566 if (is_complemented)
4567 ++*ppred;
4570 /* ??? If this is a conditional write to the dest, then this
4571 instruction does not actually read one source. This probably
4572 doesn't matter, because that source is also the dest. */
4573 /* ??? Multiple writes to predicate registers are allowed
4574 if they are all AND type compares, or if they are all OR
4575 type compares. We do not generate such instructions
4576 currently. */
4578 /* ... fall through ... */
4580 default:
4581 if (COMPARISON_P (src)
4582 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4583 /* Set pflags->is_fp to 1 so that we know we're dealing
4584 with a floating point comparison when processing the
4585 destination of the SET. */
4586 pflags->is_fp = 1;
4588 /* Discover if this is a parallel comparison. We only handle
4589 and.orcm and or.andcm at present, since we must retain a
4590 strict inverse on the predicate pair. */
4591 else if (GET_CODE (src) == AND)
4592 pflags->is_and = 1;
4593 else if (GET_CODE (src) == IOR)
4594 pflags->is_or = 1;
4596 break;
4600 /* Subroutine of rtx_needs_barrier; this function determines whether the
4601 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4602 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4603 for this insn. */
4605 static int
4606 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
4608 int need_barrier = 0;
4609 rtx dst;
4610 rtx src = SET_SRC (x);
4612 if (GET_CODE (src) == CALL)
4613 /* We don't need to worry about the result registers that
4614 get written by a subroutine call. */
4615 return rtx_needs_barrier (src, flags, pred);
4616 else if (SET_DEST (x) == pc_rtx)
4618 /* X is a conditional branch. */
4619 /* ??? This seems redundant, as the caller sets this bit for
4620 all JUMP_INSNs. */
4621 flags.is_branch = 1;
4622 return rtx_needs_barrier (src, flags, pred);
4625 need_barrier = rtx_needs_barrier (src, flags, pred);
4627 /* This instruction unconditionally uses a predicate register. */
4628 if (cond)
4629 need_barrier |= rws_access_reg (cond, flags, 0);
4631 dst = SET_DEST (x);
4632 if (GET_CODE (dst) == ZERO_EXTRACT)
4634 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4635 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4636 dst = XEXP (dst, 0);
4638 return need_barrier;
4641 /* Handle an access to rtx X of type FLAGS using predicate register
4642 PRED. Return 1 if this access creates a dependency with an earlier
4643 instruction in the same group. */
4645 static int
4646 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
4648 int i, j;
4649 int is_complemented = 0;
4650 int need_barrier = 0;
4651 const char *format_ptr;
4652 struct reg_flags new_flags;
4653 rtx cond = 0;
4655 if (! x)
4656 return 0;
4658 new_flags = flags;
4660 switch (GET_CODE (x))
4662 case SET:
4663 update_set_flags (x, &new_flags, &pred, &cond);
4664 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4665 if (GET_CODE (SET_SRC (x)) != CALL)
4667 new_flags.is_write = 1;
4668 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4670 break;
4672 case CALL:
4673 new_flags.is_write = 0;
4674 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4676 /* Avoid multiple register writes, in case this is a pattern with
4677 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4678 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4680 new_flags.is_write = 1;
4681 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4682 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4683 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4685 break;
4687 case COND_EXEC:
4688 /* X is a predicated instruction. */
4690 cond = COND_EXEC_TEST (x);
4691 if (pred)
4692 abort ();
4693 need_barrier = rtx_needs_barrier (cond, flags, 0);
4695 if (GET_CODE (cond) == EQ)
4696 is_complemented = 1;
4697 cond = XEXP (cond, 0);
4698 if (GET_CODE (cond) != REG
4699 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4700 abort ();
4701 pred = REGNO (cond);
4702 if (is_complemented)
4703 ++pred;
4705 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4706 return need_barrier;
4708 case CLOBBER:
4709 case USE:
4710 /* Clobber & use are for earlier compiler-phases only. */
4711 break;
4713 case ASM_OPERANDS:
4714 case ASM_INPUT:
4715 /* We always emit stop bits for traditional asms. We emit stop bits
4716 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4717 if (GET_CODE (x) != ASM_OPERANDS
4718 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4720 /* Avoid writing the register multiple times if we have multiple
4721 asm outputs. This avoids an abort in rws_access_reg. */
4722 if (! rws_insn[REG_VOLATILE].write_count)
4724 new_flags.is_write = 1;
4725 rws_access_regno (REG_VOLATILE, new_flags, pred);
4727 return 1;
4730 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4731 We cannot just fall through here since then we would be confused
4732 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
4733 traditional asms, unlike their normal usage. */
4735 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4736 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4737 need_barrier = 1;
4738 break;
4740 case PARALLEL:
4741 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4743 rtx pat = XVECEXP (x, 0, i);
4744 if (GET_CODE (pat) == SET)
4746 update_set_flags (pat, &new_flags, &pred, &cond);
4747 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4749 else if (GET_CODE (pat) == USE
4750 || GET_CODE (pat) == CALL
4751 || GET_CODE (pat) == ASM_OPERANDS)
4752 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4753 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4754 abort ();
4756 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4758 rtx pat = XVECEXP (x, 0, i);
4759 if (GET_CODE (pat) == SET)
4761 if (GET_CODE (SET_SRC (pat)) != CALL)
4763 new_flags.is_write = 1;
4764 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4765 pred);
4768 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4769 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4771 break;
4773 case SUBREG:
4774 x = SUBREG_REG (x);
4775 /* FALLTHRU */
4776 case REG:
4777 if (REGNO (x) == AR_UNAT_REGNUM)
4779 for (i = 0; i < 64; ++i)
4780 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4782 else
4783 need_barrier = rws_access_reg (x, flags, pred);
4784 break;
4786 case MEM:
4787 /* Find the regs used in memory address computation. */
4788 new_flags.is_write = 0;
4789 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4790 break;
4792 case CONST_INT: case CONST_DOUBLE:
4793 case SYMBOL_REF: case LABEL_REF: case CONST:
4794 break;
4796 /* Operators with side-effects. */
4797 case POST_INC: case POST_DEC:
4798 if (GET_CODE (XEXP (x, 0)) != REG)
4799 abort ();
4801 new_flags.is_write = 0;
4802 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4803 new_flags.is_write = 1;
4804 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4805 break;
4807 case POST_MODIFY:
4808 if (GET_CODE (XEXP (x, 0)) != REG)
4809 abort ();
4811 new_flags.is_write = 0;
4812 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4813 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4814 new_flags.is_write = 1;
4815 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4816 break;
4818 /* Handle common unary and binary ops for efficiency. */
4819 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4820 case MOD: case UDIV: case UMOD: case AND: case IOR:
4821 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4822 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4823 case NE: case EQ: case GE: case GT: case LE:
4824 case LT: case GEU: case GTU: case LEU: case LTU:
4825 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4826 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4827 break;
4829 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4830 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4831 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4832 case SQRT: case FFS: case POPCOUNT:
4833 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4834 break;
4836 case UNSPEC:
4837 switch (XINT (x, 1))
4839 case UNSPEC_LTOFF_DTPMOD:
4840 case UNSPEC_LTOFF_DTPREL:
4841 case UNSPEC_DTPREL:
4842 case UNSPEC_LTOFF_TPREL:
4843 case UNSPEC_TPREL:
4844 case UNSPEC_PRED_REL_MUTEX:
4845 case UNSPEC_PIC_CALL:
4846 case UNSPEC_MF:
4847 case UNSPEC_FETCHADD_ACQ:
4848 case UNSPEC_BSP_VALUE:
4849 case UNSPEC_FLUSHRS:
4850 case UNSPEC_BUNDLE_SELECTOR:
4851 break;
4853 case UNSPEC_GR_SPILL:
4854 case UNSPEC_GR_RESTORE:
4856 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4857 HOST_WIDE_INT bit = (offset >> 3) & 63;
4859 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4860 new_flags.is_write = (XINT (x, 1) == 1);
4861 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4862 new_flags, pred);
4863 break;
4866 case UNSPEC_FR_SPILL:
4867 case UNSPEC_FR_RESTORE:
4868 case UNSPEC_GETF_EXP:
4869 case UNSPEC_SETF_EXP:
4870 case UNSPEC_ADDP4:
4871 case UNSPEC_FR_SQRT_RECIP_APPROX:
4872 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4873 break;
4875 case UNSPEC_FR_RECIP_APPROX:
4876 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4877 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4878 break;
4880 case UNSPEC_CMPXCHG_ACQ:
4881 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4882 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4883 break;
4885 default:
4886 abort ();
4888 break;
4890 case UNSPEC_VOLATILE:
4891 switch (XINT (x, 1))
4893 case UNSPECV_ALLOC:
4894 /* Alloc must always be the first instruction of a group.
4895 We force this by always returning true. */
4896 /* ??? We might get better scheduling if we explicitly check for
4897 input/local/output register dependencies, and modify the
4898 scheduler so that alloc is always reordered to the start of
4899 the current group. We could then eliminate all of the
4900 first_instruction code. */
4901 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4903 new_flags.is_write = 1;
4904 rws_access_regno (REG_AR_CFM, new_flags, pred);
4905 return 1;
4907 case UNSPECV_SET_BSP:
4908 need_barrier = 1;
4909 break;
4911 case UNSPECV_BLOCKAGE:
4912 case UNSPECV_INSN_GROUP_BARRIER:
4913 case UNSPECV_BREAK:
4914 case UNSPECV_PSAC_ALL:
4915 case UNSPECV_PSAC_NORMAL:
4916 return 0;
4918 default:
4919 abort ();
4921 break;
4923 case RETURN:
4924 new_flags.is_write = 0;
4925 need_barrier = rws_access_regno (REG_RP, flags, pred);
4926 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4928 new_flags.is_write = 1;
4929 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4930 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4931 break;
4933 default:
4934 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4935 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4936 switch (format_ptr[i])
4938 case '0': /* unused field */
4939 case 'i': /* integer */
4940 case 'n': /* note */
4941 case 'w': /* wide integer */
4942 case 's': /* pointer to string */
4943 case 'S': /* optional pointer to string */
4944 break;
4946 case 'e':
4947 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4948 need_barrier = 1;
4949 break;
4951 case 'E':
4952 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4953 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4954 need_barrier = 1;
4955 break;
4957 default:
4958 abort ();
4960 break;
4962 return need_barrier;
4965 /* Clear out the state for group_barrier_needed_p at the start of a
4966 sequence of insns. */
4968 static void
4969 init_insn_group_barriers (void)
4971 memset (rws_sum, 0, sizeof (rws_sum));
4972 first_instruction = 1;
4975 /* Given the current state, recorded by previous calls to this function,
4976 determine whether a group barrier (a stop bit) is necessary before INSN.
4977 Return nonzero if so. */
4979 static int
4980 group_barrier_needed_p (rtx insn)
4982 rtx pat;
4983 int need_barrier = 0;
4984 struct reg_flags flags;
4986 memset (&flags, 0, sizeof (flags));
4987 switch (GET_CODE (insn))
4989 case NOTE:
4990 break;
4992 case BARRIER:
4993 /* A barrier doesn't imply an instruction group boundary. */
4994 break;
4996 case CODE_LABEL:
4997 memset (rws_insn, 0, sizeof (rws_insn));
4998 return 1;
5000 case CALL_INSN:
5001 flags.is_branch = 1;
5002 flags.is_sibcall = SIBLING_CALL_P (insn);
5003 memset (rws_insn, 0, sizeof (rws_insn));
5005 /* Don't bundle a call following another call. */
5006 if ((pat = prev_active_insn (insn))
5007 && GET_CODE (pat) == CALL_INSN)
5009 need_barrier = 1;
5010 break;
5013 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5014 break;
5016 case JUMP_INSN:
5017 flags.is_branch = 1;
5019 /* Don't bundle a jump following a call. */
5020 if ((pat = prev_active_insn (insn))
5021 && GET_CODE (pat) == CALL_INSN)
5023 need_barrier = 1;
5024 break;
5026 /* FALLTHRU */
5028 case INSN:
5029 if (GET_CODE (PATTERN (insn)) == USE
5030 || GET_CODE (PATTERN (insn)) == CLOBBER)
5031 /* Don't care about USE and CLOBBER "insns"---those are used to
5032 indicate to the optimizer that it shouldn't get rid of
5033 certain operations. */
5034 break;
5036 pat = PATTERN (insn);
5038 /* Ug. Hack hacks hacked elsewhere. */
5039 switch (recog_memoized (insn))
5041 /* We play dependency tricks with the epilogue in order
5042 to get proper schedules. Undo this for dv analysis. */
5043 case CODE_FOR_epilogue_deallocate_stack:
5044 case CODE_FOR_prologue_allocate_stack:
5045 pat = XVECEXP (pat, 0, 0);
5046 break;
5048 /* The pattern we use for br.cloop confuses the code above.
5049 The second element of the vector is representative. */
5050 case CODE_FOR_doloop_end_internal:
5051 pat = XVECEXP (pat, 0, 1);
5052 break;
5054 /* Doesn't generate code. */
5055 case CODE_FOR_pred_rel_mutex:
5056 case CODE_FOR_prologue_use:
5057 return 0;
5059 default:
5060 break;
5063 memset (rws_insn, 0, sizeof (rws_insn));
5064 need_barrier = rtx_needs_barrier (pat, flags, 0);
5066 /* Check to see if the previous instruction was a volatile
5067 asm. */
5068 if (! need_barrier)
5069 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5070 break;
5072 default:
5073 abort ();
5076 if (first_instruction && INSN_P (insn)
5077 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5078 && GET_CODE (PATTERN (insn)) != USE
5079 && GET_CODE (PATTERN (insn)) != CLOBBER)
5081 need_barrier = 0;
5082 first_instruction = 0;
5085 return need_barrier;
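/* For instance, two insns in the same group that both write r14
   unconditionally create a WAW conflict, so this returns nonzero and the
   caller must emit a stop bit between them; as handled above, a call or
   jump immediately following another call also forces a barrier.  */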
5088 /* Like group_barrier_needed_p, but do not clobber the current state. */
5090 static int
5091 safe_group_barrier_needed_p (rtx insn)
5093 struct reg_write_state rws_saved[NUM_REGS];
5094 int saved_first_instruction;
5095 int t;
5097 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5098 saved_first_instruction = first_instruction;
5100 t = group_barrier_needed_p (insn);
5102 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5103 first_instruction = saved_first_instruction;
5105 return t;
5108 /* Scan the current function and insert stop bits as necessary to
5109 eliminate dependencies. This function assumes that a final
5110 instruction scheduling pass has been run which has already
5111 inserted most of the necessary stop bits. This function only
5112 inserts new ones at basic block boundaries, since these are
5113 invisible to the scheduler. */
5115 static void
5116 emit_insn_group_barriers (FILE *dump)
5118 rtx insn;
5119 rtx last_label = 0;
5120 int insns_since_last_label = 0;
5122 init_insn_group_barriers ();
5124 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5126 if (GET_CODE (insn) == CODE_LABEL)
5128 if (insns_since_last_label)
5129 last_label = insn;
5130 insns_since_last_label = 0;
5132 else if (GET_CODE (insn) == NOTE
5133 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5135 if (insns_since_last_label)
5136 last_label = insn;
5137 insns_since_last_label = 0;
5139 else if (GET_CODE (insn) == INSN
5140 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5141 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5143 init_insn_group_barriers ();
5144 last_label = 0;
5146 else if (INSN_P (insn))
5148 insns_since_last_label = 1;
5150 if (group_barrier_needed_p (insn))
5152 if (last_label)
5154 if (dump)
5155 fprintf (dump, "Emitting stop before label %d\n",
5156 INSN_UID (last_label));
5157 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5158 insn = last_label;
5160 init_insn_group_barriers ();
5161 last_label = 0;
5168 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5169 This function has to emit all necessary group barriers. */
5171 static void
5172 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5174 rtx insn;
5176 init_insn_group_barriers ();
5178 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5180 if (GET_CODE (insn) == BARRIER)
5182 rtx last = prev_active_insn (insn);
5184 if (! last)
5185 continue;
5186 if (GET_CODE (last) == JUMP_INSN
5187 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5188 last = prev_active_insn (last);
5189 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5190 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5192 init_insn_group_barriers ();
5194 else if (INSN_P (insn))
5196 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5197 init_insn_group_barriers ();
5198 else if (group_barrier_needed_p (insn))
5200 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5201 init_insn_group_barriers ();
5202 group_barrier_needed_p (insn);
5209 static int errata_find_address_regs (rtx *, void *);
5210 static void errata_emit_nops (rtx);
5211 static void fixup_errata (void);
5213 /* This structure is used to track some details about the previous insn
5214 groups so we can determine if it may be necessary to insert NOPs to
5215 work around hardware errata. */
5216 static struct group
5218 HARD_REG_SET p_reg_set;
5219 HARD_REG_SET gr_reg_conditionally_set;
5220 } last_group[2];
5222 /* Index into the last_group array. */
5223 static int group_idx;
5225 /* Called through for_each_rtx; determines if a hard register that was
5226 conditionally set in the previous group is used as an address register.
5227 It ensures that for_each_rtx returns 1 in that case. */
5228 static int
5229 errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
5231 rtx x = *xp;
5232 if (GET_CODE (x) != MEM)
5233 return 0;
5234 x = XEXP (x, 0);
5235 if (GET_CODE (x) == POST_MODIFY)
5236 x = XEXP (x, 0);
5237 if (GET_CODE (x) == REG)
5239 struct group *prev_group = last_group + (group_idx ^ 1);
5240 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5241 REGNO (x)))
5242 return 1;
5243 return -1;
5245 return 0;
5248 /* Called for each insn; this function keeps track of the state in
5249 last_group and emits additional NOPs if necessary to work around
5250 an Itanium A/B step erratum. */
5251 static void
5252 errata_emit_nops (rtx insn)
5254 struct group *this_group = last_group + group_idx;
5255 struct group *prev_group = last_group + (group_idx ^ 1);
5256 rtx pat = PATTERN (insn);
5257 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5258 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5259 enum attr_type type;
5260 rtx set = real_pat;
5262 if (GET_CODE (real_pat) == USE
5263 || GET_CODE (real_pat) == CLOBBER
5264 || GET_CODE (real_pat) == ASM_INPUT
5265 || GET_CODE (real_pat) == ADDR_VEC
5266 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5267 || asm_noperands (PATTERN (insn)) >= 0)
5268 return;
5270 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5271 parts of it. */
5273 if (GET_CODE (set) == PARALLEL)
5275 int i;
5276 set = XVECEXP (real_pat, 0, 0);
5277 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5278 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5279 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5281 set = 0;
5282 break;
5286 if (set && GET_CODE (set) != SET)
5287 set = 0;
5289 type = get_attr_type (insn);
5291 if (type == TYPE_F
5292 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5293 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5295 if ((type == TYPE_M || type == TYPE_A) && cond && set
5296 && REG_P (SET_DEST (set))
5297 && GET_CODE (SET_SRC (set)) != PLUS
5298 && GET_CODE (SET_SRC (set)) != MINUS
5299 && (GET_CODE (SET_SRC (set)) != ASHIFT
5300 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5301 && (GET_CODE (SET_SRC (set)) != MEM
5302 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5303 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5305 if (!COMPARISON_P (cond)
5306 || !REG_P (XEXP (cond, 0)))
5307 abort ();
5309 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5310 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5312 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5314 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5315 emit_insn_before (gen_nop (), insn);
5316 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5317 group_idx = 0;
5318 memset (last_group, 0, sizeof last_group);
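/* So when the address-register check above fires, the sequence emitted
   just before INSN is a stop bit, a nop, and another stop bit, which
   separates the conditional write of the address register from its use
   as the workaround requires.  */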
5322 /* Emit extra nops if they are required to work around hardware errata. */
5324 static void
5325 fixup_errata (void)
5327 rtx insn;
5329 if (! TARGET_B_STEP)
5330 return;
5332 group_idx = 0;
5333 memset (last_group, 0, sizeof last_group);
5335 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5337 if (!INSN_P (insn))
5338 continue;
5340 if (ia64_safe_type (insn) == TYPE_S)
5342 group_idx ^= 1;
5343 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5345 else
5346 errata_emit_nops (insn);
5351 /* Instruction scheduling support. */
5353 #define NR_BUNDLES 10
5355 /* A list of names of all available bundles. */
5357 static const char *bundle_name [NR_BUNDLES] =
5359 ".mii",
5360 ".mmi",
5361 ".mfi",
5362 ".mmf",
5363 #if NR_BUNDLES == 10
5364 ".bbb",
5365 ".mbb",
5366 #endif
5367 ".mib",
5368 ".mmb",
5369 ".mfb",
5370 ".mlx"
5373 /* Nonzero if we should insert stop bits into the schedule. */
5375 int ia64_final_schedule = 0;
5377 /* Codes of the corresponding queried units: */
5379 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5380 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5382 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5383 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5385 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5387 /* The following variable value is an insn group barrier. */
5389 static rtx dfa_stop_insn;
5391 /* The following variable value is the last issued insn. */
5393 static rtx last_scheduled_insn;
5395 /* The following variable value is size of the DFA state. */
5397 static size_t dfa_state_size;
5399 /* The following variable value is pointer to a DFA state used as
5400 temporary variable. */
5402 static state_t temp_dfa_state = NULL;
5404 /* The following variable value is DFA state after issuing the last
5405 insn. */
5407 static state_t prev_cycle_state = NULL;
5409 /* The following array element values are TRUE if the corresponding
5410 insn requires to add stop bits before it. */
5412 static char *stops_p;
5414 /* The following variable is used to set up the array mentioned above. */
5416 static int stop_before_p = 0;
5418 /* The following variable value is the length of the arrays `clocks' and
5419 `add_cycles'. */
5421 static int clocks_length;
5423 /* The following array element values are cycles on which the
5424 corresponding insn will be issued. The array is used only for
5425 Itanium1. */
5427 static int *clocks;
5429 /* The following array element values are the numbers of cycles that should
5430 be added to improve insn scheduling for MM_insns for Itanium1. */
5432 static int *add_cycles;
5434 static rtx ia64_single_set (rtx);
5435 static void ia64_emit_insn_before (rtx, rtx);
5437 /* Map a bundle number to its pseudo-op. */
5439 const char *
5440 get_bundle_name (int b)
5442 return bundle_name[b];
5446 /* Return the maximum number of instructions a cpu can issue. */
5448 static int
5449 ia64_issue_rate (void)
5451 return 6;
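/* Six corresponds to the two three-instruction bundles the processor can
   issue per clock.  */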
5454 /* Helper function - like single_set, but look inside COND_EXEC. */
5456 static rtx
5457 ia64_single_set (rtx insn)
5459 rtx x = PATTERN (insn), ret;
5460 if (GET_CODE (x) == COND_EXEC)
5461 x = COND_EXEC_CODE (x);
5462 if (GET_CODE (x) == SET)
5463 return x;
5465 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
5466 Although they are not a classical single set, the second set is there just
5467 to protect it from moving past FP-relative stack accesses. */
5468 switch (recog_memoized (insn))
5470 case CODE_FOR_prologue_allocate_stack:
5471 case CODE_FOR_epilogue_deallocate_stack:
5472 ret = XVECEXP (x, 0, 0);
5473 break;
5475 default:
5476 ret = single_set_2 (insn, x);
5477 break;
5480 return ret;
5483 /* Adjust the cost of a scheduling dependency. Return the new cost of
5484 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5486 static int
5487 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5489 enum attr_itanium_class dep_class;
5490 enum attr_itanium_class insn_class;
5492 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5493 return cost;
5495 insn_class = ia64_safe_itanium_class (insn);
5496 dep_class = ia64_safe_itanium_class (dep_insn);
5497 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5498 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5499 return 0;
5501 return cost;
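/* In other words, only output dependencies are adjusted here, and the
   adjustment is simply to make them free whenever either insn is an
   integer or FP store.  */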
5504 /* Like emit_insn_before, but skip cycle_display notes.
5505 ??? When cycle display notes are implemented, update this. */
5507 static void
5508 ia64_emit_insn_before (rtx insn, rtx before)
5510 emit_insn_before (insn, before);
5513 /* The following function marks insns that produce addresses for load
5514 and store insns. Such insns will be placed into M slots because that
5515 decreases latency for Itanium1 (see function
5516 `ia64_produce_address_p' and the DFA descriptions). */
5518 static void
5519 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
5521 rtx insn, link, next, next_tail;
5523 next_tail = NEXT_INSN (tail);
5524 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5525 if (INSN_P (insn))
5526 insn->call = 0;
5527 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5528 if (INSN_P (insn)
5529 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5531 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
5533 next = XEXP (link, 0);
5534 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
5535 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
5536 && ia64_st_address_bypass_p (insn, next))
5537 break;
5538 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
5539 || ia64_safe_itanium_class (next)
5540 == ITANIUM_CLASS_FLD)
5541 && ia64_ld_address_bypass_p (insn, next))
5542 break;
5544 insn->call = link != 0;
5548 /* We're beginning a new block. Initialize data structures as necessary. */
5550 static void
5551 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
5552 int sched_verbose ATTRIBUTE_UNUSED,
5553 int max_ready ATTRIBUTE_UNUSED)
5555 #ifdef ENABLE_CHECKING
5556 rtx insn;
5558 if (reload_completed)
5559 for (insn = NEXT_INSN (current_sched_info->prev_head);
5560 insn != current_sched_info->next_tail;
5561 insn = NEXT_INSN (insn))
5562 if (SCHED_GROUP_P (insn))
5563 abort ();
5564 #endif
5565 last_scheduled_insn = NULL_RTX;
5566 init_insn_group_barriers ();
5569 /* We are about to begin issuing insns for this clock cycle.
5570 Override the default sort algorithm to better slot instructions. */
5572 static int
5573 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
5574 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
5575 int reorder_type)
5577 int n_asms;
5578 int n_ready = *pn_ready;
5579 rtx *e_ready = ready + n_ready;
5580 rtx *insnp;
5582 if (sched_verbose)
5583 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
5585 if (reorder_type == 0)
5587 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5588 n_asms = 0;
5589 for (insnp = ready; insnp < e_ready; insnp++)
5590 if (insnp < e_ready)
5592 rtx insn = *insnp;
5593 enum attr_type t = ia64_safe_type (insn);
5594 if (t == TYPE_UNKNOWN)
5596 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5597 || asm_noperands (PATTERN (insn)) >= 0)
5599 rtx lowest = ready[n_asms];
5600 ready[n_asms] = insn;
5601 *insnp = lowest;
5602 n_asms++;
5604 else
5606 rtx highest = ready[n_ready - 1];
5607 ready[n_ready - 1] = insn;
5608 *insnp = highest;
5609 return 1;
5614 if (n_asms < n_ready)
5616 /* Some normal insns to process. Skip the asms. */
5617 ready += n_asms;
5618 n_ready -= n_asms;
5620 else if (n_ready > 0)
5621 return 1;
5624 if (ia64_final_schedule)
5626 int deleted = 0;
5627 int nr_need_stop = 0;
5629 for (insnp = ready; insnp < e_ready; insnp++)
5630 if (safe_group_barrier_needed_p (*insnp))
5631 nr_need_stop++;
5633 if (reorder_type == 1 && n_ready == nr_need_stop)
5634 return 0;
5635 if (reorder_type == 0)
5636 return 1;
5637 insnp = e_ready;
5638 /* Move down everything that needs a stop bit, preserving
5639 relative order. */
5640 while (insnp-- > ready + deleted)
5641 while (insnp >= ready + deleted)
5643 rtx insn = *insnp;
5644 if (! safe_group_barrier_needed_p (insn))
5645 break;
5646 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
5647 *ready = insn;
5648 deleted++;
5650 n_ready -= deleted;
5651 ready += deleted;
5654 return 1;
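/* In short: asm-like (TYPE_UNKNOWN) insns are segregated first, and
   during the final schedule everything that would currently need a stop
   bit is moved down in the ready array, preserving relative order as
   noted above, so that, given the scheduler's ordering of the ready
   list, the insns which can issue without a stop bit tend to be
   considered first.  */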
5657 /* We are about to begin issuing insns for this clock cycle. Override
5658 the default sort algorithm to better slot instructions. */
5660 static int
5661 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
5662 int clock_var)
5664 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
5665 pn_ready, clock_var, 0);
5668 /* Like ia64_sched_reorder, but called after issuing each insn.
5669 Override the default sort algorithm to better slot instructions. */
5671 static int
5672 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
5673 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
5674 int *pn_ready, int clock_var)
5676 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
5677 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
5678 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
5679 clock_var, 1);
5682 /* We are about to issue INSN. Return the number of insns left on the
5683 ready queue that can be issued this cycle. */
5685 static int
5686 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
5687 int sched_verbose ATTRIBUTE_UNUSED,
5688 rtx insn ATTRIBUTE_UNUSED,
5689 int can_issue_more ATTRIBUTE_UNUSED)
5691 last_scheduled_insn = insn;
5692 memcpy (prev_cycle_state, curr_state, dfa_state_size);
5693 if (reload_completed)
5695 if (group_barrier_needed_p (insn))
5696 abort ();
5697 if (GET_CODE (insn) == CALL_INSN)
5698 init_insn_group_barriers ();
5699 stops_p [INSN_UID (insn)] = stop_before_p;
5700 stop_before_p = 0;
5702 return 1;
5705 /* We are choosing an insn from the ready queue. Return nonzero if INSN
5706 can be chosen. */
5708 static int
5709 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
5711 if (insn == NULL_RTX || !INSN_P (insn))
5712 abort ();
5713 return (!reload_completed
5714 || !safe_group_barrier_needed_p (insn));
5717 /* The following variable holds a pseudo-insn used by the DFA insn
5718 scheduler to change the DFA state when the simulated clock is
5719 advanced. */
5721 static rtx dfa_pre_cycle_insn;
5723 /* We are about to begin issuing INSN. Return nonzero if we cannot
5724 issue it on the given cycle CLOCK; clear *SORT_P if we should not
5725 sort the ready queue on the next clock start. */
5727 static int
5728 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
5729 int clock, int *sort_p)
5731 int setup_clocks_p = FALSE;
5733 if (insn == NULL_RTX || !INSN_P (insn))
5734 abort ();
5735 if ((reload_completed && safe_group_barrier_needed_p (insn))
5736 || (last_scheduled_insn
5737 && (GET_CODE (last_scheduled_insn) == CALL_INSN
5738 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
5739 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
5741 init_insn_group_barriers ();
5742 if (verbose && dump)
5743 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
5744 last_clock == clock ? " + cycle advance" : "");
5745 stop_before_p = 1;
5746 if (last_clock == clock)
5748 state_transition (curr_state, dfa_stop_insn);
5749 if (TARGET_EARLY_STOP_BITS)
5750 *sort_p = (last_scheduled_insn == NULL_RTX
5751 || GET_CODE (last_scheduled_insn) != CALL_INSN);
5752 else
5753 *sort_p = 0;
5754 return 1;
5756 else if (reload_completed)
5757 setup_clocks_p = TRUE;
5758 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
5759 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
5760 state_reset (curr_state);
5761 else
5763 memcpy (curr_state, prev_cycle_state, dfa_state_size);
5764 state_transition (curr_state, dfa_stop_insn);
5765 state_transition (curr_state, dfa_pre_cycle_insn);
5766 state_transition (curr_state, NULL);
5769 else if (reload_completed)
5770 setup_clocks_p = TRUE;
5771 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
5772 && GET_CODE (PATTERN (insn)) != ASM_INPUT
5773 && asm_noperands (PATTERN (insn)) < 0)
5775 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
5777 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
5779 rtx link;
5780 int d = -1;
5782 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
5783 if (REG_NOTE_KIND (link) == 0)
5785 enum attr_itanium_class dep_class;
5786 rtx dep_insn = XEXP (link, 0);
5788 dep_class = ia64_safe_itanium_class (dep_insn);
5789 if ((dep_class == ITANIUM_CLASS_MMMUL
5790 || dep_class == ITANIUM_CLASS_MMSHF)
5791 && last_clock - clocks [INSN_UID (dep_insn)] < 4
5792 && (d < 0
5793 || last_clock - clocks [INSN_UID (dep_insn)] < d))
5794 d = last_clock - clocks [INSN_UID (dep_insn)];
5796 if (d >= 0)
5797 add_cycles [INSN_UID (insn)] = 3 - d;
5800 return 0;
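/* Illustrative sketch (not part of ia64.c): the add_cycles computation
   above, as a standalone function.  Given the clocks on which MMMUL/MMSHF
   producers were issued and the clock used for the comparison (last_clock
   in the code above), it returns how many extra cycles the bundling pass
   will later insert: 3 - d for the closest producer closer than 4 cycles,
   0 otherwise.  producer_clocks[] and n_producers are assumptions made for
   the example only.  */

#include <stdio.h>

static int
extra_mm_cycles (const int *producer_clocks, int n_producers, int insn_clock)
{
  int i, d = -1;

  for (i = 0; i < n_producers; i++)
    {
      int dist = insn_clock - producer_clocks[i];

      if (dist < 4 && (d < 0 || dist < d))
        d = dist;
    }
  return d >= 0 ? 3 - d : 0;
}

int
main (void)
{
  static const int producers[] = { 10, 12 };

  /* The closest MM producer was issued on cycle 12 and the consumer is
     considered on cycle 13, so d == 1 and two extra cycles are recorded
     for the bundling pass.  Prints "2".  */
  printf ("%d\n", extra_mm_cycles (producers, 2, 13));
  return 0;
}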
5805 /* The following page contains abstract data `bundle states' which are
5806 used for bundling insns (inserting nops and template generation). */
5808 /* The following describes the state of insn bundling. */
5810 struct bundle_state
5812 /* Unique bundle state number to identify them in the debugging
5813 output */
5814 int unique_num;
5815 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
5816 /* number of nops before and after the insn */
5817 short before_nops_num, after_nops_num;
5818 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
5819 insn) */
5820 int cost; /* cost of the state in cycles */
5821 int accumulated_insns_num; /* number of all previous insns including
5822 nops; an L insn counts as 2 insns */
5823 int branch_deviation; /* deviation of previous branches from 3rd slots */
5824 struct bundle_state *next; /* next state with the same insn_num */
5825 struct bundle_state *originator; /* originator (previous insn state) */
5826 /* All bundle states are in the following chain. */
5827 struct bundle_state *allocated_states_chain;
5828 /* The DFA State after issuing the insn and the nops. */
5829 state_t dfa_state;
5832 /* The following maps an insn number to the corresponding bundle state. */
5834 static struct bundle_state **index_to_bundle_states;
5836 /* The unique number of next bundle state. */
5838 static int bundle_states_num;
5840 /* All allocated bundle states are in the following chain. */
5842 static struct bundle_state *allocated_bundle_states_chain;
5844 /* All allocated but not used bundle states are in the following
5845 chain. */
5847 static struct bundle_state *free_bundle_state_chain;
5850 /* The following function returns a free bundle state. */
5852 static struct bundle_state *
5853 get_free_bundle_state (void)
5855 struct bundle_state *result;
5857 if (free_bundle_state_chain != NULL)
5859 result = free_bundle_state_chain;
5860 free_bundle_state_chain = result->next;
5862 else
5864 result = xmalloc (sizeof (struct bundle_state));
5865 result->dfa_state = xmalloc (dfa_state_size);
5866 result->allocated_states_chain = allocated_bundle_states_chain;
5867 allocated_bundle_states_chain = result;
5869 result->unique_num = bundle_states_num++;
5870 return result;
5874 /* The following function frees the given bundle state. */
5876 static void
5877 free_bundle_state (struct bundle_state *state)
5879 state->next = free_bundle_state_chain;
5880 free_bundle_state_chain = state;
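/* Illustrative sketch (not part of ia64.c): the free-list scheme used by
   get_free_bundle_state/free_bundle_state, reduced to a self-contained
   example.  `node' is a hypothetical stand-in for struct bundle_state:
   freed nodes go onto free_chain and are reused before malloc is called
   again, while allocated_chain remembers every allocation so a final
   sweep (like finish_bundle_states below) can release them all.
   Error handling is omitted.  */

#include <stdio.h>
#include <stdlib.h>

struct node
{
  struct node *next;            /* free-list link */
  struct node *allocated_chain; /* every allocation ever made */
  int unique_num;
};

static struct node *free_chain, *allocated_chain;
static int nodes_num;

static struct node *
get_free_node (void)
{
  struct node *result;

  if (free_chain != NULL)
    {
      result = free_chain;
      free_chain = result->next;
    }
  else
    {
      result = malloc (sizeof (struct node));
      result->allocated_chain = allocated_chain;
      allocated_chain = result;
    }
  result->unique_num = nodes_num++;
  return result;
}

static void
free_node (struct node *n)
{
  n->next = free_chain;
  free_chain = n;
}

int
main (void)
{
  struct node *a = get_free_node ();

  free_node (a);
  /* The second request reuses the node freed above: same address,
     new unique number.  Prints "1 2".  */
  printf ("%d %d\n", a == get_free_node (), nodes_num);
  return 0;
}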
5883 /* Start work with abstract data `bundle states'. */
5885 static void
5886 initiate_bundle_states (void)
5888 bundle_states_num = 0;
5889 free_bundle_state_chain = NULL;
5890 allocated_bundle_states_chain = NULL;
5893 /* Finish work with abstract data `bundle states'. */
5895 static void
5896 finish_bundle_states (void)
5898 struct bundle_state *curr_state, *next_state;
5900 for (curr_state = allocated_bundle_states_chain;
5901 curr_state != NULL;
5902 curr_state = next_state)
5904 next_state = curr_state->allocated_states_chain;
5905 free (curr_state->dfa_state);
5906 free (curr_state);
5910 /* Hash table of the bundle states. The key is dfa_state and insn_num
5911 of the bundle states. */
5913 static htab_t bundle_state_table;
5915 /* The function returns the hash of BUNDLE_STATE. */
5917 static unsigned
5918 bundle_state_hash (const void *bundle_state)
5920 const struct bundle_state *state = (struct bundle_state *) bundle_state;
5921 unsigned result, i;
5923 for (result = i = 0; i < dfa_state_size; i++)
5924 result += (((unsigned char *) state->dfa_state) [i]
5925 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
5926 return result + state->insn_num;
5929 /* The function returns nonzero if the bundle state keys are equal. */
5931 static int
5932 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
5934 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
5935 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
5937 return (state1->insn_num == state2->insn_num
5938 && memcmp (state1->dfa_state, state2->dfa_state,
5939 dfa_state_size) == 0);
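/* Illustrative sketch (not part of ia64.c): the hashing contract used by
   the bundle state table.  The key is the raw DFA state bytes plus the
   insn number, so two states are "equal" exactly when memcmp of the DFA
   bytes and the insn numbers agree -- and then their hashes agree too.
   STATE_SIZE and the struct name `key' are assumptions for the example;
   the byte mixing mirrors bundle_state_hash, with an explicit unsigned
   cast so the shifts stay well defined.  */

#include <limits.h>
#include <stdio.h>
#include <string.h>

#define STATE_SIZE 8

struct key
{
  unsigned char dfa_state[STATE_SIZE];
  int insn_num;
};

static unsigned
key_hash (const struct key *k)
{
  unsigned result, i;

  for (result = i = 0; i < STATE_SIZE; i++)
    result += (unsigned) k->dfa_state[i] << ((i % CHAR_BIT) * 3 + CHAR_BIT);
  return result + k->insn_num;
}

static int
key_eq_p (const struct key *a, const struct key *b)
{
  return (a->insn_num == b->insn_num
          && memcmp (a->dfa_state, b->dfa_state, STATE_SIZE) == 0);
}

int
main (void)
{
  struct key a = { { 1, 2, 3, 4, 5, 6, 7, 8 }, 3 };
  struct key b = a;

  /* Equal keys must hash identically; that is all the hash table needs.  */
  printf ("%d %u %u\n", key_eq_p (&a, &b), key_hash (&a), key_hash (&b));
  return 0;
}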
5942 /* The function inserts BUNDLE_STATE into the hash table and returns
5943 nonzero if it has been inserted. The table keeps only the best
5944 bundle state for each key. */
5946 static int
5947 insert_bundle_state (struct bundle_state *bundle_state)
5949 void **entry_ptr;
5951 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
5952 if (*entry_ptr == NULL)
5954 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
5955 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
5956 *entry_ptr = (void *) bundle_state;
5957 return TRUE;
5959 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
5960 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
5961 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
5962 > bundle_state->accumulated_insns_num
5963 || (((struct bundle_state *)
5964 *entry_ptr)->accumulated_insns_num
5965 == bundle_state->accumulated_insns_num
5966 && ((struct bundle_state *)
5967 *entry_ptr)->branch_deviation
5968 > bundle_state->branch_deviation))))
5971 struct bundle_state temp;
5973 temp = *(struct bundle_state *) *entry_ptr;
5974 *(struct bundle_state *) *entry_ptr = *bundle_state;
5975 ((struct bundle_state *) *entry_ptr)->next = temp.next;
5976 *bundle_state = temp;
5978 return FALSE;
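/* Illustrative sketch (not part of ia64.c): the ordering that
   insert_bundle_state uses to decide whether a newly generated state
   should replace the one already stored under the same key.  Lower cost
   wins; ties are broken by fewer accumulated insns (i.e. fewer nops),
   then by smaller branch deviation.  The struct below is a trimmed-down,
   hypothetical version of struct bundle_state.  */

#include <stdio.h>

struct state
{
  int cost;
  int accumulated_insns_num;
  int branch_deviation;
};

/* Return nonzero if NEW_STATE is strictly better than OLD under the
   lexicographic order (cost, accumulated_insns_num, branch_deviation).  */
static int
better_state_p (const struct state *new_state, const struct state *old)
{
  if (new_state->cost != old->cost)
    return new_state->cost < old->cost;
  if (new_state->accumulated_insns_num != old->accumulated_insns_num)
    return new_state->accumulated_insns_num < old->accumulated_insns_num;
  return new_state->branch_deviation < old->branch_deviation;
}

int
main (void)
{
  struct state old = { 2, 7, 1 };
  struct state same_cost_fewer_nops = { 2, 6, 3 };
  struct state higher_cost = { 3, 1, 0 };

  /* Prints "1 0": equal cost but fewer insns replaces the stored state,
     while a higher cost never does, whatever the other fields say.  */
  printf ("%d %d\n",
          better_state_p (&same_cost_fewer_nops, &old),
          better_state_p (&higher_cost, &old));
  return 0;
}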
5981 /* Start work with the hash table. */
5983 static void
5984 initiate_bundle_state_table (void)
5986 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
5987 (htab_del) 0);
5990 /* Finish work with the hash table. */
5992 static void
5993 finish_bundle_state_table (void)
5995 htab_delete (bundle_state_table);
6000 /* The following variable is an insn `nop' used to check bundle states
6001 with different numbers of inserted nops. */
6003 static rtx ia64_nop;
6005 /* The following function tries to issue NOPS_NUM nops for the current
6006 state without advancing the processor cycle. If this fails, the
6007 function returns FALSE and frees the current state. */
6009 static int
6010 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6012 int i;
6014 for (i = 0; i < nops_num; i++)
6015 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6017 free_bundle_state (curr_state);
6018 return FALSE;
6020 return TRUE;
6023 /* The following function tries to issue INSN for the current
6024 state without advancing the processor cycle. If this fails, the
6025 function returns FALSE and frees the current state. */
6027 static int
6028 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6030 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6032 free_bundle_state (curr_state);
6033 return FALSE;
6035 return TRUE;
6038 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6039 starting with ORIGINATOR without advancing the processor cycle. If
6040 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6041 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6042 If successful, the function creates a new bundle state and inserts it
6043 into the hash table and into `index_to_bundle_states'. */
6045 static void
6046 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6047 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6049 struct bundle_state *curr_state;
6051 curr_state = get_free_bundle_state ();
6052 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6053 curr_state->insn = insn;
6054 curr_state->insn_num = originator->insn_num + 1;
6055 curr_state->cost = originator->cost;
6056 curr_state->originator = originator;
6057 curr_state->before_nops_num = before_nops_num;
6058 curr_state->after_nops_num = 0;
6059 curr_state->accumulated_insns_num
6060 = originator->accumulated_insns_num + before_nops_num;
6061 curr_state->branch_deviation = originator->branch_deviation;
6062 if (insn == NULL_RTX)
6063 abort ();
6064 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6066 if (GET_MODE (insn) == TImode)
6067 abort ();
6068 if (!try_issue_nops (curr_state, before_nops_num))
6069 return;
6070 if (!try_issue_insn (curr_state, insn))
6071 return;
6072 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6073 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6074 && curr_state->accumulated_insns_num % 3 != 0)
6076 free_bundle_state (curr_state);
6077 return;
6080 else if (GET_MODE (insn) != TImode)
6082 if (!try_issue_nops (curr_state, before_nops_num))
6083 return;
6084 if (!try_issue_insn (curr_state, insn))
6085 return;
6086 curr_state->accumulated_insns_num++;
6087 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6088 || asm_noperands (PATTERN (insn)) >= 0)
6089 abort ();
6090 if (ia64_safe_type (insn) == TYPE_L)
6091 curr_state->accumulated_insns_num++;
6093 else
6095 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6096 state_transition (curr_state->dfa_state, NULL);
6097 curr_state->cost++;
6098 if (!try_issue_nops (curr_state, before_nops_num))
6099 return;
6100 if (!try_issue_insn (curr_state, insn))
6101 return;
6102 curr_state->accumulated_insns_num++;
6103 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6104 || asm_noperands (PATTERN (insn)) >= 0)
6106 /* Finish bundle containing asm insn. */
6107 curr_state->after_nops_num
6108 = 3 - curr_state->accumulated_insns_num % 3;
6109 curr_state->accumulated_insns_num
6110 += 3 - curr_state->accumulated_insns_num % 3;
6112 else if (ia64_safe_type (insn) == TYPE_L)
6113 curr_state->accumulated_insns_num++;
6115 if (ia64_safe_type (insn) == TYPE_B)
6116 curr_state->branch_deviation
6117 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6118 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6120 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6122 state_t dfa_state;
6123 struct bundle_state *curr_state1;
6124 struct bundle_state *allocated_states_chain;
6126 curr_state1 = get_free_bundle_state ();
6127 dfa_state = curr_state1->dfa_state;
6128 allocated_states_chain = curr_state1->allocated_states_chain;
6129 *curr_state1 = *curr_state;
6130 curr_state1->dfa_state = dfa_state;
6131 curr_state1->allocated_states_chain = allocated_states_chain;
6132 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6133 dfa_state_size);
6134 curr_state = curr_state1;
6136 if (!try_issue_nops (curr_state,
6137 3 - curr_state->accumulated_insns_num % 3))
6138 return;
6139 curr_state->after_nops_num
6140 = 3 - curr_state->accumulated_insns_num % 3;
6141 curr_state->accumulated_insns_num
6142 += 3 - curr_state->accumulated_insns_num % 3;
6144 if (!insert_bundle_state (curr_state))
6145 free_bundle_state (curr_state);
6146 return;
6149 /* The following function returns the position in the two-bundle
6150 window for the given STATE. */
6152 static int
6153 get_max_pos (state_t state)
6155 if (cpu_unit_reservation_p (state, pos_6))
6156 return 6;
6157 else if (cpu_unit_reservation_p (state, pos_5))
6158 return 5;
6159 else if (cpu_unit_reservation_p (state, pos_4))
6160 return 4;
6161 else if (cpu_unit_reservation_p (state, pos_3))
6162 return 3;
6163 else if (cpu_unit_reservation_p (state, pos_2))
6164 return 2;
6165 else if (cpu_unit_reservation_p (state, pos_1))
6166 return 1;
6167 else
6168 return 0;
6171 /* The function returns the code of a possible template for the given
6172 position and state. The function should be called only with position
6173 values of 3 or 6. */
6175 static int
6176 get_template (state_t state, int pos)
6178 switch (pos)
6180 case 3:
6181 if (cpu_unit_reservation_p (state, _0mii_))
6182 return 0;
6183 else if (cpu_unit_reservation_p (state, _0mmi_))
6184 return 1;
6185 else if (cpu_unit_reservation_p (state, _0mfi_))
6186 return 2;
6187 else if (cpu_unit_reservation_p (state, _0mmf_))
6188 return 3;
6189 else if (cpu_unit_reservation_p (state, _0bbb_))
6190 return 4;
6191 else if (cpu_unit_reservation_p (state, _0mbb_))
6192 return 5;
6193 else if (cpu_unit_reservation_p (state, _0mib_))
6194 return 6;
6195 else if (cpu_unit_reservation_p (state, _0mmb_))
6196 return 7;
6197 else if (cpu_unit_reservation_p (state, _0mfb_))
6198 return 8;
6199 else if (cpu_unit_reservation_p (state, _0mlx_))
6200 return 9;
6201 else
6202 abort ();
6203 case 6:
6204 if (cpu_unit_reservation_p (state, _1mii_))
6205 return 0;
6206 else if (cpu_unit_reservation_p (state, _1mmi_))
6207 return 1;
6208 else if (cpu_unit_reservation_p (state, _1mfi_))
6209 return 2;
6210 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6211 return 3;
6212 else if (cpu_unit_reservation_p (state, _1bbb_))
6213 return 4;
6214 else if (cpu_unit_reservation_p (state, _1mbb_))
6215 return 5;
6216 else if (cpu_unit_reservation_p (state, _1mib_))
6217 return 6;
6218 else if (cpu_unit_reservation_p (state, _1mmb_))
6219 return 7;
6220 else if (cpu_unit_reservation_p (state, _1mfb_))
6221 return 8;
6222 else if (cpu_unit_reservation_p (state, _1mlx_))
6223 return 9;
6224 else
6225 abort ();
6226 default:
6227 abort ();
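/* Illustrative sketch (not part of ia64.c): the bundle templates the
   codes returned above appear to correspond to, judging from the cpu
   unit names (_0mii_ ... _0mlx_) and from the uses elsewhere in this
   file where code 0 is emitted as an MII bundle, 2 as MFI and 9 as MLX.
   The table is an inference added for readability; the authoritative
   mapping is presumably the bundle_selector pattern in ia64.md.  */

#include <stdio.h>

static const char *const template_names[10] =
  { "MII", "MMI", "MFI", "MMF", "BBB", "MBB", "MIB", "MMB", "MFB", "MLX" };

int
main (void)
{
  int code;

  /* E.g. "2 -> MFI", the template an MLX bundle is rewritten to before
     extra nops are inserted (see the MM-insn handling further below).  */
  for (code = 0; code < 10; code++)
    printf ("%d -> %s\n", code, template_names[code]);
  return 0;
}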
6231 /* The following function returns the first insn important for insn
6232 bundling at or after INSN and before TAIL. */
6234 static rtx
6235 get_next_important_insn (rtx insn, rtx tail)
6237 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6238 if (INSN_P (insn)
6239 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6240 && GET_CODE (PATTERN (insn)) != USE
6241 && GET_CODE (PATTERN (insn)) != CLOBBER)
6242 return insn;
6243 return NULL_RTX;
6246 /* The following function does insn bundling. Bundling means
6247 inserting templates and nop insns to fit insn groups into permitted
6248 templates. Instruction scheduling uses an NDFA (non-deterministic
6249 finite automaton) encoding information about the templates and the
6250 inserted nops. The nondeterminism of the automaton permits following
6251 all possible insn sequences very quickly.
6253 Unfortunately it is not possible to get information about the inserted
6254 nop insns and the used templates from the automaton states. The
6255 automaton only says that we can issue an insn, possibly inserting
6256 some nops before it and using some template. Therefore insn
6257 bundling in this function is implemented with a DFA
6258 (deterministic finite automaton). We follow all possible insn
6259 sequences by inserting 0-2 nops (which is what the NDFA describes for
6260 insn scheduling) before/after each insn being bundled. We know the
6261 start of a simulated processor cycle from insn scheduling (an insn
6262 starting a new cycle has TImode).
6264 A simple implementation of insn bundling would create an enormous
6265 number of possible insn sequences satisfying the information about new
6266 cycle ticks taken from the insn scheduling. To make the algorithm
6267 practical we use dynamic programming. Each decision (about
6268 inserting nops, and implicitly about previous decisions) is described
6269 by structure bundle_state (see above). If we generate the same
6270 bundle state (the key is the automaton state after issuing the insns
6271 and nops for it), we reuse the already generated one. As a consequence
6272 we reject some decisions which cannot improve the solution and
6273 reduce the memory needed by the algorithm.
6275 When we reach the end of the EBB (extended basic block), we choose
6276 the best sequence and then, moving back through the EBB, insert
6277 templates for the best alternative. The templates are found by
6278 querying the automaton state for each insn of the chosen bundle states.
6280 So the algorithm makes two (forward and backward) passes through the
6281 EBB. There is an additional forward pass through the EBB for the
6282 Itanium1 processor. This pass inserts more nops to make the dependency
6283 between a producer insn and an MMMUL/MMSHF insn at least 4 cycles long. */
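/* Illustrative sketch (not part of ia64.c): a toy version of the dynamic
   programming described above.  The "DFA state" is reduced to the slot
   position within a bundle (0, 1, 2) and each insn to a bitmask of slots
   it may occupy; before every insn we may insert 0, 1 or 2 nops, and at
   the end the last bundle must be filled.  For every (insn index, slot)
   key only the cheapest state (fewest nops) is kept, which is what keeps
   the real algorithm from enumerating every nop placement separately.
   The slot masks below are invented for the example and do not
   correspond to real IA-64 templates.  */

#include <stdio.h>

#define NINSNS 4
#define INF 1000000

int
main (void)
{
  /* Bit i set means the insn may go into slot i of a bundle.  */
  static const int allowed[NINSNS] = { 0x7, 0x1, 0x6, 0x1 };
  /* best[i][p] = fewest nops needed to place the first i insns so that
     the next free slot is p.  This is the analogue of keeping one best
     bundle_state per (insn_num, dfa_state) key.  */
  int best[NINSNS + 1][3];
  int i, p, nops;

  for (p = 0; p < 3; p++)
    best[0][p] = p == 0 ? 0 : INF;
  for (i = 0; i < NINSNS; i++)
    for (p = 0; p < 3; p++)
      best[i + 1][p] = INF;

  for (i = 0; i < NINSNS; i++)
    for (p = 0; p < 3; p++)
      {
        if (best[i][p] == INF)
          continue;
        for (nops = 0; nops <= 2; nops++)
          {
            int slot = (p + nops) % 3;

            if (allowed[i] & (1 << slot))
              {
                int next = (slot + 1) % 3;
                int cost = best[i][p] + nops;

                if (cost < best[i + 1][next])
                  best[i + 1][next] = cost;
              }
          }
      }

  /* Pad the last bundle with nops, as the real pass does for the final
     states, and report the minimum number of nops overall.  */
  {
    int answer = INF;

    for (p = 0; p < 3; p++)
      if (best[NINSNS][p] != INF && best[NINSNS][p] + (3 - p) % 3 < answer)
        answer = best[NINSNS][p] + (3 - p) % 3;
    printf ("minimum nops: %d\n", answer);
  }
  return 0;
}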
6285 static void
6286 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6288 struct bundle_state *curr_state, *next_state, *best_state;
6289 rtx insn, next_insn;
6290 int insn_num;
6291 int i, bundle_end_p, only_bundle_end_p, asm_p;
6292 int pos = 0, max_pos, template0, template1;
6293 rtx b;
6294 rtx nop;
6295 enum attr_type type;
6297 insn_num = 0;
6298 /* Count insns in the EBB. */
6299 for (insn = NEXT_INSN (prev_head_insn);
6300 insn && insn != tail;
6301 insn = NEXT_INSN (insn))
6302 if (INSN_P (insn))
6303 insn_num++;
6304 if (insn_num == 0)
6305 return;
6306 bundling_p = 1;
6307 dfa_clean_insn_cache ();
6308 initiate_bundle_state_table ();
6309 index_to_bundle_states = xmalloc ((insn_num + 2)
6310 * sizeof (struct bundle_state *));
6311 /* First (forward) pass -- generation of bundle states. */
6312 curr_state = get_free_bundle_state ();
6313 curr_state->insn = NULL;
6314 curr_state->before_nops_num = 0;
6315 curr_state->after_nops_num = 0;
6316 curr_state->insn_num = 0;
6317 curr_state->cost = 0;
6318 curr_state->accumulated_insns_num = 0;
6319 curr_state->branch_deviation = 0;
6320 curr_state->next = NULL;
6321 curr_state->originator = NULL;
6322 state_reset (curr_state->dfa_state);
6323 index_to_bundle_states [0] = curr_state;
6324 insn_num = 0;
6325 /* Shift the cycle mark if it is put on an insn which could be ignored. */
6326 for (insn = NEXT_INSN (prev_head_insn);
6327 insn != tail;
6328 insn = NEXT_INSN (insn))
6329 if (INSN_P (insn)
6330 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6331 || GET_CODE (PATTERN (insn)) == USE
6332 || GET_CODE (PATTERN (insn)) == CLOBBER)
6333 && GET_MODE (insn) == TImode)
6335 PUT_MODE (insn, VOIDmode);
6336 for (next_insn = NEXT_INSN (insn);
6337 next_insn != tail;
6338 next_insn = NEXT_INSN (next_insn))
6339 if (INSN_P (next_insn)
6340 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6341 && GET_CODE (PATTERN (next_insn)) != USE
6342 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6344 PUT_MODE (next_insn, TImode);
6345 break;
6348 /* Forward pass: generation of bundle states. */
6349 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6350 insn != NULL_RTX;
6351 insn = next_insn)
6353 if (!INSN_P (insn)
6354 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6355 || GET_CODE (PATTERN (insn)) == USE
6356 || GET_CODE (PATTERN (insn)) == CLOBBER)
6357 abort ();
6358 type = ia64_safe_type (insn);
6359 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6360 insn_num++;
6361 index_to_bundle_states [insn_num] = NULL;
6362 for (curr_state = index_to_bundle_states [insn_num - 1];
6363 curr_state != NULL;
6364 curr_state = next_state)
6366 pos = curr_state->accumulated_insns_num % 3;
6367 next_state = curr_state->next;
6368 /* We must fill up the current bundle in order to start a
6369 subsequent asm insn in a new bundle. An asm insn is always
6370 placed in a separate bundle. */
6371 only_bundle_end_p
6372 = (next_insn != NULL_RTX
6373 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6374 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6375 /* We may fill up the current bundle if it is the cycle end
6376 without a group barrier. */
6377 bundle_end_p
6378 = (only_bundle_end_p || next_insn == NULL_RTX
6379 || (GET_MODE (next_insn) == TImode
6380 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6381 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6382 || type == TYPE_S
6383 /* We need to insert 2 nops for cases like M_MII. To
6384 guarantee issuing all insns on the same cycle for
6385 Itanium 1, we need to issue 2 nops after the first M
6386 insn (MnnMII where n is a nop insn). */
6387 || ((type == TYPE_M || type == TYPE_A)
6388 && ia64_tune == PROCESSOR_ITANIUM
6389 && !bundle_end_p && pos == 1))
6390 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6391 only_bundle_end_p);
6392 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6393 only_bundle_end_p);
6394 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6395 only_bundle_end_p);
6397 if (index_to_bundle_states [insn_num] == NULL)
6398 abort ();
6399 for (curr_state = index_to_bundle_states [insn_num];
6400 curr_state != NULL;
6401 curr_state = curr_state->next)
6402 if (verbose >= 2 && dump)
6404 /* This structure is taken from the generated code of the
6405 pipeline hazard recognizer (see file insn-attrtab.c).
6406 Please don't forget to change the structure if a new
6407 automaton is added to the .md file. */
6408 struct DFA_chip
6410 unsigned short one_automaton_state;
6411 unsigned short oneb_automaton_state;
6412 unsigned short two_automaton_state;
6413 unsigned short twob_automaton_state;
6416 fprintf
6417 (dump,
6418 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6419 curr_state->unique_num,
6420 (curr_state->originator == NULL
6421 ? -1 : curr_state->originator->unique_num),
6422 curr_state->cost,
6423 curr_state->before_nops_num, curr_state->after_nops_num,
6424 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6425 (ia64_tune == PROCESSOR_ITANIUM
6426 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6427 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6428 INSN_UID (insn));
6431 if (index_to_bundle_states [insn_num] == NULL)
6432 /* We should find a solution because the 2nd insn scheduling has
6433 found one. */
6434 abort ();
6435 /* Find a state corresponding to the best insn sequence. */
6436 best_state = NULL;
6437 for (curr_state = index_to_bundle_states [insn_num];
6438 curr_state != NULL;
6439 curr_state = curr_state->next)
6440 /* We look only at states with a fully filled-up last bundle.
6441 First we prefer insn sequences with minimal cost, then those
6442 with the fewest inserted nops, and finally those with branch
6443 insns placed in the 3rd slots. */
6444 if (curr_state->accumulated_insns_num % 3 == 0
6445 && (best_state == NULL || best_state->cost > curr_state->cost
6446 || (best_state->cost == curr_state->cost
6447 && (curr_state->accumulated_insns_num
6448 < best_state->accumulated_insns_num
6449 || (curr_state->accumulated_insns_num
6450 == best_state->accumulated_insns_num
6451 && curr_state->branch_deviation
6452 < best_state->branch_deviation)))))
6453 best_state = curr_state;
6454 /* Second (backward) pass: adding nops and templates. */
6455 insn_num = best_state->before_nops_num;
6456 template0 = template1 = -1;
6457 for (curr_state = best_state;
6458 curr_state->originator != NULL;
6459 curr_state = curr_state->originator)
6461 insn = curr_state->insn;
6462 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6463 || asm_noperands (PATTERN (insn)) >= 0);
6464 insn_num++;
6465 if (verbose >= 2 && dump)
6467 struct DFA_chip
6469 unsigned short one_automaton_state;
6470 unsigned short oneb_automaton_state;
6471 unsigned short two_automaton_state;
6472 unsigned short twob_automaton_state;
6475 fprintf
6476 (dump,
6477 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6478 curr_state->unique_num,
6479 (curr_state->originator == NULL
6480 ? -1 : curr_state->originator->unique_num),
6481 curr_state->cost,
6482 curr_state->before_nops_num, curr_state->after_nops_num,
6483 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6484 (ia64_tune == PROCESSOR_ITANIUM
6485 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6486 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6487 INSN_UID (insn));
6489 /* Find the position in the current bundle window. The window can
6490 contain at most two bundles. A two-bundle window means that
6491 the processor will make two bundle rotations. */
6492 max_pos = get_max_pos (curr_state->dfa_state);
6493 if (max_pos == 6
6494 /* The following (negative template number) means that the
6495 processor did one bundle rotation. */
6496 || (max_pos == 3 && template0 < 0))
6498 /* We are at the end of the window -- find template(s) for
6499 its bundle(s). */
6500 pos = max_pos;
6501 if (max_pos == 3)
6502 template0 = get_template (curr_state->dfa_state, 3);
6503 else
6505 template1 = get_template (curr_state->dfa_state, 3);
6506 template0 = get_template (curr_state->dfa_state, 6);
6509 if (max_pos > 3 && template1 < 0)
6510 /* This may happen when we have a stop inside a bundle. */
6512 if (pos > 3)
6513 abort ();
6514 template1 = get_template (curr_state->dfa_state, 3);
6515 pos += 3;
6517 if (!asm_p)
6518 /* Emit nops after the current insn. */
6519 for (i = 0; i < curr_state->after_nops_num; i++)
6521 nop = gen_nop ();
6522 emit_insn_after (nop, insn);
6523 pos--;
6524 if (pos < 0)
6525 abort ();
6526 if (pos % 3 == 0)
6528 /* We are at the start of a bundle: emit the template
6529 (it should be defined). */
6530 if (template0 < 0)
6531 abort ();
6532 b = gen_bundle_selector (GEN_INT (template0));
6533 ia64_emit_insn_before (b, nop);
6534 /* If we have a two-bundle window, we make one bundle
6535 rotation. Otherwise template0 will be undefined
6536 (a negative value). */
6537 template0 = template1;
6538 template1 = -1;
6541 /* Move the position backward in the window. A group barrier has
6542 no slot. An asm insn takes a whole bundle. */
6543 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6544 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6545 && asm_noperands (PATTERN (insn)) < 0)
6546 pos--;
6547 /* Long insn takes 2 slots. */
6548 if (ia64_safe_type (insn) == TYPE_L)
6549 pos--;
6550 if (pos < 0)
6551 abort ();
6552 if (pos % 3 == 0
6553 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6554 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6555 && asm_noperands (PATTERN (insn)) < 0)
6557 /* The current insn is at the bundle start: emit the
6558 template. */
6559 if (template0 < 0)
6560 abort ();
6561 b = gen_bundle_selector (GEN_INT (template0));
6562 ia64_emit_insn_before (b, insn);
6563 b = PREV_INSN (insn);
6564 insn = b;
6565 /* See comment above in analogous place for emitting nops
6566 after the insn. */
6567 template0 = template1;
6568 template1 = -1;
6570 /* Emit nops before the current insn. */
6571 for (i = 0; i < curr_state->before_nops_num; i++)
6573 nop = gen_nop ();
6574 ia64_emit_insn_before (nop, insn);
6575 nop = PREV_INSN (insn);
6576 insn = nop;
6577 pos--;
6578 if (pos < 0)
6579 abort ();
6580 if (pos % 3 == 0)
6582 /* See comment above in analogous place for emitting nops
6583 after the insn. */
6584 if (template0 < 0)
6585 abort ();
6586 b = gen_bundle_selector (GEN_INT (template0));
6587 ia64_emit_insn_before (b, insn);
6588 b = PREV_INSN (insn);
6589 insn = b;
6590 template0 = template1;
6591 template1 = -1;
6595 if (ia64_tune == PROCESSOR_ITANIUM)
6596 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
6597 Itanium1 has a strange design: if the distance between an insn
6598 and a dependent MM-insn is less than 4 cycles, we get an
6599 additional 6-cycle stall. So we make the distance equal to
6600 4 cycles if it is less. */
6601 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6602 insn != NULL_RTX;
6603 insn = next_insn)
6605 if (!INSN_P (insn)
6606 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6607 || GET_CODE (PATTERN (insn)) == USE
6608 || GET_CODE (PATTERN (insn)) == CLOBBER)
6609 abort ();
6610 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6611 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
6612 /* We found an MM-insn which needs additional cycles. */
6614 rtx last;
6615 int i, j, n;
6616 int pred_stop_p;
6618 /* Now we search for the template of the bundle in
6619 which the MM-insn is placed and for the position of the
6620 insn in the bundle (0, 1, 2). We also check whether
6621 there is a stop before the insn. */
6622 last = prev_active_insn (insn);
6623 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
6624 if (pred_stop_p)
6625 last = prev_active_insn (last);
6626 n = 0;
6627 for (;; last = prev_active_insn (last))
6628 if (recog_memoized (last) == CODE_FOR_bundle_selector)
6630 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
6631 if (template0 == 9)
6632 /* The insn is in an MLX bundle. Change the template
6633 to MFI because we will add nops before the
6634 insn. This simplifies the subsequent code a lot. */
6635 PATTERN (last)
6636 = gen_bundle_selector (const2_rtx); /* -> MFI */
6637 break;
6639 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
6640 && (ia64_safe_itanium_class (last)
6641 != ITANIUM_CLASS_IGNORE))
6642 n++;
6643 /* Sanity check: the stop is not at the bundle start,
6644 there are no more than 3 insns in the bundle,
6645 and the MM-insn is not at the start of a bundle with
6646 template MLX. */
6647 if ((pred_stop_p && n == 0) || n > 2
6648 || (template0 == 9 && n != 0))
6649 abort ();
6650 /* Put nops after the insn in the bundle. */
6651 for (j = 3 - n; j > 0; j --)
6652 ia64_emit_insn_before (gen_nop (), insn);
6653 /* This takes into account that we will add N more nops
6654 before the insn later on -- see the code below. */
6655 add_cycles [INSN_UID (insn)]--;
6656 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
6657 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6658 insn);
6659 if (pred_stop_p)
6660 add_cycles [INSN_UID (insn)]--;
6661 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
6663 /* Insert "MII;" template. */
6664 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
6665 insn);
6666 ia64_emit_insn_before (gen_nop (), insn);
6667 ia64_emit_insn_before (gen_nop (), insn);
6668 if (i > 1)
6670 /* To decrease code size, we use "MI;I;"
6671 template. */
6672 ia64_emit_insn_before
6673 (gen_insn_group_barrier (GEN_INT (3)), insn);
6674 i--;
6676 ia64_emit_insn_before (gen_nop (), insn);
6677 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6678 insn);
6680 /* Put the MM-insn in the same slot of a bundle with the
6681 same template as the original one. */
6682 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
6683 insn);
6684 /* To put the insn in the same slot, add the necessary number
6685 of nops. */
6686 for (j = n; j > 0; j --)
6687 ia64_emit_insn_before (gen_nop (), insn);
6688 /* Put the stop if the original bundle had it. */
6689 if (pred_stop_p)
6690 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6691 insn);
6694 free (index_to_bundle_states);
6695 finish_bundle_state_table ();
6696 bundling_p = 0;
6697 dfa_clean_insn_cache ();
6700 /* The following function is called at the end of scheduling BB or
6701 EBB. After reload, it inserts stop bits and does insn bundling. */
6703 static void
6704 ia64_sched_finish (FILE *dump, int sched_verbose)
6706 if (sched_verbose)
6707 fprintf (dump, "// Finishing schedule.\n");
6708 if (!reload_completed)
6709 return;
6710 if (reload_completed)
6712 final_emit_insn_group_barriers (dump);
6713 bundling (dump, sched_verbose, current_sched_info->prev_head,
6714 current_sched_info->next_tail);
6715 if (sched_verbose && dump)
6716 fprintf (dump, "// finishing %d-%d\n",
6717 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
6718 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
6720 return;
6724 /* The following function inserts stop bits in scheduled BB or EBB. */
6726 static void
6727 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6729 rtx insn;
6730 int need_barrier_p = 0;
6731 rtx prev_insn = NULL_RTX;
6733 init_insn_group_barriers ();
6735 for (insn = NEXT_INSN (current_sched_info->prev_head);
6736 insn != current_sched_info->next_tail;
6737 insn = NEXT_INSN (insn))
6739 if (GET_CODE (insn) == BARRIER)
6741 rtx last = prev_active_insn (insn);
6743 if (! last)
6744 continue;
6745 if (GET_CODE (last) == JUMP_INSN
6746 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6747 last = prev_active_insn (last);
6748 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6749 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6751 init_insn_group_barriers ();
6752 need_barrier_p = 0;
6753 prev_insn = NULL_RTX;
6755 else if (INSN_P (insn))
6757 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6759 init_insn_group_barriers ();
6760 need_barrier_p = 0;
6761 prev_insn = NULL_RTX;
6763 else if (need_barrier_p || group_barrier_needed_p (insn))
6765 if (TARGET_EARLY_STOP_BITS)
6767 rtx last;
6769 for (last = insn;
6770 last != current_sched_info->prev_head;
6771 last = PREV_INSN (last))
6772 if (INSN_P (last) && GET_MODE (last) == TImode
6773 && stops_p [INSN_UID (last)])
6774 break;
6775 if (last == current_sched_info->prev_head)
6776 last = insn;
6777 last = prev_active_insn (last);
6778 if (last
6779 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
6780 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
6781 last);
6782 init_insn_group_barriers ();
6783 for (last = NEXT_INSN (last);
6784 last != insn;
6785 last = NEXT_INSN (last))
6786 if (INSN_P (last))
6787 group_barrier_needed_p (last);
6789 else
6791 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6792 insn);
6793 init_insn_group_barriers ();
6795 group_barrier_needed_p (insn);
6796 prev_insn = NULL_RTX;
6798 else if (recog_memoized (insn) >= 0)
6799 prev_insn = insn;
6800 need_barrier_p = (GET_CODE (insn) == CALL_INSN
6801 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6802 || asm_noperands (PATTERN (insn)) >= 0);
6809 /* The following function returns the lookahead depth used by the
6810 multipass DFA insn scheduler; a nonzero value enables it. */
6812 static int
6813 ia64_first_cycle_multipass_dfa_lookahead (void)
6815 return (reload_completed ? 6 : 4);
6818 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
6820 static void
6821 ia64_init_dfa_pre_cycle_insn (void)
6823 if (temp_dfa_state == NULL)
6825 dfa_state_size = state_size ();
6826 temp_dfa_state = xmalloc (dfa_state_size);
6827 prev_cycle_state = xmalloc (dfa_state_size);
6829 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
6830 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
6831 recog_memoized (dfa_pre_cycle_insn);
6832 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
6833 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
6834 recog_memoized (dfa_stop_insn);
6837 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
6838 used by the DFA insn scheduler. */
6840 static rtx
6841 ia64_dfa_pre_cycle_insn (void)
6843 return dfa_pre_cycle_insn;
6846 /* The following function returns TRUE if PRODUCER (of type ilog or
6847 ld) produces the address for CONSUMER (of type st or stf). */
6850 ia64_st_address_bypass_p (rtx producer, rtx consumer)
6852 rtx dest, reg, mem;
6854 if (producer == NULL_RTX || consumer == NULL_RTX)
6855 abort ();
6856 dest = ia64_single_set (producer);
6857 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
6858 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
6859 abort ();
6860 if (GET_CODE (reg) == SUBREG)
6861 reg = SUBREG_REG (reg);
6862 dest = ia64_single_set (consumer);
6863 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
6864 || GET_CODE (mem) != MEM)
6865 abort ();
6866 return reg_mentioned_p (reg, mem);
6869 /* The following function returns TRUE if PRODUCER (of type ilog or
6870 ld) produces the address for CONSUMER (of type ld or fld). */
6873 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
6875 rtx dest, src, reg, mem;
6877 if (producer == NULL_RTX || consumer == NULL_RTX)
6878 abort ();
6879 dest = ia64_single_set (producer);
6880 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
6881 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
6882 abort ();
6883 if (GET_CODE (reg) == SUBREG)
6884 reg = SUBREG_REG (reg);
6885 src = ia64_single_set (consumer);
6886 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
6887 abort ();
6888 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
6889 mem = XVECEXP (mem, 0, 0);
6890 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
6891 mem = XEXP (mem, 0);
6893 /* Note that LO_SUM is used for GOT loads. */
6894 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
6895 abort ();
6897 return reg_mentioned_p (reg, mem);
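/* Illustrative sketch (not part of ia64.c): the core question both bypass
   predicates above answer -- does the register defined by the producer
   appear anywhere inside the consumer's memory address?  The tiny
   expression type below is a made-up stand-in for RTL; reg_mentioned_p in
   the real code performs the analogous recursive walk over rtx.  */

#include <stdio.h>

enum kind { REG, CONST, PLUS };

struct expr
{
  enum kind kind;
  int regno;                     /* for REG */
  const struct expr *op0, *op1;  /* for PLUS */
};

static int
reg_mentioned (int regno, const struct expr *x)
{
  switch (x->kind)
    {
    case REG:
      return x->regno == regno;
    case PLUS:
      return reg_mentioned (regno, x->op0) || reg_mentioned (regno, x->op1);
    default:
      return 0;
    }
}

int
main (void)
{
  /* Consumer address: (plus (reg 32) (const 8)); producer defines r32.  */
  static const struct expr r32 = { REG, 32, 0, 0 };
  static const struct expr c8 = { CONST, 0, 0, 0 };
  static const struct expr addr = { PLUS, 0, &r32, &c8 };

  /* Prints "1 0": the bypass applies for r32 but not for r33.  */
  printf ("%d %d\n", reg_mentioned (32, &addr), reg_mentioned (33, &addr));
  return 0;
}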
6900 /* The following function returns TRUE if INSN produces an address
6901 for a load/store insn. We place such insns into an M slot because
6902 that decreases their latency. */
6905 ia64_produce_address_p (rtx insn)
6907 return insn->call;
6911 /* Emit pseudo-ops for the assembler to describe predicate relations.
6912 At present this assumes that we only consider predicate pairs to
6913 be mutex, and that the assembler can deduce proper values from
6914 straight-line code. */
6916 static void
6917 emit_predicate_relation_info (void)
6919 basic_block bb;
6921 FOR_EACH_BB_REVERSE (bb)
6923 int r;
6924 rtx head = BB_HEAD (bb);
6926 /* We only need such notes at code labels. */
6927 if (GET_CODE (head) != CODE_LABEL)
6928 continue;
6929 if (GET_CODE (NEXT_INSN (head)) == NOTE
6930 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6931 head = NEXT_INSN (head);
6933 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6934 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6936 rtx p = gen_rtx_REG (BImode, r);
6937 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6938 if (head == BB_END (bb))
6939 BB_END (bb) = n;
6940 head = n;
6944 /* Look for conditional calls that do not return, and protect predicate
6945 relations around them. Otherwise the assembler will assume the call
6946 returns, and complain about uses of call-clobbered predicates after
6947 the call. */
6948 FOR_EACH_BB_REVERSE (bb)
6950 rtx insn = BB_HEAD (bb);
6952 while (1)
6954 if (GET_CODE (insn) == CALL_INSN
6955 && GET_CODE (PATTERN (insn)) == COND_EXEC
6956 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6958 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6959 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6960 if (BB_HEAD (bb) == insn)
6961 BB_HEAD (bb) = b;
6962 if (BB_END (bb) == insn)
6963 BB_END (bb) = a;
6966 if (insn == BB_END (bb))
6967 break;
6968 insn = NEXT_INSN (insn);
6973 /* Perform machine dependent operations on the rtl chain INSNS. */
6975 static void
6976 ia64_reorg (void)
6978 /* We are freeing block_for_insn in the toplev to keep compatibility
6979 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6980 compute_bb_for_insn ();
6982 /* If optimizing, we'll have split before scheduling. */
6983 if (optimize == 0)
6984 split_all_insns (0);
6986 /* ??? update_life_info_in_dirty_blocks fails to terminate during
6987 non-optimizing bootstrap. */
6988 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
6990 if (ia64_flag_schedule_insns2)
6992 timevar_push (TV_SCHED2);
6993 ia64_final_schedule = 1;
6995 initiate_bundle_states ();
6996 ia64_nop = make_insn_raw (gen_nop ());
6997 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
6998 recog_memoized (ia64_nop);
6999 clocks_length = get_max_uid () + 1;
7000 stops_p = xcalloc (1, clocks_length);
7001 if (ia64_tune == PROCESSOR_ITANIUM)
7003 clocks = xcalloc (clocks_length, sizeof (int));
7004 add_cycles = xcalloc (clocks_length, sizeof (int));
7006 if (ia64_tune == PROCESSOR_ITANIUM2)
7008 pos_1 = get_cpu_unit_code ("2_1");
7009 pos_2 = get_cpu_unit_code ("2_2");
7010 pos_3 = get_cpu_unit_code ("2_3");
7011 pos_4 = get_cpu_unit_code ("2_4");
7012 pos_5 = get_cpu_unit_code ("2_5");
7013 pos_6 = get_cpu_unit_code ("2_6");
7014 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7015 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7016 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7017 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7018 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7019 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7020 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7021 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7022 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7023 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7024 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7025 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7026 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7027 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7028 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7029 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7030 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7031 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7032 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7033 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7035 else
7037 pos_1 = get_cpu_unit_code ("1_1");
7038 pos_2 = get_cpu_unit_code ("1_2");
7039 pos_3 = get_cpu_unit_code ("1_3");
7040 pos_4 = get_cpu_unit_code ("1_4");
7041 pos_5 = get_cpu_unit_code ("1_5");
7042 pos_6 = get_cpu_unit_code ("1_6");
7043 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7044 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7045 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7046 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7047 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7048 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7049 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7050 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7051 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7052 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7053 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7054 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7055 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7056 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7057 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7058 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7059 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7060 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7061 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7062 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7064 schedule_ebbs (dump_file);
7065 finish_bundle_states ();
7066 if (ia64_tune == PROCESSOR_ITANIUM)
7068 free (add_cycles);
7069 free (clocks);
7071 free (stops_p);
7072 emit_insn_group_barriers (dump_file);
7074 ia64_final_schedule = 0;
7075 timevar_pop (TV_SCHED2);
7077 else
7078 emit_all_insn_group_barriers (dump_file);
7080 /* A call must not be the last instruction in a function, so that the
7081 return address is still within the function and unwinding works
7082 properly. Note that IA-64 differs from dwarf2 on this point. */
7083 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7085 rtx insn;
7086 int saw_stop = 0;
7088 insn = get_last_insn ();
7089 if (! INSN_P (insn))
7090 insn = prev_active_insn (insn);
7091 /* Skip over insns that expand to nothing. */
7092 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7094 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7095 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7096 saw_stop = 1;
7097 insn = prev_active_insn (insn);
7099 if (GET_CODE (insn) == CALL_INSN)
7101 if (! saw_stop)
7102 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7103 emit_insn (gen_break_f ());
7104 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7108 fixup_errata ();
7109 emit_predicate_relation_info ();
7111 if (ia64_flag_var_tracking)
7113 timevar_push (TV_VAR_TRACKING);
7114 variable_tracking_main ();
7115 timevar_pop (TV_VAR_TRACKING);
7119 /* Return true if REGNO is used by the epilogue. */
7122 ia64_epilogue_uses (int regno)
7124 switch (regno)
7126 case R_GR (1):
7127 /* With a call to a function in another module, we will write a new
7128 value to "gp". After returning from such a call, we need to make
7129 sure the function restores the original gp-value, even if the
7130 function itself does not use the gp anymore. */
7131 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7133 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7134 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7135 /* For functions defined with the syscall_linkage attribute, all
7136 input registers are marked as live at all function exits. This
7137 prevents the register allocator from using the input registers,
7138 which in turn makes it possible to restart a system call after
7139 an interrupt without having to save/restore the input registers.
7140 This also prevents kernel data from leaking to application code. */
7141 return lookup_attribute ("syscall_linkage",
7142 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7144 case R_BR (0):
7145 /* Conditional return patterns can't represent the use of `b0' as
7146 the return address, so we force the value live this way. */
7147 return 1;
7149 case AR_PFS_REGNUM:
7150 /* Likewise for ar.pfs, which is used by br.ret. */
7151 return 1;
7153 default:
7154 return 0;
7158 /* Return true if REGNO is used by the frame unwinder. */
7161 ia64_eh_uses (int regno)
7163 if (! reload_completed)
7164 return 0;
7166 if (current_frame_info.reg_save_b0
7167 && regno == current_frame_info.reg_save_b0)
7168 return 1;
7169 if (current_frame_info.reg_save_pr
7170 && regno == current_frame_info.reg_save_pr)
7171 return 1;
7172 if (current_frame_info.reg_save_ar_pfs
7173 && regno == current_frame_info.reg_save_ar_pfs)
7174 return 1;
7175 if (current_frame_info.reg_save_ar_unat
7176 && regno == current_frame_info.reg_save_ar_unat)
7177 return 1;
7178 if (current_frame_info.reg_save_ar_lc
7179 && regno == current_frame_info.reg_save_ar_lc)
7180 return 1;
7182 return 0;
7185 /* Return true if this goes in small data/bss. */
7187 /* ??? We could also support our own long data here, generating movl/add/ld8
7188 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7189 code faster because there is one less load. This also includes incomplete
7190 types which can't go in sdata/sbss. */
7192 static bool
7193 ia64_in_small_data_p (tree exp)
7195 if (TARGET_NO_SDATA)
7196 return false;
7198 /* We want to merge strings, so we never consider them small data. */
7199 if (TREE_CODE (exp) == STRING_CST)
7200 return false;
7202 /* Functions are never small data. */
7203 if (TREE_CODE (exp) == FUNCTION_DECL)
7204 return false;
7206 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7208 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7209 if (strcmp (section, ".sdata") == 0
7210 || strcmp (section, ".sbss") == 0)
7211 return true;
7213 else
7215 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7217 /* If this is an incomplete type with size 0, then we can't put it
7218 in sdata because it might be too big when completed. */
7219 if (size > 0 && size <= ia64_section_threshold)
7220 return true;
7223 return false;
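/* Illustrative sketch (not part of ia64.c): the decision procedure above,
   restated without trees.  A hypothetical descriptor carries the only
   facts the function looks at: whether sdata is disabled, whether the
   object is a string constant or a function, an explicit section name,
   and the object size (zero or negative meaning unknown/incomplete).  */

#include <stdio.h>
#include <string.h>

struct object
{
  int no_sdata;          /* stands in for TARGET_NO_SDATA */
  int is_string;
  int is_function;
  const char *section;   /* explicit section attribute, or NULL */
  long size;             /* like int_size_in_bytes; <= 0 if unknown */
};

static int
in_small_data_p (const struct object *obj, long threshold)
{
  if (obj->no_sdata || obj->is_string || obj->is_function)
    return 0;
  if (obj->section)
    return (strcmp (obj->section, ".sdata") == 0
            || strcmp (obj->section, ".sbss") == 0);
  /* Incomplete types report size <= 0 and must stay out of sdata,
     since they might be too big once completed.  */
  return obj->size > 0 && obj->size <= threshold;
}

int
main (void)
{
  struct object small_var = { 0, 0, 0, NULL, 4 };
  struct object incomplete = { 0, 0, 0, NULL, 0 };

  /* Prints "1 0" with a threshold of 8 bytes chosen for the example.  */
  printf ("%d %d\n",
          in_small_data_p (&small_var, 8), in_small_data_p (&incomplete, 8));
  return 0;
}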
7226 /* Output assembly directives for prologue regions. */
7228 /* True if the current basic block is the last one of the function. */
7230 static bool last_block;
7232 /* True if we need a copy_state command at the start of the next block. */
7234 static bool need_copy_state;
7236 /* The function emits unwind directives for the start of an epilogue. */
7238 static void
7239 process_epilogue (void)
7241 /* If this isn't the last block of the function, then we need to label the
7242 current state, and copy it back in at the start of the next block. */
7244 if (!last_block)
7246 fprintf (asm_out_file, "\t.label_state 1\n");
7247 need_copy_state = true;
7250 fprintf (asm_out_file, "\t.restore sp\n");
7253 /* This function processes a SET pattern looking for specific patterns
7254 which result in emitting an assembly directive required for unwinding. */
7256 static int
7257 process_set (FILE *asm_out_file, rtx pat)
7259 rtx src = SET_SRC (pat);
7260 rtx dest = SET_DEST (pat);
7261 int src_regno, dest_regno;
7263 /* Look for the ALLOC insn. */
7264 if (GET_CODE (src) == UNSPEC_VOLATILE
7265 && XINT (src, 1) == UNSPECV_ALLOC
7266 && GET_CODE (dest) == REG)
7268 dest_regno = REGNO (dest);
7270 /* If this isn't the final destination for ar.pfs, the alloc
7271 shouldn't have been marked frame related. */
7272 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7273 abort ();
7275 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7276 ia64_dbx_register_number (dest_regno));
7277 return 1;
7280 /* Look for SP = .... */
7281 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7283 if (GET_CODE (src) == PLUS)
7285 rtx op0 = XEXP (src, 0);
7286 rtx op1 = XEXP (src, 1);
7287 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7289 if (INTVAL (op1) < 0)
7290 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7291 -INTVAL (op1));
7292 else
7293 process_epilogue ();
7295 else
7296 abort ();
7298 else if (GET_CODE (src) == REG
7299 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7300 process_epilogue ();
7301 else
7302 abort ();
7304 return 1;
7307 /* Register move we need to look at. */
7308 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7310 src_regno = REGNO (src);
7311 dest_regno = REGNO (dest);
7313 switch (src_regno)
7315 case BR_REG (0):
7316 /* Saving return address pointer. */
7317 if (dest_regno != current_frame_info.reg_save_b0)
7318 abort ();
7319 fprintf (asm_out_file, "\t.save rp, r%d\n",
7320 ia64_dbx_register_number (dest_regno));
7321 return 1;
7323 case PR_REG (0):
7324 if (dest_regno != current_frame_info.reg_save_pr)
7325 abort ();
7326 fprintf (asm_out_file, "\t.save pr, r%d\n",
7327 ia64_dbx_register_number (dest_regno));
7328 return 1;
7330 case AR_UNAT_REGNUM:
7331 if (dest_regno != current_frame_info.reg_save_ar_unat)
7332 abort ();
7333 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7334 ia64_dbx_register_number (dest_regno));
7335 return 1;
7337 case AR_LC_REGNUM:
7338 if (dest_regno != current_frame_info.reg_save_ar_lc)
7339 abort ();
7340 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7341 ia64_dbx_register_number (dest_regno));
7342 return 1;
7344 case STACK_POINTER_REGNUM:
7345 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7346 || ! frame_pointer_needed)
7347 abort ();
7348 fprintf (asm_out_file, "\t.vframe r%d\n",
7349 ia64_dbx_register_number (dest_regno));
7350 return 1;
7352 default:
7353 /* Everything else should indicate being stored to memory. */
7354 abort ();
7358 /* Memory store we need to look at. */
7359 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7361 long off;
7362 rtx base;
7363 const char *saveop;
7365 if (GET_CODE (XEXP (dest, 0)) == REG)
7367 base = XEXP (dest, 0);
7368 off = 0;
7370 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7371 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7373 base = XEXP (XEXP (dest, 0), 0);
7374 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7376 else
7377 abort ();
7379 if (base == hard_frame_pointer_rtx)
7381 saveop = ".savepsp";
7382 off = - off;
7384 else if (base == stack_pointer_rtx)
7385 saveop = ".savesp";
7386 else
7387 abort ();
7389 src_regno = REGNO (src);
7390 switch (src_regno)
7392 case BR_REG (0):
7393 if (current_frame_info.reg_save_b0 != 0)
7394 abort ();
7395 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7396 return 1;
7398 case PR_REG (0):
7399 if (current_frame_info.reg_save_pr != 0)
7400 abort ();
7401 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7402 return 1;
7404 case AR_LC_REGNUM:
7405 if (current_frame_info.reg_save_ar_lc != 0)
7406 abort ();
7407 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7408 return 1;
7410 case AR_PFS_REGNUM:
7411 if (current_frame_info.reg_save_ar_pfs != 0)
7412 abort ();
7413 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7414 return 1;
7416 case AR_UNAT_REGNUM:
7417 if (current_frame_info.reg_save_ar_unat != 0)
7418 abort ();
7419 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7420 return 1;
7422 case GR_REG (4):
7423 case GR_REG (5):
7424 case GR_REG (6):
7425 case GR_REG (7):
7426 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7427 1 << (src_regno - GR_REG (4)));
7428 return 1;
7430 case BR_REG (1):
7431 case BR_REG (2):
7432 case BR_REG (3):
7433 case BR_REG (4):
7434 case BR_REG (5):
7435 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7436 1 << (src_regno - BR_REG (1)));
7437 return 1;
7439 case FR_REG (2):
7440 case FR_REG (3):
7441 case FR_REG (4):
7442 case FR_REG (5):
7443 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7444 1 << (src_regno - FR_REG (2)));
7445 return 1;
7447 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7448 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7449 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7450 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7451 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7452 1 << (src_regno - FR_REG (12)));
7453 return 1;
7455 default:
7456 return 0;
7460 return 0;
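/* Illustrative sketch (not part of ia64.c): the "SP = SP + constant" arm
   of process_set as a standalone program, showing which unwind directive
   each direction of the stack adjustment produces.  The directive strings
   are the ones emitted above; everything else (the plain `adjust' argument
   standing in for the rtx pattern) is invented for the example.  */

#include <stdio.h>

static void
process_sp_adjust (FILE *out, long adjust, int last_block)
{
  if (adjust < 0)
    /* Stack allocation in the prologue: record the frame size.  */
    fprintf (out, "\t.fframe %ld\n", -adjust);
  else
    {
      /* Deallocation: this is the epilogue.  If more code follows in
         another block, label the current state so it can be copied back
         (see process_epilogue above).  */
      if (!last_block)
        fprintf (out, "\t.label_state 1\n");
      fprintf (out, "\t.restore sp\n");
    }
}

int
main (void)
{
  /* A 160-byte frame being created, then torn down mid-function:
     prints ".fframe 160", ".label_state 1", ".restore sp".  */
  process_sp_adjust (stdout, -160, 0);
  process_sp_adjust (stdout, 160, 0);
  return 0;
}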
7464 /* This function looks at a single insn and emits any directives
7465 required to unwind this insn. */
7466 void
7467 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7469 if (flag_unwind_tables
7470 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7472 rtx pat;
7474 if (GET_CODE (insn) == NOTE
7475 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7477 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7479 /* Restore unwind state from immediately before the epilogue. */
7480 if (need_copy_state)
7482 fprintf (asm_out_file, "\t.body\n");
7483 fprintf (asm_out_file, "\t.copy_state 1\n");
7484 need_copy_state = false;
7488 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7489 return;
7491 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7492 if (pat)
7493 pat = XEXP (pat, 0);
7494 else
7495 pat = PATTERN (insn);
7497 switch (GET_CODE (pat))
7499 case SET:
7500 process_set (asm_out_file, pat);
7501 break;
7503 case PARALLEL:
7505 int par_index;
7506 int limit = XVECLEN (pat, 0);
7507 for (par_index = 0; par_index < limit; par_index++)
7509 rtx x = XVECEXP (pat, 0, par_index);
7510 if (GET_CODE (x) == SET)
7511 process_set (asm_out_file, x);
7513 break;
7516 default:
7517 abort ();
7523 void
7524 ia64_init_builtins (void)
7526 tree psi_type_node = build_pointer_type (integer_type_node);
7527 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7529 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7530 tree si_ftype_psi_si_si
7531 = build_function_type_list (integer_type_node,
7532 psi_type_node, integer_type_node,
7533 integer_type_node, NULL_TREE);
7535 /* __sync_val_compare_and_swap_di */
7536 tree di_ftype_pdi_di_di
7537 = build_function_type_list (long_integer_type_node,
7538 pdi_type_node, long_integer_type_node,
7539 long_integer_type_node, NULL_TREE);
7540 /* __sync_bool_compare_and_swap_di */
7541 tree si_ftype_pdi_di_di
7542 = build_function_type_list (integer_type_node,
7543 pdi_type_node, long_integer_type_node,
7544 long_integer_type_node, NULL_TREE);
7545 /* __sync_synchronize */
7546 tree void_ftype_void
7547 = build_function_type (void_type_node, void_list_node);
7549 /* __sync_lock_test_and_set_si */
7550 tree si_ftype_psi_si
7551 = build_function_type_list (integer_type_node,
7552 psi_type_node, integer_type_node, NULL_TREE);
7554 /* __sync_lock_test_and_set_di */
7555 tree di_ftype_pdi_di
7556 = build_function_type_list (long_integer_type_node,
7557 pdi_type_node, long_integer_type_node,
7558 NULL_TREE);
7560 /* __sync_lock_release_si */
7561 tree void_ftype_psi
7562 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7564 /* __sync_lock_release_di */
7565 tree void_ftype_pdi
7566 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7568 tree fpreg_type;
7569 tree float80_type;
7571 /* The __fpreg type. */
7572 fpreg_type = make_node (REAL_TYPE);
7573 /* ??? The back end should know to load/save __fpreg variables using
7574 the ldf.fill and stf.spill instructions. */
7575 TYPE_PRECISION (fpreg_type) = 80;
7576 layout_type (fpreg_type);
7577 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
7579 /* The __float80 type. */
7580 float80_type = make_node (REAL_TYPE);
7581 TYPE_PRECISION (float80_type) = 80;
7582 layout_type (float80_type);
7583 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
7585 /* The __float128 type. */
7586 if (!TARGET_HPUX)
7588 tree float128_type = make_node (REAL_TYPE);
7589 TYPE_PRECISION (float128_type) = 128;
7590 layout_type (float128_type);
7591 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
7593 else
7594 /* Under HPUX, this is a synonym for "long double". */
7595 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
7596 "__float128");
7598 #define def_builtin(name, type, code) \
7599 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
7600 NULL, NULL_TREE)
7602 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7603 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7604 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7605 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7606 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7607 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7608 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7609 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7611 def_builtin ("__sync_synchronize", void_ftype_void,
7612 IA64_BUILTIN_SYNCHRONIZE);
7614 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7615 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7616 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7617 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7618 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7619 IA64_BUILTIN_LOCK_RELEASE_SI);
7620 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7621 IA64_BUILTIN_LOCK_RELEASE_DI);
7623 def_builtin ("__builtin_ia64_bsp",
7624 build_function_type (ptr_type_node, void_list_node),
7625 IA64_BUILTIN_BSP);
7627 def_builtin ("__builtin_ia64_flushrs",
7628 build_function_type (void_type_node, void_list_node),
7629 IA64_BUILTIN_FLUSHRS);
7631 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7632 IA64_BUILTIN_FETCH_AND_ADD_SI);
7633 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7634 IA64_BUILTIN_FETCH_AND_SUB_SI);
7635 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7636 IA64_BUILTIN_FETCH_AND_OR_SI);
7637 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7638 IA64_BUILTIN_FETCH_AND_AND_SI);
7639 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7640 IA64_BUILTIN_FETCH_AND_XOR_SI);
7641 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7642 IA64_BUILTIN_FETCH_AND_NAND_SI);
7644 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7645 IA64_BUILTIN_ADD_AND_FETCH_SI);
7646 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7647 IA64_BUILTIN_SUB_AND_FETCH_SI);
7648 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7649 IA64_BUILTIN_OR_AND_FETCH_SI);
7650 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7651 IA64_BUILTIN_AND_AND_FETCH_SI);
7652 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7653 IA64_BUILTIN_XOR_AND_FETCH_SI);
7654 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7655 IA64_BUILTIN_NAND_AND_FETCH_SI);
7657 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7658 IA64_BUILTIN_FETCH_AND_ADD_DI);
7659 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7660 IA64_BUILTIN_FETCH_AND_SUB_DI);
7661 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7662 IA64_BUILTIN_FETCH_AND_OR_DI);
7663 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7664 IA64_BUILTIN_FETCH_AND_AND_DI);
7665 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7666 IA64_BUILTIN_FETCH_AND_XOR_DI);
7667 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7668 IA64_BUILTIN_FETCH_AND_NAND_DI);
7670 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7671 IA64_BUILTIN_ADD_AND_FETCH_DI);
7672 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7673 IA64_BUILTIN_SUB_AND_FETCH_DI);
7674 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7675 IA64_BUILTIN_OR_AND_FETCH_DI);
7676 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7677 IA64_BUILTIN_AND_AND_FETCH_DI);
7678 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7679 IA64_BUILTIN_XOR_AND_FETCH_DI);
7680 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7681 IA64_BUILTIN_NAND_AND_FETCH_DI);
7683 #undef def_builtin
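/* Illustrative usage of the builtins registered above (user-level C, not
   part of the compiler; the variable names are made up):

     static int counter;

     int
     bump (void)
     {
       int old = __sync_fetch_and_add_si (&counter, 1);
       __sync_synchronize ();
       return old;
     }

   The _si variants operate on "int *" objects and the _di variants on
   "long *" objects, matching the psi/pdi function types built above.  */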
7686 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7689 tmp = [ptr];
7690 do {
7691 ret = tmp;
7692 ar.ccv = tmp;
7693 tmp <op>= value;
7694 cmpxchgsz.acq tmp = [ptr], tmp
7695 } while (tmp != ret)
7698 static rtx
7699 ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
7700 tree arglist, rtx target)
7702 rtx ret, label, tmp, ccv, insn, mem, value;
7703 tree arg0, arg1;
7705 arg0 = TREE_VALUE (arglist);
7706 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7707 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7708 #ifdef POINTERS_EXTEND_UNSIGNED
7709 if (GET_MODE(mem) != Pmode)
7710 mem = convert_memory_address (Pmode, mem);
7711 #endif
7712 value = expand_expr (arg1, NULL_RTX, mode, 0);
7714 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7715 MEM_VOLATILE_P (mem) = 1;
7717 if (target && register_operand (target, mode))
7718 ret = target;
7719 else
7720 ret = gen_reg_rtx (mode);
7722 emit_insn (gen_mf ());
7724 /* Special case for fetchadd instructions. */
7725 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7727 if (mode == SImode)
7728 insn = gen_fetchadd_acq_si (ret, mem, value);
7729 else
7730 insn = gen_fetchadd_acq_di (ret, mem, value);
7731 emit_insn (insn);
7732 return ret;
7735 tmp = gen_reg_rtx (mode);
7736 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7737 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7738 emit_move_insn (tmp, mem);
7740 label = gen_label_rtx ();
7741 emit_label (label);
7742 emit_move_insn (ret, tmp);
7743 convert_move (ccv, tmp, /*unsignedp=*/1);
7745   /* Perform the specific operation.  NAND is special-cased: it is passed
7746      in as one_cmpl_optab, so complement TMP first and then AND in VALUE.  */
7747 if (binoptab == one_cmpl_optab)
7749 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7750 binoptab = and_optab;
7752 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7754 if (mode == SImode)
7755 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7756 else
7757 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7758 emit_insn (insn);
7760 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7762 return ret;
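/* Note (illustrative, with a made-up variable COUNTER): an addend that
   satisfies fetchadd_operand, e.g.

     __sync_fetch_and_add_si (&counter, 1);

   takes the fast path above and becomes a single fetchadd4.acq, while an
   addend such as 3, which the fetchadd instruction cannot encode, falls
   back to the ar.ccv/cmpxchg retry loop sketched in the comment before
   this function.  */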
7765 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7768 tmp = [ptr];
7769 do {
7770 old = tmp;
7771 ar.ccv = tmp;
7772 ret = tmp <op> value;
7773 cmpxchgsz.acq tmp = [ptr], ret
7774 } while (tmp != old)
7777 static rtx
7778 ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
7779 tree arglist, rtx target)
7781 rtx old, label, tmp, ret, ccv, insn, mem, value;
7782 tree arg0, arg1;
7784 arg0 = TREE_VALUE (arglist);
7785 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7786 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7787 #ifdef POINTERS_EXTEND_UNSIGNED
7788 if (GET_MODE(mem) != Pmode)
7789 mem = convert_memory_address (Pmode, mem);
7790 #endif
7792 value = expand_expr (arg1, NULL_RTX, mode, 0);
7794 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7795 MEM_VOLATILE_P (mem) = 1;
7797 if (target && ! register_operand (target, mode))
7798 target = NULL_RTX;
7800 emit_insn (gen_mf ());
7801 tmp = gen_reg_rtx (mode);
7802 old = gen_reg_rtx (mode);
7803 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7804 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7806 emit_move_insn (tmp, mem);
7808 label = gen_label_rtx ();
7809 emit_label (label);
7810 emit_move_insn (old, tmp);
7811 convert_move (ccv, tmp, /*unsignedp=*/1);
7813   /* Perform the specific operation.  NAND is special-cased: it is passed
7814      in as one_cmpl_optab, so complement TMP first and then AND in VALUE.  */
7815 if (binoptab == one_cmpl_optab)
7817 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7818 binoptab = and_optab;
7820 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7822 if (mode == SImode)
7823 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7824 else
7825 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7826 emit_insn (insn);
7828 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7830 return ret;
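/* The only user-visible difference from ia64_expand_fetch_and_op is the
   value returned (illustrative, with a made-up variable X):

     int before = __sync_fetch_and_add_si (&x, 5);   value of X before the add
     int after  = __sync_add_and_fetch_si (&x, 5);   value of X after the add
 */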
7833 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7835 ar.ccv = oldval
7837 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7838 return ret
7840 For bool_ it's the same except return ret == oldval.
7843 static rtx
7844 ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
7845 int boolp, tree arglist, rtx target)
7847 tree arg0, arg1, arg2;
7848 rtx mem, old, new, ccv, tmp, insn;
7850 arg0 = TREE_VALUE (arglist);
7851 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7852 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7853 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7854 old = expand_expr (arg1, NULL_RTX, mode, 0);
7855 new = expand_expr (arg2, NULL_RTX, mode, 0);
7857 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7858 MEM_VOLATILE_P (mem) = 1;
7860 if (GET_MODE (old) != mode)
7861 old = convert_to_mode (mode, old, /*unsignedp=*/1);
7862 if (GET_MODE (new) != mode)
7863 new = convert_to_mode (mode, new, /*unsignedp=*/1);
7865 if (! register_operand (old, mode))
7866 old = copy_to_mode_reg (mode, old);
7867 if (! register_operand (new, mode))
7868 new = copy_to_mode_reg (mode, new);
7870 if (! boolp && target && register_operand (target, mode))
7871 tmp = target;
7872 else
7873 tmp = gen_reg_rtx (mode);
7875 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7876 convert_move (ccv, old, /*unsignedp=*/1);
7877 emit_insn (gen_mf ());
7878 if (mode == SImode)
7879 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7880 else
7881 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7882 emit_insn (insn);
7884 if (boolp)
7886 if (! target)
7887 target = gen_reg_rtx (rmode);
7888 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7890 else
7891 return tmp;
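/* Illustrative use of the two flavours expanded above (user code with a
   made-up variable LOCK, not compiler code):

     static int lock;

     int acquired = __sync_bool_compare_and_swap_si (&lock, 0, 1);
     int previous = __sync_val_compare_and_swap_si (&lock, 1, 0);

   The bool_ form returns nonzero exactly when the new value was stored;
   the val_ form returns whatever was in *ptr before the cmpxchg, whether
   or not it matched oldval.  */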
7894 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7896 static rtx
7897 ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
7898 rtx target)
7900 tree arg0, arg1;
7901 rtx mem, new, ret, insn;
7903 arg0 = TREE_VALUE (arglist);
7904 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7905 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7906 new = expand_expr (arg1, NULL_RTX, mode, 0);
7908 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7909 MEM_VOLATILE_P (mem) = 1;
7910 if (! register_operand (new, mode))
7911 new = copy_to_mode_reg (mode, new);
7913 if (target && register_operand (target, mode))
7914 ret = target;
7915 else
7916 ret = gen_reg_rtx (mode);
7918 if (mode == SImode)
7919 insn = gen_xchgsi (ret, mem, new);
7920 else
7921 insn = gen_xchgdi (ret, mem, new);
7922 emit_insn (insn);
7924 return ret;
7927 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7929 static rtx
7930 ia64_expand_lock_release (enum machine_mode mode, tree arglist,
7931 rtx target ATTRIBUTE_UNUSED)
7933 tree arg0;
7934 rtx mem;
7936 arg0 = TREE_VALUE (arglist);
7937 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7939 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7940 MEM_VOLATILE_P (mem) = 1;
7942 emit_move_insn (mem, const0_rtx);
7944 return const0_rtx;
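/* A minimal spinlock built on the two expanders above (an illustrative
   sketch of user code, not part of GCC; the names are made up):

     static int lock;

     void
     acquire_lock (void)
     {
       while (__sync_lock_test_and_set_si (&lock, 1) != 0)
         continue;
     }

     void
     release_lock (void)
     {
       __sync_lock_release_si (&lock);
     }

   The acquire side spins on the previous value returned by the exchange;
   the release side is just the zero store described in the comment above
   ia64_expand_lock_release.  */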
7947 rtx
7948 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
7949 enum machine_mode mode ATTRIBUTE_UNUSED,
7950 int ignore ATTRIBUTE_UNUSED)
7952 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7953 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7954 tree arglist = TREE_OPERAND (exp, 1);
7955 enum machine_mode rmode = VOIDmode;
7957 switch (fcode)
7959 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7960 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7961 mode = SImode;
7962 rmode = SImode;
7963 break;
7965 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7966 case IA64_BUILTIN_LOCK_RELEASE_SI:
7967 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7968 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7969 case IA64_BUILTIN_FETCH_AND_OR_SI:
7970 case IA64_BUILTIN_FETCH_AND_AND_SI:
7971 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7972 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7973 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7974 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7975 case IA64_BUILTIN_OR_AND_FETCH_SI:
7976 case IA64_BUILTIN_AND_AND_FETCH_SI:
7977 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7978 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7979 mode = SImode;
7980 break;
7982 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7983 mode = DImode;
7984 rmode = SImode;
7985 break;
7987 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7988 mode = DImode;
7989 rmode = DImode;
7990 break;
7992 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7993 case IA64_BUILTIN_LOCK_RELEASE_DI:
7994 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7995 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7996 case IA64_BUILTIN_FETCH_AND_OR_DI:
7997 case IA64_BUILTIN_FETCH_AND_AND_DI:
7998 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7999 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8000 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8001 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8002 case IA64_BUILTIN_OR_AND_FETCH_DI:
8003 case IA64_BUILTIN_AND_AND_FETCH_DI:
8004 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8005 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8006 mode = DImode;
8007 break;
8009 default:
8010 break;
8013 switch (fcode)
8015 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8016 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8017 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8018 target);
8020 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8021 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8022 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8023 target);
8025 case IA64_BUILTIN_SYNCHRONIZE:
8026 emit_insn (gen_mf ());
8027 return const0_rtx;
8029 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8030 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8031 return ia64_expand_lock_test_and_set (mode, arglist, target);
8033 case IA64_BUILTIN_LOCK_RELEASE_SI:
8034 case IA64_BUILTIN_LOCK_RELEASE_DI:
8035 return ia64_expand_lock_release (mode, arglist, target);
8037 case IA64_BUILTIN_BSP:
8038 if (! target || ! register_operand (target, DImode))
8039 target = gen_reg_rtx (DImode);
8040 emit_insn (gen_bsp_value (target));
8041 #ifdef POINTERS_EXTEND_UNSIGNED
8042 target = convert_memory_address (ptr_mode, target);
8043 #endif
8044 return target;
8046 case IA64_BUILTIN_FLUSHRS:
8047 emit_insn (gen_flushrs ());
8048 return const0_rtx;
8050 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8051 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8052 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8054 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8055 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8056 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8058 case IA64_BUILTIN_FETCH_AND_OR_SI:
8059 case IA64_BUILTIN_FETCH_AND_OR_DI:
8060 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8062 case IA64_BUILTIN_FETCH_AND_AND_SI:
8063 case IA64_BUILTIN_FETCH_AND_AND_DI:
8064 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8066 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8067 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8068 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8070 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8071 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8072 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8074 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8075 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8076 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8078 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8079 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8080 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8082 case IA64_BUILTIN_OR_AND_FETCH_SI:
8083 case IA64_BUILTIN_OR_AND_FETCH_DI:
8084 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8086 case IA64_BUILTIN_AND_AND_FETCH_SI:
8087 case IA64_BUILTIN_AND_AND_FETCH_DI:
8088 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8090 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8091 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8092 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8094 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8095 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8096 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8098 default:
8099 break;
8102 return NULL_RTX;
8105 /* On HP-UX IA64, aggregate parameters passed on the stack are stored in
8106    the most significant bits of the stack slot.  */
8108 enum direction
8109 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8111   /* Exception to the normal case for structures, unions and other aggregates.  */
8113 if (type && AGGREGATE_TYPE_P (type)
8114 && int_size_in_bytes (type) < UNITS_PER_WORD)
8115 return upward;
8117 /* Fall back to the default. */
8118 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
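/* Illustrative effect (hypothetical type): a small aggregate such as

     struct tiny { char c; };

   has int_size_in_bytes == 1 < UNITS_PER_WORD, so when it is passed in a
   stack slot on HP-UX it is padded upward, i.e. placed in the most
   significant bytes of the slot; everything else keeps the direction
   chosen by DEFAULT_FUNCTION_ARG_PADDING.  */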
8121 /* Linked list of all external functions that are to be emitted by GCC.
8122 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8123 order to avoid putting out names that are never really used. */
8125 struct extern_func_list GTY(())
8127 struct extern_func_list *next;
8128 tree decl;
8131 static GTY(()) struct extern_func_list *extern_func_head;
8133 static void
8134 ia64_hpux_add_extern_decl (tree decl)
8136 struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8138 p->decl = decl;
8139 p->next = extern_func_head;
8140 extern_func_head = p;
8143 /* Print out the list of used global functions. */
8145 static void
8146 ia64_hpux_file_end (void)
8148 struct extern_func_list *p;
8150 for (p = extern_func_head; p; p = p->next)
8152 tree decl = p->decl;
8153 tree id = DECL_ASSEMBLER_NAME (decl);
8155 if (!id)
8156 abort ();
8158 if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8160 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8162 TREE_ASM_WRITTEN (decl) = 1;
8163 (*targetm.asm_out.globalize_label) (asm_out_file, name);
8164 fputs (TYPE_ASM_OP, asm_out_file);
8165 assemble_name (asm_out_file, name);
8166 fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8170 extern_func_head = 0;
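/* Illustratively, for a function "foo" that was declared and referenced
   but never defined in this translation unit, the loop above emits a
   globalize directive for foo followed by something of the form

	.type foo,<function>

   with the exact directive and operand spelling supplied by TYPE_ASM_OP
   and TYPE_OPERAND_FMT for the HP-UX target.  */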
8173 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
8174    modes of word_mode and larger.  Rename the TFmode libfuncs using the
8175    HPUX conventions.  __divtf3 is used for XFmode; we need to keep it for
8176    backward compatibility.  */
8178 static void
8179 ia64_init_libfuncs (void)
8181 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
8182 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
8183 set_optab_libfunc (smod_optab, SImode, "__modsi3");
8184 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
8186 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8187 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8188 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8189 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8190 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8192 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8193 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8194 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8195 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8196 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8197 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8199 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8200 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8201 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8202 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8204 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8205 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
8208 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8210 static void
8211 ia64_hpux_init_libfuncs (void)
8213 ia64_init_libfuncs ();
8215 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8216 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8217 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8219 /* ia64_expand_compare uses this. */
8220 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8222 /* These should never be used. */
8223 set_optab_libfunc (eq_optab, TFmode, 0);
8224 set_optab_libfunc (ne_optab, TFmode, 0);
8225 set_optab_libfunc (gt_optab, TFmode, 0);
8226 set_optab_libfunc (ge_optab, TFmode, 0);
8227 set_optab_libfunc (lt_optab, TFmode, 0);
8228 set_optab_libfunc (le_optab, TFmode, 0);
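/* Illustrative consequence of the tables above (hypothetical user code):
   on HP-UX, where "long double" is the 128-bit TFmode quad type,

     long double q (long double a, long double b) { return a / b; }

   compiles into a call to _U_Qfdiv rather than the default __divtf3, and
   TFmode comparisons are routed through _U_Qfcmp via cmptf_libfunc
   instead of the individual eq/ne/gt/ge/lt/le libfuncs, which are cleared
   just above.  */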
8231 /* Rename the division and modulus functions in VMS. */
8233 static void
8234 ia64_vms_init_libfuncs (void)
8236 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8237 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8238 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8239 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8240 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8241 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8242 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8243 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8246 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8247 the HPUX conventions. */
8249 static void
8250 ia64_sysv4_init_libfuncs (void)
8252 ia64_init_libfuncs ();
8254 /* These functions are not part of the HPUX TFmode interface. We
8255 use them instead of _U_Qfcmp, which doesn't work the way we
8256 expect. */
8257 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8258 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8259 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8260 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8261 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8262 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8264 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8265 glibc doesn't have them. */
8268 /* Switch to the section to which we should output X. The only thing
8269 special we do here is to honor small data. */
8271 static void
8272 ia64_select_rtx_section (enum machine_mode mode, rtx x,
8273 unsigned HOST_WIDE_INT align)
8275 if (GET_MODE_SIZE (mode) > 0
8276 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8277 sdata_section ();
8278 else
8279 default_elf_select_rtx_section (mode, x, align);
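/* For example, an 8-byte DImode constant is sent to .sdata whenever
   ia64_section_threshold is at least 8, so it can be reached with the
   short gp-relative addressing used for small data; anything larger than
   the threshold goes through default_elf_select_rtx_section.  */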
8282 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8283 Pretend flag_pic is always set. */
8285 static void
8286 ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8288 default_elf_select_section_1 (exp, reloc, align, true);
8291 static void
8292 ia64_rwreloc_unique_section (tree decl, int reloc)
8294 default_unique_section_1 (decl, reloc, true);
8297 static void
8298 ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8299 unsigned HOST_WIDE_INT align)
8301 int save_pic = flag_pic;
8302 flag_pic = 1;
8303 ia64_select_rtx_section (mode, x, align);
8304 flag_pic = save_pic;
8307 static unsigned int
8308 ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
8310 return default_section_type_flags_1 (decl, name, reloc, true);
8313 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8314    structure type and the address of that return value should be passed
8315    in out0, rather than in r8.  */
8317 static bool
8318 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8320 tree ret_type = TREE_TYPE (fntype);
8322 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8323 as the structure return address parameter, if the return value
8324 type has a non-trivial copy constructor or destructor. It is not
8325 clear if this same convention should be used for other
8326 programming languages. Until G++ 3.4, we incorrectly used r8 for
8327 these return values. */
8328 return (abi_version_at_least (2)
8329 && ret_type
8330 && TYPE_MODE (ret_type) == BLKmode
8331 && TREE_ADDRESSABLE (ret_type)
8332 && strcmp (lang_hooks.name, "GNU C++") == 0);
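/* Illustrative C++ case (hypothetical type) for which the predicate above
   is true under -fabi-version=2 or later:

     struct T { ~T (); };
     T make ();

   T has a non-trivial destructor, so it is returned in memory (BLKmode,
   TREE_ADDRESSABLE) and the address of the return slot is passed in out0
   instead of r8.  */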
8335 /* Output the assembler code for a thunk function. THUNK_DECL is the
8336 declaration for the thunk function itself, FUNCTION is the decl for
8337 the target function. DELTA is an immediate constant offset to be
8338 added to THIS. If VCALL_OFFSET is nonzero, the word at
8339 *(*this + vcall_offset) should be added to THIS. */
8341 static void
8342 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8343 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8344 tree function)
8346 rtx this, insn, funexp;
8347 unsigned int this_parmno;
8348 unsigned int this_regno;
8350 reload_completed = 1;
8351 epilogue_completed = 1;
8352 no_new_pseudos = 1;
8353 reset_block_changes ();
8355 /* Set things up as ia64_expand_prologue might. */
8356 last_scratch_gr_reg = 15;
8358 memset (&current_frame_info, 0, sizeof (current_frame_info));
8359 current_frame_info.spill_cfa_off = -16;
8360 current_frame_info.n_input_regs = 1;
8361 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8363 /* Mark the end of the (empty) prologue. */
8364 emit_note (NOTE_INSN_PROLOGUE_END);
8366 /* Figure out whether "this" will be the first parameter (the
8367 typical case) or the second parameter (as happens when the
8368 virtual function returns certain class objects). */
8369 this_parmno
8370 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8371 ? 1 : 0);
8372 this_regno = IN_REG (this_parmno);
8373 if (!TARGET_REG_NAMES)
8374 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8376 this = gen_rtx_REG (Pmode, this_regno);
8377 if (TARGET_ILP32)
8379 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8380 REG_POINTER (tmp) = 1;
8381 if (delta && CONST_OK_FOR_I (delta))
8383 emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8384 delta = 0;
8386 else
8387 emit_insn (gen_ptr_extend (this, tmp));
8390 /* Apply the constant offset, if required. */
8391 if (delta)
8393 rtx delta_rtx = GEN_INT (delta);
8395 if (!CONST_OK_FOR_I (delta))
8397 rtx tmp = gen_rtx_REG (Pmode, 2);
8398 emit_move_insn (tmp, delta_rtx);
8399 delta_rtx = tmp;
8401 emit_insn (gen_adddi3 (this, this, delta_rtx));
8404 /* Apply the offset from the vtable, if required. */
8405 if (vcall_offset)
8407 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8408 rtx tmp = gen_rtx_REG (Pmode, 2);
8410 if (TARGET_ILP32)
8412 rtx t = gen_rtx_REG (ptr_mode, 2);
8413 REG_POINTER (t) = 1;
8414 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8415 if (CONST_OK_FOR_I (vcall_offset))
8417 emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8418 vcall_offset_rtx));
8419 vcall_offset = 0;
8421 else
8422 emit_insn (gen_ptr_extend (tmp, t));
8424 else
8425 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8427 if (vcall_offset)
8429 if (!CONST_OK_FOR_J (vcall_offset))
8431 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8432 emit_move_insn (tmp2, vcall_offset_rtx);
8433 vcall_offset_rtx = tmp2;
8435 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8438 if (TARGET_ILP32)
8439 emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8440 gen_rtx_MEM (ptr_mode, tmp));
8441 else
8442 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8444 emit_insn (gen_adddi3 (this, this, tmp));
8447 /* Generate a tail call to the target function. */
8448 if (! TREE_USED (function))
8450 assemble_external (function);
8451 TREE_USED (function) = 1;
8453 funexp = XEXP (DECL_RTL (function), 0);
8454 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8455 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8456 insn = get_last_insn ();
8457 SIBLING_CALL_P (insn) = 1;
8459 /* Code generation for calls relies on splitting. */
8460 reload_completed = 1;
8461 epilogue_completed = 1;
8462 try_split (PATTERN (insn), insn, 0);
8464 emit_barrier ();
8466 /* Run just enough of rest_of_compilation to get the insns emitted.
8467 There's not really enough bulk here to make other passes such as
8468 instruction scheduling worth while. Note that use_thunk calls
8469 assemble_start_function and assemble_end_function. */
8471 insn_locators_initialize ();
8472 emit_all_insn_group_barriers (NULL);
8473 insn = get_insns ();
8474 shorten_branches (insn);
8475 final_start_function (insn, file, 1);
8476 final (insn, file, 1, 0);
8477 final_end_function ();
8479 reload_completed = 0;
8480 epilogue_completed = 0;
8481 no_new_pseudos = 0;
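/* Conceptually the thunk built above performs, in C-like pseudo-code
   (illustrative only):

     this = this + DELTA;
     if (VCALL_OFFSET != 0)
       this = this + *(long *) (*(char **) this + VCALL_OFFSET);
     return FUNCTION (this, ...);      tail call, no frame of its own

   with the ptr_extend fixups needed for ILP32 pointers folded into the
   additions.  */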
8484 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
8486 static rtx
8487 ia64_struct_value_rtx (tree fntype,
8488 int incoming ATTRIBUTE_UNUSED)
8490 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
8491 return NULL_RTX;
8492 return gen_rtx_REG (Pmode, GR_REG (8));
8495 #include "gt-ia64.h"