1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "real.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "except.h"
42 #include "function.h"
43 #include "ggc.h"
44 #include "basic-block.h"
45 #include "toplev.h"
46 #include "sched-int.h"
47 #include "timevar.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "tm_p.h"
51 #include "hashtab.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
56 /* This is used for communication between ASM_OUTPUT_LABEL and
57 ASM_OUTPUT_LABELREF. */
58 int ia64_asm_output_label = 0;
60 /* Define the information needed to generate branch and scc insns. This is
61 stored from the compare operation. */
62 struct rtx_def * ia64_compare_op0;
63 struct rtx_def * ia64_compare_op1;
65 /* Register names for ia64_expand_prologue. */
66 static const char * const ia64_reg_numbers[96] =
67 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
68 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
69 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
70 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
71 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
72 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
73 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
74 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
75 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
76 "r104","r105","r106","r107","r108","r109","r110","r111",
77 "r112","r113","r114","r115","r116","r117","r118","r119",
78 "r120","r121","r122","r123","r124","r125","r126","r127"};
80 /* ??? These strings could be shared with REGISTER_NAMES. */
81 static const char * const ia64_input_reg_names[8] =
82 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
84 /* ??? These strings could be shared with REGISTER_NAMES. */
85 static const char * const ia64_local_reg_names[80] =
86 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
87 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
88 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
89 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
90 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
91 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
92 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
93 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
94 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
95 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
97 /* ??? These strings could be shared with REGISTER_NAMES. */
98 static const char * const ia64_output_reg_names[8] =
99 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
101 /* Which cpu are we scheduling for. */
102 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
104 /* Determines whether we run our final scheduling pass or not. We always
105 avoid the normal second scheduling pass. */
106 static int ia64_flag_schedule_insns2;
108 /* Determines whether we run variable tracking in machine dependent
109 reorganization. */
110 static int ia64_flag_var_tracking;
112 /* Variables which are this size or smaller are put in the sdata/sbss
113 sections. */
115 unsigned int ia64_section_threshold;
117 /* The following variable is used by the DFA insn scheduler. The value is
118 TRUE if we do insn bundling instead of insn scheduling. */
119 int bundling_p = 0;
121 /* Structure to be filled in by ia64_compute_frame_size with register
122 save masks and offsets for the current function. */
124 struct ia64_frame_info
126 HOST_WIDE_INT total_size; /* size of the stack frame, not including
127 the caller's scratch area. */
128 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
129 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
130 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
131 HARD_REG_SET mask; /* mask of saved registers. */
132 unsigned int gr_used_mask; /* mask of registers in use as gr spill
133 registers or long-term scratches. */
134 int n_spilled; /* number of spilled registers. */
135 int reg_fp; /* register for fp. */
136 int reg_save_b0; /* save register for b0. */
137 int reg_save_pr; /* save register for prs. */
138 int reg_save_ar_pfs; /* save register for ar.pfs. */
139 int reg_save_ar_unat; /* save register for ar.unat. */
140 int reg_save_ar_lc; /* save register for ar.lc. */
141 int reg_save_gp; /* save register for gp. */
142 int n_input_regs; /* number of input registers used. */
143 int n_local_regs; /* number of local registers used. */
144 int n_output_regs; /* number of output registers used. */
145 int n_rotate_regs; /* number of rotating registers used. */
147 char need_regstk; /* true if a .regstk directive needed. */
148 char initialized; /* true if the data is finalized. */
151 /* Current frame information calculated by ia64_compute_frame_size. */
152 static struct ia64_frame_info current_frame_info;
154 static int ia64_first_cycle_multipass_dfa_lookahead (void);
155 static void ia64_dependencies_evaluation_hook (rtx, rtx);
156 static void ia64_init_dfa_pre_cycle_insn (void);
157 static rtx ia64_dfa_pre_cycle_insn (void);
158 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
159 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
160 static rtx gen_tls_get_addr (void);
161 static rtx gen_thread_pointer (void);
162 static int find_gr_spill (int);
163 static int next_scratch_gr_reg (void);
164 static void mark_reg_gr_used_mask (rtx, void *);
165 static void ia64_compute_frame_size (HOST_WIDE_INT);
166 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
167 static void finish_spill_pointers (void);
168 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
169 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
170 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
171 static rtx gen_movdi_x (rtx, rtx, rtx);
172 static rtx gen_fr_spill_x (rtx, rtx, rtx);
173 static rtx gen_fr_restore_x (rtx, rtx, rtx);
175 static enum machine_mode hfa_element_mode (tree, bool);
176 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
177 tree, int *, int);
178 static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
179 tree, bool);
180 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
181 tree, bool);
182 static bool ia64_function_ok_for_sibcall (tree, tree);
183 static bool ia64_return_in_memory (tree, tree);
184 static bool ia64_rtx_costs (rtx, int, int, int *);
185 static void fix_range (const char *);
186 static bool ia64_handle_option (size_t, const char *, int);
187 static struct machine_function * ia64_init_machine_status (void);
188 static void emit_insn_group_barriers (FILE *);
189 static void emit_all_insn_group_barriers (FILE *);
190 static void final_emit_insn_group_barriers (FILE *);
191 static void emit_predicate_relation_info (void);
192 static void ia64_reorg (void);
193 static bool ia64_in_small_data_p (tree);
194 static void process_epilogue (void);
195 static int process_set (FILE *, rtx);
197 static bool ia64_assemble_integer (rtx, unsigned int, int);
198 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
199 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
200 static void ia64_output_function_end_prologue (FILE *);
202 static int ia64_issue_rate (void);
203 static int ia64_adjust_cost (rtx, rtx, rtx, int);
204 static void ia64_sched_init (FILE *, int, int);
205 static void ia64_sched_finish (FILE *, int);
206 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
207 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
208 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
209 static int ia64_variable_issue (FILE *, int, rtx, int);
211 static struct bundle_state *get_free_bundle_state (void);
212 static void free_bundle_state (struct bundle_state *);
213 static void initiate_bundle_states (void);
214 static void finish_bundle_states (void);
215 static unsigned bundle_state_hash (const void *);
216 static int bundle_state_eq_p (const void *, const void *);
217 static int insert_bundle_state (struct bundle_state *);
218 static void initiate_bundle_state_table (void);
219 static void finish_bundle_state_table (void);
220 static int try_issue_nops (struct bundle_state *, int);
221 static int try_issue_insn (struct bundle_state *, rtx);
222 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
223 static int get_max_pos (state_t);
224 static int get_template (state_t, int);
226 static rtx get_next_important_insn (rtx, rtx);
227 static void bundling (FILE *, int, rtx, rtx);
229 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
230 HOST_WIDE_INT, tree);
231 static void ia64_file_start (void);
233 static void ia64_select_rtx_section (enum machine_mode, rtx,
234 unsigned HOST_WIDE_INT);
235 static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
236 ATTRIBUTE_UNUSED;
237 static void ia64_rwreloc_unique_section (tree, int)
238 ATTRIBUTE_UNUSED;
239 static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
240 unsigned HOST_WIDE_INT)
241 ATTRIBUTE_UNUSED;
242 static unsigned int ia64_section_type_flags (tree, const char *, int);
243 static void ia64_hpux_add_extern_decl (tree decl)
244 ATTRIBUTE_UNUSED;
245 static void ia64_hpux_file_end (void)
246 ATTRIBUTE_UNUSED;
247 static void ia64_init_libfuncs (void)
248 ATTRIBUTE_UNUSED;
249 static void ia64_hpux_init_libfuncs (void)
250 ATTRIBUTE_UNUSED;
251 static void ia64_sysv4_init_libfuncs (void)
252 ATTRIBUTE_UNUSED;
253 static void ia64_vms_init_libfuncs (void)
254 ATTRIBUTE_UNUSED;
256 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
257 static void ia64_encode_section_info (tree, rtx, int);
258 static rtx ia64_struct_value_rtx (tree, int);
259 static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
260 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
261 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
262 static bool ia64_cannot_force_const_mem (rtx);
264 /* Table of valid machine attributes. */
265 static const struct attribute_spec ia64_attribute_table[] =
267 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
268 { "syscall_linkage", 0, 0, false, true, true, NULL },
269 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
270 { NULL, 0, 0, false, false, false, NULL }
273 /* Initialize the GCC target structure. */
274 #undef TARGET_ATTRIBUTE_TABLE
275 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
277 #undef TARGET_INIT_BUILTINS
278 #define TARGET_INIT_BUILTINS ia64_init_builtins
280 #undef TARGET_EXPAND_BUILTIN
281 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
283 #undef TARGET_ASM_BYTE_OP
284 #define TARGET_ASM_BYTE_OP "\tdata1\t"
285 #undef TARGET_ASM_ALIGNED_HI_OP
286 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
287 #undef TARGET_ASM_ALIGNED_SI_OP
288 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
289 #undef TARGET_ASM_ALIGNED_DI_OP
290 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
291 #undef TARGET_ASM_UNALIGNED_HI_OP
292 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
293 #undef TARGET_ASM_UNALIGNED_SI_OP
294 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
295 #undef TARGET_ASM_UNALIGNED_DI_OP
296 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
297 #undef TARGET_ASM_INTEGER
298 #define TARGET_ASM_INTEGER ia64_assemble_integer
300 #undef TARGET_ASM_FUNCTION_PROLOGUE
301 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
302 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
303 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
304 #undef TARGET_ASM_FUNCTION_EPILOGUE
305 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
307 #undef TARGET_IN_SMALL_DATA_P
308 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
310 #undef TARGET_SCHED_ADJUST_COST
311 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
312 #undef TARGET_SCHED_ISSUE_RATE
313 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
314 #undef TARGET_SCHED_VARIABLE_ISSUE
315 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
316 #undef TARGET_SCHED_INIT
317 #define TARGET_SCHED_INIT ia64_sched_init
318 #undef TARGET_SCHED_FINISH
319 #define TARGET_SCHED_FINISH ia64_sched_finish
320 #undef TARGET_SCHED_REORDER
321 #define TARGET_SCHED_REORDER ia64_sched_reorder
322 #undef TARGET_SCHED_REORDER2
323 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
325 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
326 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
328 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
329 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
331 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
332 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
333 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
334 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
336 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
337 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
338 ia64_first_cycle_multipass_dfa_lookahead_guard
340 #undef TARGET_SCHED_DFA_NEW_CYCLE
341 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
343 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
344 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
345 #undef TARGET_PASS_BY_REFERENCE
346 #define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
347 #undef TARGET_ARG_PARTIAL_BYTES
348 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
350 #undef TARGET_ASM_OUTPUT_MI_THUNK
351 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
352 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
353 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
355 #undef TARGET_ASM_FILE_START
356 #define TARGET_ASM_FILE_START ia64_file_start
358 #undef TARGET_RTX_COSTS
359 #define TARGET_RTX_COSTS ia64_rtx_costs
360 #undef TARGET_ADDRESS_COST
361 #define TARGET_ADDRESS_COST hook_int_rtx_0
363 #undef TARGET_MACHINE_DEPENDENT_REORG
364 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
366 #undef TARGET_ENCODE_SECTION_INFO
367 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
369 #undef TARGET_SECTION_TYPE_FLAGS
370 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
372 /* ??? ABI doesn't allow us to define this. */
373 #if 0
374 #undef TARGET_PROMOTE_FUNCTION_ARGS
375 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
376 #endif
378 /* ??? ABI doesn't allow us to define this. */
379 #if 0
380 #undef TARGET_PROMOTE_FUNCTION_RETURN
381 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
382 #endif
384 /* ??? Investigate. */
385 #if 0
386 #undef TARGET_PROMOTE_PROTOTYPES
387 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
388 #endif
390 #undef TARGET_STRUCT_VALUE_RTX
391 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
392 #undef TARGET_RETURN_IN_MEMORY
393 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
394 #undef TARGET_SETUP_INCOMING_VARARGS
395 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
396 #undef TARGET_STRICT_ARGUMENT_NAMING
397 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
398 #undef TARGET_MUST_PASS_IN_STACK
399 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
401 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
402 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
404 #undef TARGET_UNWIND_EMIT
405 #define TARGET_UNWIND_EMIT process_for_unwind_directive
407 #undef TARGET_SCALAR_MODE_SUPPORTED_P
408 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
409 #undef TARGET_VECTOR_MODE_SUPPORTED_P
410 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
412 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
413 in an order different from the specified program order. */
414 #undef TARGET_RELAXED_ORDERING
415 #define TARGET_RELAXED_ORDERING true
417 #undef TARGET_DEFAULT_TARGET_FLAGS
418 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
419 #undef TARGET_HANDLE_OPTION
420 #define TARGET_HANDLE_OPTION ia64_handle_option
422 #undef TARGET_CANNOT_FORCE_CONST_MEM
423 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
425 struct gcc_target targetm = TARGET_INITIALIZER;
427 typedef enum
429 ADDR_AREA_NORMAL, /* normal address area */
430 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
432 ia64_addr_area;
434 static GTY(()) tree small_ident1;
435 static GTY(()) tree small_ident2;
437 static void
438 init_idents (void)
440 if (small_ident1 == 0)
442 small_ident1 = get_identifier ("small");
443 small_ident2 = get_identifier ("__small__");
447 /* Retrieve the address area that has been chosen for the given decl. */
449 static ia64_addr_area
450 ia64_get_addr_area (tree decl)
452 tree model_attr;
454 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
455 if (model_attr)
457 tree id;
459 init_idents ();
460 id = TREE_VALUE (TREE_VALUE (model_attr));
461 if (id == small_ident1 || id == small_ident2)
462 return ADDR_AREA_SMALL;
464 return ADDR_AREA_NORMAL;
467 static tree
468 ia64_handle_model_attribute (tree *node, tree name, tree args,
469 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
471 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
472 ia64_addr_area area;
473 tree arg, decl = *node;
475 init_idents ();
476 arg = TREE_VALUE (args);
477 if (arg == small_ident1 || arg == small_ident2)
479 addr_area = ADDR_AREA_SMALL;
481 else
483 warning (OPT_Wattributes, "invalid argument of %qs attribute",
484 IDENTIFIER_POINTER (name));
485 *no_add_attrs = true;
488 switch (TREE_CODE (decl))
490 case VAR_DECL:
491 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
492 == FUNCTION_DECL)
493 && !TREE_STATIC (decl))
495 error ("%Jan address area attribute cannot be specified for "
496 "local variables", decl, decl);
497 *no_add_attrs = true;
499 area = ia64_get_addr_area (decl);
500 if (area != ADDR_AREA_NORMAL && addr_area != area)
502 error ("%Jaddress area of '%s' conflicts with previous "
503 "declaration", decl, decl);
504 *no_add_attrs = true;
506 break;
508 case FUNCTION_DECL:
509 error ("%Jaddress area attribute cannot be specified for functions",
510 decl, decl);
511 *no_add_attrs = true;
512 break;
514 default:
515 warning (OPT_Wattributes, "%qs attribute ignored",
516 IDENTIFIER_POINTER (name));
517 *no_add_attrs = true;
518 break;
521 return NULL_TREE;
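/* For example, user code selects the small address area with

       static int counter __attribute__ ((model ("small")));

   which places `counter' so that its address fits the -2MB..2MB range
   reachable with a single addl, per ADDR_AREA_SMALL above.  */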
524 static void
525 ia64_encode_addr_area (tree decl, rtx symbol)
527 int flags;
529 flags = SYMBOL_REF_FLAGS (symbol);
530 switch (ia64_get_addr_area (decl))
532 case ADDR_AREA_NORMAL: break;
533 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
534 default: gcc_unreachable ();
536 SYMBOL_REF_FLAGS (symbol) = flags;
539 static void
540 ia64_encode_section_info (tree decl, rtx rtl, int first)
542 default_encode_section_info (decl, rtl, first);
544 /* Careful not to prod global register variables. */
545 if (TREE_CODE (decl) == VAR_DECL
546 && GET_CODE (DECL_RTL (decl)) == MEM
547 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
548 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
549 ia64_encode_addr_area (decl, XEXP (rtl, 0));
552 /* Implement CONST_OK_FOR_LETTER_P. */
554 bool
555 ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
557 switch (c)
559 case 'I':
560 return CONST_OK_FOR_I (value);
561 case 'J':
562 return CONST_OK_FOR_J (value);
563 case 'K':
564 return CONST_OK_FOR_K (value);
565 case 'L':
566 return CONST_OK_FOR_L (value);
567 case 'M':
568 return CONST_OK_FOR_M (value);
569 case 'N':
570 return CONST_OK_FOR_N (value);
571 case 'O':
572 return CONST_OK_FOR_O (value);
573 case 'P':
574 return CONST_OK_FOR_P (value);
575 default:
576 return false;
580 /* Implement CONST_DOUBLE_OK_FOR_LETTER_P. */
582 bool
583 ia64_const_double_ok_for_letter_p (rtx value, char c)
585 switch (c)
587 case 'G':
588 return CONST_DOUBLE_OK_FOR_G (value);
589 default:
590 return false;
594 /* Implement EXTRA_CONSTRAINT. */
596 bool
597 ia64_extra_constraint (rtx value, char c)
599 switch (c)
601 case 'Q':
602 /* Non-volatile memory for FP_REG loads/stores. */
603 return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value);
605 case 'R':
606 /* 1..4 for shladd arguments. */
607 return (GET_CODE (value) == CONST_INT
608 && INTVAL (value) >= 1 && INTVAL (value) <= 4);
610 case 'S':
611 /* Non-post-inc memory for asms and other unsavory creatures. */
612 return (GET_CODE (value) == MEM
613 && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
614 && (reload_in_progress || memory_operand (value, VOIDmode)));
616 case 'T':
617 /* Symbol ref to small-address-area. */
618 return small_addr_symbolic_operand (value, VOIDmode);
620 case 'U':
621 /* Vector zero. */
622 return value == CONST0_RTX (GET_MODE (value));
624 case 'W':
625 /* An integer vector, such that conversion to an integer yields a
626 value appropriate for an integer 'J' constraint. */
627 if (GET_CODE (value) == CONST_VECTOR
628 && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
630 value = simplify_subreg (DImode, value, GET_MODE (value), 0);
631 return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
633 return false;
635 case 'Y':
636 /* A V2SF vector containing elements that satisfy 'G'. */
637 return
638 (GET_CODE (value) == CONST_VECTOR
639 && GET_MODE (value) == V2SFmode
640 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
641 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
643 default:
644 return false;
648 /* Return 1 if the operands of a move are ok. */
650 int
651 ia64_move_ok (rtx dst, rtx src)
653 /* If we're under init_recog_no_volatile, we'll not be able to use
654 memory_operand. So check the code directly and don't worry about
655 the validity of the underlying address, which should have been
656 checked elsewhere anyway. */
657 if (GET_CODE (dst) != MEM)
658 return 1;
659 if (GET_CODE (src) == MEM)
660 return 0;
661 if (register_operand (src, VOIDmode))
662 return 1;
664 /* Otherwise, this must be a constant, and that constant must be 0, 0.0 or 1.0. */
665 if (INTEGRAL_MODE_P (GET_MODE (dst)))
666 return src == const0_rtx;
667 else
668 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
671 int
672 addp4_optimize_ok (rtx op1, rtx op2)
674 return (basereg_operand (op1, GET_MODE(op1)) !=
675 basereg_operand (op2, GET_MODE(op2)));
678 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
679 Return the length of the field, or <= 0 on failure. */
681 int
682 ia64_depz_field_mask (rtx rop, rtx rshift)
684 unsigned HOST_WIDE_INT op = INTVAL (rop);
685 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
687 /* Get rid of the zero bits we're shifting in. */
688 op >>= shift;
690 /* We must now have a solid block of 1's at bit 0. */
691 return exact_log2 (op + 1);
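/* Worked example: for rop == 0x0ff0 and rshift == 4, the shift leaves
   op == 0xff and exact_log2 (0xff + 1) == 8, i.e. an 8-bit deposit
   field.  A non-contiguous mask such as 0x0f0f with rshift == 0 gives
   0x0f0f + 1 == 0x0f10, which is not a power of two, so exact_log2
   returns -1, signalling failure.  */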
694 /* Return the TLS model to use for ADDR. */
696 static enum tls_model
697 tls_symbolic_operand_type (rtx addr)
699 enum tls_model tls_kind = 0;
701 if (GET_CODE (addr) == CONST)
703 if (GET_CODE (XEXP (addr, 0)) == PLUS
704 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
705 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
707 else if (GET_CODE (addr) == SYMBOL_REF)
708 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
710 return tls_kind;
713 /* Return true if X is a constant that is valid for some immediate
714 field in an instruction. */
716 bool
717 ia64_legitimate_constant_p (rtx x)
719 switch (GET_CODE (x))
721 case CONST_INT:
722 case LABEL_REF:
723 return true;
725 case CONST_DOUBLE:
726 if (GET_MODE (x) == VOIDmode)
727 return true;
728 return CONST_DOUBLE_OK_FOR_G (x);
730 case CONST:
731 case SYMBOL_REF:
732 return tls_symbolic_operand_type (x) == 0;
734 default:
735 return false;
739 /* Don't allow TLS addresses to get spilled to memory. */
741 static bool
742 ia64_cannot_force_const_mem (rtx x)
744 return tls_symbolic_operand_type (x) != 0;
747 /* Expand a symbolic constant load. */
749 bool
750 ia64_expand_load_address (rtx dest, rtx src)
752 gcc_assert (GET_CODE (dest) == REG);
754 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
755 having to pointer-extend the value afterward. Other forms of address
756 computation below are also more natural to compute as 64-bit quantities.
757 If we've been given an SImode destination register, change it. */
758 if (GET_MODE (dest) != Pmode)
759 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
761 if (TARGET_NO_PIC)
762 return false;
763 if (small_addr_symbolic_operand (src, VOIDmode))
764 return false;
766 if (TARGET_AUTO_PIC)
767 emit_insn (gen_load_gprel64 (dest, src));
768 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
769 emit_insn (gen_load_fptr (dest, src));
770 else if (sdata_symbolic_operand (src, VOIDmode))
771 emit_insn (gen_load_gprel (dest, src));
772 else
774 HOST_WIDE_INT addend = 0;
775 rtx tmp;
777 /* We did split constant offsets in ia64_expand_move, and we did try
778 to keep them split in move_operand, but we also allowed reload to
779 rematerialize arbitrary constants rather than spill the value to
780 the stack and reload it. So we have to be prepared here to split
781 them apart again. */
782 if (GET_CODE (src) == CONST)
784 HOST_WIDE_INT hi, lo;
786 hi = INTVAL (XEXP (XEXP (src, 0), 1));
787 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
788 hi = hi - lo;
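/* Example of the split above: the low 14 bits of the offset are
   sign-extended into lo.  An offset of 0x11234 gives lo == 0x1234 and
   hi == 0x10000; an offset of 0x12345 (bit 13 set) gives lo == -0x1cbb
   and hi == 0x14000.  In both cases hi has its low 14 bits clear and
   lo fits in a signed 14-bit immediate.  */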
790 if (lo != 0)
792 addend = lo;
793 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
797 tmp = gen_rtx_HIGH (Pmode, src);
798 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
799 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
801 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
802 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
804 if (addend)
806 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
807 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
811 return true;
814 static GTY(()) rtx gen_tls_tga;
815 static rtx
816 gen_tls_get_addr (void)
818 if (!gen_tls_tga)
819 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
820 return gen_tls_tga;
823 static GTY(()) rtx thread_pointer_rtx;
824 static rtx
825 gen_thread_pointer (void)
827 if (!thread_pointer_rtx)
828 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
829 return thread_pointer_rtx;
832 static rtx
833 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
834 HOST_WIDE_INT addend)
836 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
837 rtx orig_op0 = op0, orig_op1 = op1;
838 HOST_WIDE_INT addend_lo, addend_hi;
840 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
841 addend_hi = addend - addend_lo;
843 switch (tls_kind)
845 case TLS_MODEL_GLOBAL_DYNAMIC:
846 start_sequence ();
848 tga_op1 = gen_reg_rtx (Pmode);
849 emit_insn (gen_load_dtpmod (tga_op1, op1));
851 tga_op2 = gen_reg_rtx (Pmode);
852 emit_insn (gen_load_dtprel (tga_op2, op1));
854 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
855 LCT_CONST, Pmode, 2, tga_op1,
856 Pmode, tga_op2, Pmode);
858 insns = get_insns ();
859 end_sequence ();
861 if (GET_MODE (op0) != Pmode)
862 op0 = tga_ret;
863 emit_libcall_block (insns, op0, tga_ret, op1);
864 break;
866 case TLS_MODEL_LOCAL_DYNAMIC:
867 /* ??? This isn't the completely proper way to do local-dynamic.
868 If the call to __tls_get_addr is used only by a single symbol,
869 then we should (somehow) move the dtprel to the second arg
870 to avoid the extra add. */
871 start_sequence ();
873 tga_op1 = gen_reg_rtx (Pmode);
874 emit_insn (gen_load_dtpmod (tga_op1, op1));
876 tga_op2 = const0_rtx;
878 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
879 LCT_CONST, Pmode, 2, tga_op1,
880 Pmode, tga_op2, Pmode);
882 insns = get_insns ();
883 end_sequence ();
885 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
886 UNSPEC_LD_BASE);
887 tmp = gen_reg_rtx (Pmode);
888 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
890 if (!register_operand (op0, Pmode))
891 op0 = gen_reg_rtx (Pmode);
892 if (TARGET_TLS64)
894 emit_insn (gen_load_dtprel (op0, op1));
895 emit_insn (gen_adddi3 (op0, tmp, op0));
897 else
898 emit_insn (gen_add_dtprel (op0, op1, tmp));
899 break;
901 case TLS_MODEL_INITIAL_EXEC:
902 op1 = plus_constant (op1, addend_hi);
903 addend = addend_lo;
905 tmp = gen_reg_rtx (Pmode);
906 emit_insn (gen_load_tprel (tmp, op1));
908 if (!register_operand (op0, Pmode))
909 op0 = gen_reg_rtx (Pmode);
910 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
911 break;
913 case TLS_MODEL_LOCAL_EXEC:
914 if (!register_operand (op0, Pmode))
915 op0 = gen_reg_rtx (Pmode);
917 op1 = orig_op1;
918 addend = 0;
919 if (TARGET_TLS64)
921 emit_insn (gen_load_tprel (op0, op1));
922 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
924 else
925 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
926 break;
928 default:
929 gcc_unreachable ();
932 if (addend)
933 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
934 orig_op0, 1, OPTAB_DIRECT);
935 if (orig_op0 == op0)
936 return NULL_RTX;
937 if (GET_MODE (orig_op0) == Pmode)
938 return op0;
939 return gen_lowpart (GET_MODE (orig_op0), op0);
942 rtx
943 ia64_expand_move (rtx op0, rtx op1)
945 enum machine_mode mode = GET_MODE (op0);
947 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
948 op1 = force_reg (mode, op1);
950 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
952 HOST_WIDE_INT addend = 0;
953 enum tls_model tls_kind;
954 rtx sym = op1;
956 if (GET_CODE (op1) == CONST
957 && GET_CODE (XEXP (op1, 0)) == PLUS
958 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
960 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
961 sym = XEXP (XEXP (op1, 0), 0);
964 tls_kind = tls_symbolic_operand_type (sym);
965 if (tls_kind)
966 return ia64_expand_tls_address (tls_kind, op0, sym, addend);
968 if (any_offset_symbol_operand (sym, mode))
969 addend = 0;
970 else if (aligned_offset_symbol_operand (sym, mode))
972 HOST_WIDE_INT addend_lo, addend_hi;
974 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
975 addend_hi = addend - addend_lo;
977 if (addend_lo != 0)
979 op1 = plus_constant (sym, addend_hi);
980 addend = addend_lo;
982 else
983 addend = 0;
985 else
986 op1 = sym;
988 if (reload_completed)
990 /* We really should have taken care of this offset earlier. */
991 gcc_assert (addend == 0);
992 if (ia64_expand_load_address (op0, op1))
993 return NULL_RTX;
996 if (addend)
998 rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
1000 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1002 op1 = expand_simple_binop (mode, PLUS, subtarget,
1003 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1004 if (op0 == op1)
1005 return NULL_RTX;
1009 return op1;
1012 /* Split a move from OP1 to OP0 conditional on COND. */
1014 void
1015 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1017 rtx insn, first = get_last_insn ();
1019 emit_move_insn (op0, op1);
1021 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1022 if (INSN_P (insn))
1023 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1024 PATTERN (insn));
1027 /* Split a post-reload TImode or TFmode reference into two DImode
1028 components. This is made extra difficult by the fact that we do
1029 not get any scratch registers to work with, because reload cannot
1030 be prevented from giving us a scratch that overlaps the register
1031 pair involved. So instead, when addressing memory, we tweak the
1032 pointer register up and back down with POST_INCs. Or up and not
1033 back down when we can get away with it.
1035 REVERSED is true when the loads must be done in reversed order
1036 (high word first) for correctness. DEAD is true when the pointer
1037 dies with the second insn we generate and therefore the second
1038 address must not carry a postmodify.
1040 May return an insn which is to be emitted after the moves. */
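/* For example, a load from (mem:TI (reg:DI r)) with the pointer still
   live afterward is split into

       out[0] = (mem:DI (post_inc:DI (reg:DI r)))    <- low word
       out[1] = (mem:DI (post_dec:DI (reg:DI r)))    <- high word

   so the base register is bumped up by 8 for the second access and
   back down again, ending up with its original value.  */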
1042 static rtx
1043 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1045 rtx fixup = 0;
1047 switch (GET_CODE (in))
1049 case REG:
1050 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1051 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1052 break;
1054 case CONST_INT:
1055 case CONST_DOUBLE:
1056 /* Cannot occur reversed. */
1057 gcc_assert (!reversed);
1059 if (GET_MODE (in) != TFmode)
1060 split_double (in, &out[0], &out[1]);
1061 else
1062 /* split_double does not understand how to split a TFmode
1063 quantity into a pair of DImode constants. */
1065 REAL_VALUE_TYPE r;
1066 unsigned HOST_WIDE_INT p[2];
1067 long l[4]; /* TFmode is 128 bits */
1069 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1070 real_to_target (l, &r, TFmode);
1072 if (FLOAT_WORDS_BIG_ENDIAN)
1074 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1075 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1077 else
1079 p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1080 p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1082 out[0] = GEN_INT (p[0]);
1083 out[1] = GEN_INT (p[1]);
1085 break;
1087 case MEM:
1089 rtx base = XEXP (in, 0);
1090 rtx offset;
1092 switch (GET_CODE (base))
1094 case REG:
1095 if (!reversed)
1097 out[0] = adjust_automodify_address
1098 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1099 out[1] = adjust_automodify_address
1100 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1102 else
1104 /* Reversal requires a pre-increment, which can only
1105 be done as a separate insn. */
1106 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1107 out[0] = adjust_automodify_address
1108 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1109 out[1] = adjust_address (in, DImode, 0);
1111 break;
1113 case POST_INC:
1114 gcc_assert (!reversed && !dead);
1116 /* Just do the increment in two steps. */
1117 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1118 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1119 break;
1121 case POST_DEC:
1122 gcc_assert (!reversed && !dead);
1124 /* Add 8, subtract 24. */
1125 base = XEXP (base, 0);
1126 out[0] = adjust_automodify_address
1127 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1128 out[1] = adjust_automodify_address
1129 (in, DImode,
1130 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1132 break;
1134 case POST_MODIFY:
1135 gcc_assert (!reversed && !dead);
1137 /* Extract and adjust the modification. This case is
1138 trickier than the others, because we might have an
1139 index register, or we might have a combined offset that
1140 doesn't fit a signed 9-bit displacement field. We can
1141 assume the incoming expression is already legitimate. */
1142 offset = XEXP (base, 1);
1143 base = XEXP (base, 0);
1145 out[0] = adjust_automodify_address
1146 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1148 if (GET_CODE (XEXP (offset, 1)) == REG)
1150 /* Can't adjust the postmodify to match. Emit the
1151 original, then a separate addition insn. */
1152 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1153 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1155 else
1157 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1158 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1160 /* Again the postmodify cannot be made to match,
1161 but in this case it's more efficient to get rid
1162 of the postmodify entirely and fix up with an
1163 add insn. */
1164 out[1] = adjust_automodify_address (in, DImode, base, 8);
1165 fixup = gen_adddi3
1166 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1168 else
1170 /* Combined offset still fits in the displacement field.
1171 (We cannot overflow it at the high end.) */
1172 out[1] = adjust_automodify_address
1173 (in, DImode, gen_rtx_POST_MODIFY
1174 (Pmode, base, gen_rtx_PLUS
1175 (Pmode, base,
1176 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1180 break;
1182 default:
1183 gcc_unreachable ();
1185 break;
1188 default:
1189 gcc_unreachable ();
1192 return fixup;
1195 /* Split a TImode or TFmode move instruction after reload.
1196 This is used by *movtf_internal and *movti_internal. */
1197 void
1198 ia64_split_tmode_move (rtx operands[])
1200 rtx in[2], out[2], insn;
1201 rtx fixup[2];
1202 bool dead = false;
1203 bool reversed = false;
1205 /* It is possible for reload to decide to overwrite a pointer with
1206 the value it points to. In that case we have to do the loads in
1207 the appropriate order so that the pointer is not destroyed too
1208 early. Also we must not generate a postmodify for that second
1209 load, or rws_access_regno will die. */
1210 if (GET_CODE (operands[1]) == MEM
1211 && reg_overlap_mentioned_p (operands[0], operands[1]))
1213 rtx base = XEXP (operands[1], 0);
1214 while (GET_CODE (base) != REG)
1215 base = XEXP (base, 0);
1217 if (REGNO (base) == REGNO (operands[0]))
1218 reversed = true;
1219 dead = true;
1221 /* Another reason to do the moves in reversed order is if the first
1222 element of the target register pair is also the second element of
1223 the source register pair. */
1224 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1225 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1226 reversed = true;
1228 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1229 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1231 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1232 if (GET_CODE (EXP) == MEM \
1233 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1234 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1235 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1236 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1237 XEXP (XEXP (EXP, 0), 0), \
1238 REG_NOTES (INSN))
1240 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1241 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1242 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1244 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1245 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1246 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1248 if (fixup[0])
1249 emit_insn (fixup[0]);
1250 if (fixup[1])
1251 emit_insn (fixup[1]);
1253 #undef MAYBE_ADD_REG_INC_NOTE
1256 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1257 through memory plus an extra GR scratch register. Except that you can
1258 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1259 SECONDARY_RELOAD_CLASS, but not both.
1261 We got into problems in the first place by allowing a construct like
1262 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1263 This solution attempts to prevent this situation from occurring. When
1264 we see something like the above, we spill the inner register to memory. */
1266 rtx
1267 spill_xfmode_operand (rtx in, int force)
1269 if (GET_CODE (in) == SUBREG
1270 && GET_MODE (SUBREG_REG (in)) == TImode
1271 && GET_CODE (SUBREG_REG (in)) == REG)
1273 rtx memt = assign_stack_temp (TImode, 16, 0);
1274 emit_move_insn (memt, SUBREG_REG (in));
1275 return adjust_address (memt, XFmode, 0);
1277 else if (force && GET_CODE (in) == REG)
1279 rtx memx = assign_stack_temp (XFmode, 16, 0);
1280 emit_move_insn (memx, in);
1281 return memx;
1283 else
1284 return in;
1287 /* Emit comparison instruction if necessary, returning the expression
1288 that holds the compare result in the proper mode. */
1290 static GTY(()) rtx cmptf_libfunc;
1292 rtx
1293 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1295 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1296 rtx cmp;
1298 /* If we have a BImode input, then we already have a compare result, and
1299 do not need to emit another comparison. */
1300 if (GET_MODE (op0) == BImode)
1302 gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
1303 cmp = op0;
1305 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1306 magic number as its third argument, that indicates what to do.
1307 The return value is an integer to be compared against zero. */
1308 else if (GET_MODE (op0) == TFmode)
1310 enum qfcmp_magic {
1311 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1312 QCMP_UNORD = 2,
1313 QCMP_EQ = 4,
1314 QCMP_LT = 8,
1315 QCMP_GT = 16
1316 } magic;
1317 enum rtx_code ncode;
1318 rtx ret, insns;
1320 gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
1321 switch (code)
1323 /* 1 = equal, 0 = not equal. Equality operators do
1324 not raise FP_INVALID when given an SNaN operand. */
1325 case EQ: magic = QCMP_EQ; ncode = NE; break;
1326 case NE: magic = QCMP_EQ; ncode = EQ; break;
1327 /* isunordered() from C99. */
1328 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1329 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1330 /* Relational operators raise FP_INVALID when given
1331 an SNaN operand. */
1332 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1333 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1334 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1335 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1336 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1337 Expanders for buneq etc. would have to be added to ia64.md
1338 for this to be useful. */
1339 default: gcc_unreachable ();
1342 start_sequence ();
1344 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1345 op0, TFmode, op1, TFmode,
1346 GEN_INT (magic), DImode);
1347 cmp = gen_reg_rtx (BImode);
1348 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1349 gen_rtx_fmt_ee (ncode, BImode,
1350 ret, const0_rtx)));
1352 insns = get_insns ();
1353 end_sequence ();
1355 emit_libcall_block (insns, cmp, cmp,
1356 gen_rtx_fmt_ee (code, BImode, op0, op1));
1357 code = NE;
1359 else
1361 cmp = gen_reg_rtx (BImode);
1362 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1363 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1364 code = NE;
1367 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
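/* Example of the TFmode path: for LE the magic value is
   QCMP_LT | QCMP_EQ | QCMP_INV == 13, so op0 <= op1 is computed as
   _U_Qfcmp (op0, op1, 13) != 0, with QCMP_INV asking for FP_INVALID
   on a signaling NaN as relational operators must.  */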
1370 /* Generate an integral vector comparison. */
1372 static bool
1373 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1374 rtx dest, rtx op0, rtx op1)
1376 bool negate = false;
1377 rtx x;
1379 switch (code)
1381 case EQ:
1382 case GT:
1383 break;
1385 case NE:
1386 code = EQ;
1387 negate = true;
1388 break;
1390 case LE:
1391 code = GT;
1392 negate = true;
1393 break;
1395 case GE:
1396 negate = true;
1397 /* FALLTHRU */
1399 case LT:
1400 x = op0;
1401 op0 = op1;
1402 op1 = x;
1403 code = GT;
1404 break;
1406 case GTU:
1407 case GEU:
1408 case LTU:
1409 case LEU:
1411 rtx w0h, w0l, w1h, w1l, ch, cl;
1412 enum machine_mode wmode;
1413 rtx (*unpack_l) (rtx, rtx, rtx);
1414 rtx (*unpack_h) (rtx, rtx, rtx);
1415 rtx (*pack) (rtx, rtx, rtx);
1417 /* We don't have native unsigned comparisons, but we can generate
1418 them better than generic code can. */
1420 gcc_assert (mode != V2SImode);
1421 switch (mode)
1423 case V8QImode:
1424 wmode = V4HImode;
1425 pack = gen_pack2_sss;
1426 unpack_l = gen_unpack1_l;
1427 unpack_h = gen_unpack1_h;
1428 break;
1430 case V4HImode:
1431 wmode = V2SImode;
1432 pack = gen_pack4_sss;
1433 unpack_l = gen_unpack2_l;
1434 unpack_h = gen_unpack2_h;
1435 break;
1437 default:
1438 gcc_unreachable ();
1441 /* Unpack into wider vectors, zero extending the elements. */
1443 w0l = gen_reg_rtx (wmode);
1444 w0h = gen_reg_rtx (wmode);
1445 w1l = gen_reg_rtx (wmode);
1446 w1h = gen_reg_rtx (wmode);
1447 emit_insn (unpack_l (gen_lowpart (mode, w0l), op0, CONST0_RTX (mode)));
1448 emit_insn (unpack_h (gen_lowpart (mode, w0h), op0, CONST0_RTX (mode)));
1449 emit_insn (unpack_l (gen_lowpart (mode, w1l), op1, CONST0_RTX (mode)));
1450 emit_insn (unpack_h (gen_lowpart (mode, w1h), op1, CONST0_RTX (mode)));
1452 /* Compare in the wider mode. */
1454 cl = gen_reg_rtx (wmode);
1455 ch = gen_reg_rtx (wmode);
1456 code = signed_condition (code);
1457 ia64_expand_vecint_compare (code, wmode, cl, w0l, w1l);
1458 negate = ia64_expand_vecint_compare (code, wmode, ch, w0h, w1h);
1460 /* Repack into a single narrower vector. */
1462 emit_insn (pack (dest, cl, ch));
1464 return negate;
1466 default:
1467 gcc_unreachable ();
1470 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1471 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1473 return negate;
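/* Example of the unsigned case: a V8QImode GTU comparison zero-extends
   both operands into two V4HImode halves via the unpack1_l/unpack1_h
   patterns, compares the halves with the signed GT that is available
   natively, and repacks the two results with pack2_sss.  */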
1476 static void
1477 ia64_expand_vcondu_v2si (enum rtx_code code, rtx operands[])
1479 rtx dl, dh, bl, bh, op1l, op1h, op2l, op2h, op4l, op4h, op5l, op5h, x;
1481 /* In this case, we extract the two SImode quantities and generate
1482 normal comparisons for each of them. */
1484 op1l = gen_lowpart (SImode, operands[1]);
1485 op2l = gen_lowpart (SImode, operands[2]);
1486 op4l = gen_lowpart (SImode, operands[4]);
1487 op5l = gen_lowpart (SImode, operands[5]);
1489 op1h = gen_reg_rtx (SImode);
1490 op2h = gen_reg_rtx (SImode);
1491 op4h = gen_reg_rtx (SImode);
1492 op5h = gen_reg_rtx (SImode);
1494 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op1h),
1495 gen_lowpart (DImode, operands[1]), GEN_INT (32)));
1496 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op2h),
1497 gen_lowpart (DImode, operands[2]), GEN_INT (32)));
1498 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op4h),
1499 gen_lowpart (DImode, operands[4]), GEN_INT (32)));
1500 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op5h),
1501 gen_lowpart (DImode, operands[5]), GEN_INT (32)));
1503 bl = gen_reg_rtx (BImode);
1504 x = gen_rtx_fmt_ee (code, BImode, op4l, op5l);
1505 emit_insn (gen_rtx_SET (VOIDmode, bl, x));
1507 bh = gen_reg_rtx (BImode);
1508 x = gen_rtx_fmt_ee (code, BImode, op4h, op5h);
1509 emit_insn (gen_rtx_SET (VOIDmode, bh, x));
1511 /* With the results of the comparisons, emit conditional moves. */
1513 dl = gen_reg_rtx (SImode);
1514 x = gen_rtx_IF_THEN_ELSE (SImode, bl, op1l, op2l);
1515 emit_insn (gen_rtx_SET (VOIDmode, dl, x));
1517 dh = gen_reg_rtx (SImode);
1518 x = gen_rtx_IF_THEN_ELSE (SImode, bh, op1h, op2h);
1519 emit_insn (gen_rtx_SET (VOIDmode, dh, x));
1521 /* Merge the two partial results back into a vector. */
1523 x = gen_rtx_VEC_CONCAT (V2SImode, dl, dh);
1524 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1527 /* Emit an integral vector conditional move. */
1529 void
1530 ia64_expand_vecint_cmov (rtx operands[])
1532 enum machine_mode mode = GET_MODE (operands[0]);
1533 enum rtx_code code = GET_CODE (operands[3]);
1534 bool negate;
1535 rtx cmp, x, ot, of;
1537 /* Since we don't have unsigned V2SImode comparisons, it's more efficient
1538 to special-case them entirely. */
1539 if (mode == V2SImode
1540 && (code == GTU || code == GEU || code == LEU || code == LTU))
1542 ia64_expand_vcondu_v2si (code, operands);
1543 return;
1546 cmp = gen_reg_rtx (mode);
1547 negate = ia64_expand_vecint_compare (code, mode, cmp,
1548 operands[4], operands[5]);
1550 ot = operands[1+negate];
1551 of = operands[2-negate];
1553 if (ot == CONST0_RTX (mode))
1555 if (of == CONST0_RTX (mode))
1557 emit_move_insn (operands[0], ot);
1558 return;
1561 x = gen_rtx_NOT (mode, cmp);
1562 x = gen_rtx_AND (mode, x, of);
1563 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1565 else if (of == CONST0_RTX (mode))
1567 x = gen_rtx_AND (mode, cmp, ot);
1568 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1570 else
1572 rtx t, f;
1574 t = gen_reg_rtx (mode);
1575 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1576 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1578 f = gen_reg_rtx (mode);
1579 x = gen_rtx_NOT (mode, cmp);
1580 x = gen_rtx_AND (mode, x, operands[2-negate]);
1581 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1583 x = gen_rtx_IOR (mode, t, f);
1584 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
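/* In the general case above the conditional move is done with plain
   logicals: dest = (cmp & true_op) | (~cmp & false_op), which works
   because the vector compare leaves each element either all zeros or
   all ones.  */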
1588 /* Emit an integral vector min or max operation. Return true if all done. */
1590 bool
1591 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1592 rtx operands[])
1594 rtx xops[5];
1596 /* These four combinations are supported directly. */
1597 if (mode == V8QImode && (code == UMIN || code == UMAX))
1598 return false;
1599 if (mode == V4HImode && (code == SMIN || code == SMAX))
1600 return false;
1602 /* Everything else implemented via vector comparisons. */
1603 xops[0] = operands[0];
1604 xops[4] = xops[1] = operands[1];
1605 xops[5] = xops[2] = operands[2];
1607 switch (code)
1609 case UMIN:
1610 code = LTU;
1611 break;
1612 case UMAX:
1613 code = GTU;
1614 break;
1615 case SMIN:
1616 code = LT;
1617 break;
1618 case SMAX:
1619 code = GT;
1620 break;
1621 default:
1622 gcc_unreachable ();
1624 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1626 ia64_expand_vecint_cmov (xops);
1627 return true;
1630 /* Emit the appropriate sequence for a call. */
1632 void
1633 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1634 int sibcall_p)
1636 rtx insn, b0;
1638 addr = XEXP (addr, 0);
1639 addr = convert_memory_address (DImode, addr);
1640 b0 = gen_rtx_REG (DImode, R_BR (0));
1642 /* ??? Should do this for functions known to bind local too. */
1643 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1645 if (sibcall_p)
1646 insn = gen_sibcall_nogp (addr);
1647 else if (! retval)
1648 insn = gen_call_nogp (addr, b0);
1649 else
1650 insn = gen_call_value_nogp (retval, addr, b0);
1651 insn = emit_call_insn (insn);
1653 else
1655 if (sibcall_p)
1656 insn = gen_sibcall_gp (addr);
1657 else if (! retval)
1658 insn = gen_call_gp (addr, b0);
1659 else
1660 insn = gen_call_value_gp (retval, addr, b0);
1661 insn = emit_call_insn (insn);
1663 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1666 if (sibcall_p)
1667 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1670 void
1671 ia64_reload_gp (void)
1673 rtx tmp;
1675 if (current_frame_info.reg_save_gp)
1676 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1677 else
1679 HOST_WIDE_INT offset;
1681 offset = (current_frame_info.spill_cfa_off
1682 + current_frame_info.spill_size);
1683 if (frame_pointer_needed)
1685 tmp = hard_frame_pointer_rtx;
1686 offset = -offset;
1688 else
1690 tmp = stack_pointer_rtx;
1691 offset = current_frame_info.total_size - offset;
1694 if (CONST_OK_FOR_I (offset))
1695 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1696 tmp, GEN_INT (offset)));
1697 else
1699 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1700 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1701 pic_offset_table_rtx, tmp));
1704 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1707 emit_move_insn (pic_offset_table_rtx, tmp);
1710 void
1711 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1712 rtx scratch_b, int noreturn_p, int sibcall_p)
1714 rtx insn;
1715 bool is_desc = false;
1717 /* If we find we're calling through a register, then we're actually
1718 calling through a descriptor, so load up the values. */
1719 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1721 rtx tmp;
1722 bool addr_dead_p;
1724 /* ??? We are currently constrained to *not* use peep2, because
1725 we can legitimately change the global lifetime of the GP
1726 (in the form of killing where previously live). This is
1727 because a call through a descriptor doesn't use the previous
1728 value of the GP, while a direct call does, and we do not
1729 commit to either form until the split here.
1731 That said, this means that we lack precise life info for
1732 whether ADDR is dead after this call. This is not terribly
1733 important, since we can fix things up essentially for free
1734 with the POST_DEC below, but it's nice to not use it when we
1735 can immediately tell it's not necessary. */
1736 addr_dead_p = ((noreturn_p || sibcall_p
1737 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1738 REGNO (addr)))
1739 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1741 /* Load the code address into scratch_b. */
1742 tmp = gen_rtx_POST_INC (Pmode, addr);
1743 tmp = gen_rtx_MEM (Pmode, tmp);
1744 emit_move_insn (scratch_r, tmp);
1745 emit_move_insn (scratch_b, scratch_r);
1747 /* Load the GP address. If ADDR is not dead here, then we must
1748 revert the change made above via the POST_INCREMENT. */
1749 if (!addr_dead_p)
1750 tmp = gen_rtx_POST_DEC (Pmode, addr);
1751 else
1752 tmp = addr;
1753 tmp = gen_rtx_MEM (Pmode, tmp);
1754 emit_move_insn (pic_offset_table_rtx, tmp);
1756 is_desc = true;
1757 addr = scratch_b;
1760 if (sibcall_p)
1761 insn = gen_sibcall_nogp (addr);
1762 else if (retval)
1763 insn = gen_call_value_nogp (retval, addr, retaddr);
1764 else
1765 insn = gen_call_nogp (addr, retaddr);
1766 emit_call_insn (insn);
1768 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1769 ia64_reload_gp ();
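/* Roughly, the descriptor case above becomes

       ld8 rS = [rA], 8        ;; code address, rA advances to gp slot
       mov bB = rS
       ld8 gp = [rA], -8       ;; callee's gp, rA restored
       br.call b0 = bB

   with rA standing for ADDR, rS for SCRATCH_R and bB for SCRATCH_B;
   the post-decrement on the second load is omitted when ADDR is known
   to be dead (see addr_dead_p above).  */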
1772 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
1774 This differs from the generic code in that we know about the zero-extending
1775 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
1776 also know that ld.acq+cmpxchg.rel equals a full barrier.
1778 The loop we want to generate looks like
1780 cmp_reg = mem;
1781 label:
1782 old_reg = cmp_reg;
1783 new_reg = cmp_reg op val;
1784 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
1785 if (cmp_reg != old_reg)
1786 goto label;
1788 Note that we only do the plain load from memory once. Subsequent
1789 iterations use the value loaded by the compare-and-swap pattern. */
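/* For example, an SImode add of the constant 1 satisfies
   fetchadd_operand and is emitted as a single fetchadd4.acq by the
   special case below; an add of 3 is not a fetchadd immediate and
   falls back to the compare-and-swap loop shown above.  */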
1791 void
1792 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
1793 rtx old_dst, rtx new_dst)
1795 enum machine_mode mode = GET_MODE (mem);
1796 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
1797 enum insn_code icode;
1799 /* Special case for using fetchadd. */
1800 if ((mode == SImode || mode == DImode) && fetchadd_operand (val, mode))
1802 if (!old_dst)
1803 old_dst = gen_reg_rtx (mode);
1805 emit_insn (gen_memory_barrier ());
1807 if (mode == SImode)
1808 icode = CODE_FOR_fetchadd_acq_si;
1809 else
1810 icode = CODE_FOR_fetchadd_acq_di;
1811 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
1813 if (new_dst)
1815 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
1816 true, OPTAB_WIDEN);
1817 if (new_reg != new_dst)
1818 emit_move_insn (new_dst, new_reg);
1820 return;
1823 /* Because of the volatile mem read, we get an ld.acq, which is the
1824 front half of the full barrier. The end half is the cmpxchg.rel. */
1825 gcc_assert (MEM_VOLATILE_P (mem));
1827 old_reg = gen_reg_rtx (DImode);
1828 cmp_reg = gen_reg_rtx (DImode);
1829 label = gen_label_rtx ();
1831 if (mode != DImode)
1833 val = simplify_gen_subreg (DImode, val, mode, 0);
1834 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
1836 else
1837 emit_move_insn (cmp_reg, mem);
1839 emit_label (label);
1841 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
1842 emit_move_insn (old_reg, cmp_reg);
1843 emit_move_insn (ar_ccv, cmp_reg);
1845 if (old_dst)
1846 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
1848 new_reg = cmp_reg;
1849 if (code == NOT)
1851 new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
1852 code = AND;
1854 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
1855 true, OPTAB_DIRECT);
1857 if (mode != DImode)
1858 new_reg = gen_lowpart (mode, new_reg);
1859 if (new_dst)
1860 emit_move_insn (new_dst, new_reg);
1862 switch (mode)
1864 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
1865 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
1866 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
1867 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
1868 default:
1869 gcc_unreachable ();
1872 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
1874 emit_cmp_and_jump_insns (cmp_reg, old_reg, EQ, NULL, DImode, true, label);
1877 /* Begin the assembly file. */
1879 static void
1880 ia64_file_start (void)
1882 /* Variable tracking should be run after all optimizations which change order
1883 of insns. It also needs a valid CFG. This can't be done in
1884 ia64_override_options, because flag_var_tracking is finalized after
1885 that. */
1886 ia64_flag_var_tracking = flag_var_tracking;
1887 flag_var_tracking = 0;
1889 default_file_start ();
1890 emit_safe_across_calls ();
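/* Emit a ".pred.safe_across_calls" directive listing every maximal range
   of predicate registers p1..p63 that is not call-used (on this target
   typically "\t.pred.safe_across_calls p1-p5,p16-p63"), telling the
   assembler which predicate registers are preserved across calls.  */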
1893 void
1894 emit_safe_across_calls (void)
1896 unsigned int rs, re;
1897 int out_state;
1899 rs = 1;
1900 out_state = 0;
1901 while (1)
1903 while (rs < 64 && call_used_regs[PR_REG (rs)])
1904 rs++;
1905 if (rs >= 64)
1906 break;
1907 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1908 continue;
1909 if (out_state == 0)
1911 fputs ("\t.pred.safe_across_calls ", asm_out_file);
1912 out_state = 1;
1914 else
1915 fputc (',', asm_out_file);
1916 if (re == rs + 1)
1917 fprintf (asm_out_file, "p%u", rs);
1918 else
1919 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
1920 rs = re + 1;
1922 if (out_state)
1923 fputc ('\n', asm_out_file);
1926 /* Helper function for ia64_compute_frame_size: find an appropriate general
1927 register to spill some special register to. SPECIAL_SPILL_MASK contains
1928 bits in GR0 to GR31 that have already been allocated by this routine.
1929 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1931 static int
1932 find_gr_spill (int try_locals)
1934 int regno;
1936 /* If this is a leaf function, first try an otherwise unused
1937 call-clobbered register. */
1938 if (current_function_is_leaf)
1940 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1941 if (! regs_ever_live[regno]
1942 && call_used_regs[regno]
1943 && ! fixed_regs[regno]
1944 && ! global_regs[regno]
1945 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1947 current_frame_info.gr_used_mask |= 1 << regno;
1948 return regno;
1952 if (try_locals)
1954 regno = current_frame_info.n_local_regs;
1955 /* If there is a frame pointer, then we can't use loc79, because
1956 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1957 reg_name switching code in ia64_expand_prologue. */
1958 if (regno < (80 - frame_pointer_needed))
1960 current_frame_info.n_local_regs = regno + 1;
1961 return LOC_REG (0) + regno;
1965 /* Failed to find a general register to spill to. Must use stack. */
1966 return 0;
1969 /* In order to make for nice schedules, we try to allocate every temporary
1970 to a different register. We must of course stay away from call-saved,
1971 fixed, and global registers. We must also stay away from registers
1972 allocated in current_frame_info.gr_used_mask, since those include regs
1973 used all through the prologue.
1975 Any register allocated here must be used immediately. The idea is to
1976 aid scheduling, not to solve data flow problems. */
1978 static int last_scratch_gr_reg;
1980 static int
1981 next_scratch_gr_reg (void)
1983 int i, regno;
1985 for (i = 0; i < 32; ++i)
1987 regno = (last_scratch_gr_reg + i + 1) & 31;
1988 if (call_used_regs[regno]
1989 && ! fixed_regs[regno]
1990 && ! global_regs[regno]
1991 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1993 last_scratch_gr_reg = regno;
1994 return regno;
1998 /* There must be _something_ available. */
1999 gcc_unreachable ();
2002 /* Helper function for ia64_compute_frame_size, called through
2003 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2005 static void
2006 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2008 unsigned int regno = REGNO (reg);
2009 if (regno < 32)
2011 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2012 for (i = 0; i < n; ++i)
2013 current_frame_info.gr_used_mask |= 1 << (regno + i);
2017 /* Compute the size and layout of the stack frame for the current function,
2018 storing the results in current_frame_info. SIZE is the number of bytes of
2019 space needed for local variables. */
2021 static void
2022 ia64_compute_frame_size (HOST_WIDE_INT size)
2024 HOST_WIDE_INT total_size;
2025 HOST_WIDE_INT spill_size = 0;
2026 HOST_WIDE_INT extra_spill_size = 0;
2027 HOST_WIDE_INT pretend_args_size;
2028 HARD_REG_SET mask;
2029 int n_spilled = 0;
2030 int spilled_gr_p = 0;
2031 int spilled_fr_p = 0;
2032 unsigned int regno;
2033 int i;
2035 if (current_frame_info.initialized)
2036 return;
2038 memset (&current_frame_info, 0, sizeof current_frame_info);
2039 CLEAR_HARD_REG_SET (mask);
2041 /* Don't allocate scratches to the return register. */
2042 diddle_return_value (mark_reg_gr_used_mask, NULL);
2044 /* Don't allocate scratches to the EH scratch registers. */
2045 if (cfun->machine->ia64_eh_epilogue_sp)
2046 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2047 if (cfun->machine->ia64_eh_epilogue_bsp)
2048 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2050 /* Find the size of the register stack frame. We have only 80 local
2051 registers, because we reserve 8 for the inputs and 8 for the
2052 outputs. */
2054 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2055 since we'll be adjusting that down later. */
2056 regno = LOC_REG (78) + ! frame_pointer_needed;
2057 for (; regno >= LOC_REG (0); regno--)
2058 if (regs_ever_live[regno])
2059 break;
2060 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2062 /* For functions marked with the syscall_linkage attribute, we must mark
2063 all eight input registers as in use, so that locals aren't visible to
2064 the caller. */
2066 if (cfun->machine->n_varargs > 0
2067 || lookup_attribute ("syscall_linkage",
2068 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2069 current_frame_info.n_input_regs = 8;
2070 else
2072 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2073 if (regs_ever_live[regno])
2074 break;
2075 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2078 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2079 if (regs_ever_live[regno])
2080 break;
2081 i = regno - OUT_REG (0) + 1;
2083 /* When -p profiling, we need one output register for the mcount argument.
2084 Likewise for -a profiling for the bb_init_func argument. For -ax
2085 profiling, we need two output registers for the two bb_init_trace_func
2086 arguments. */
2087 if (current_function_profile)
2088 i = MAX (i, 1);
2089 current_frame_info.n_output_regs = i;
2091 /* ??? No rotating register support yet. */
2092 current_frame_info.n_rotate_regs = 0;
2094 /* Discover which registers need spilling, and how much room that
2095 will take. Begin with floating point and general registers,
2096 which will always wind up on the stack. */
2098 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2099 if (regs_ever_live[regno] && ! call_used_regs[regno])
2101 SET_HARD_REG_BIT (mask, regno);
2102 spill_size += 16;
2103 n_spilled += 1;
2104 spilled_fr_p = 1;
2107 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2108 if (regs_ever_live[regno] && ! call_used_regs[regno])
2110 SET_HARD_REG_BIT (mask, regno);
2111 spill_size += 8;
2112 n_spilled += 1;
2113 spilled_gr_p = 1;
2116 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2117 if (regs_ever_live[regno] && ! call_used_regs[regno])
2119 SET_HARD_REG_BIT (mask, regno);
2120 spill_size += 8;
2121 n_spilled += 1;
2124 /* Now come all special registers that might get saved in other
2125 general registers. */
2127 if (frame_pointer_needed)
2129 current_frame_info.reg_fp = find_gr_spill (1);
2130 /* If we did not get a register, then we take LOC79. This is guaranteed
2131 to be free, even if regs_ever_live is already set, because this is
2132 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2133 as we don't count loc79 above. */
2134 if (current_frame_info.reg_fp == 0)
2136 current_frame_info.reg_fp = LOC_REG (79);
2137 current_frame_info.n_local_regs++;
2141 if (! current_function_is_leaf)
2143 /* Emit a save of BR0 if we call other functions. Do this even
2144 if this function doesn't return, as EH depends on this to be
2145 able to unwind the stack. */
2146 SET_HARD_REG_BIT (mask, BR_REG (0));
2148 current_frame_info.reg_save_b0 = find_gr_spill (1);
2149 if (current_frame_info.reg_save_b0 == 0)
2151 spill_size += 8;
2152 n_spilled += 1;
2155 /* Similarly for ar.pfs. */
2156 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2157 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2158 if (current_frame_info.reg_save_ar_pfs == 0)
2160 extra_spill_size += 8;
2161 n_spilled += 1;
2164 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2165 registers are clobbered, so we fall back to the stack. */
2166 current_frame_info.reg_save_gp
2167 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2168 if (current_frame_info.reg_save_gp == 0)
2170 SET_HARD_REG_BIT (mask, GR_REG (1));
2171 spill_size += 8;
2172 n_spilled += 1;
2175 else
2177 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2179 SET_HARD_REG_BIT (mask, BR_REG (0));
2180 spill_size += 8;
2181 n_spilled += 1;
2184 if (regs_ever_live[AR_PFS_REGNUM])
2186 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2187 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2188 if (current_frame_info.reg_save_ar_pfs == 0)
2190 extra_spill_size += 8;
2191 n_spilled += 1;
2196 /* Unwind descriptor hackery: things are most efficient if we allocate
2197 consecutive GR save registers for RP, PFS, FP in that order. However,
2198 it is absolutely critical that FP get the only hard register that's
2199 guaranteed to be free, so we allocated it first. If all three did
2200 happen to be allocated hard regs, and are consecutive, rearrange them
2201 into the preferred order now. */
2202 if (current_frame_info.reg_fp != 0
2203 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2204 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2206 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2207 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2208 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2211 /* See if we need to store the predicate register block. */
2212 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2213 if (regs_ever_live[regno] && ! call_used_regs[regno])
2214 break;
2215 if (regno <= PR_REG (63))
2217 SET_HARD_REG_BIT (mask, PR_REG (0));
2218 current_frame_info.reg_save_pr = find_gr_spill (1);
2219 if (current_frame_info.reg_save_pr == 0)
2221 extra_spill_size += 8;
2222 n_spilled += 1;
2225 /* ??? Mark them all as used so that register renaming and such
2226 are free to use them. */
2227 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2228 regs_ever_live[regno] = 1;
2231 /* If we're forced to use st8.spill, we're forced to save and restore
2232 ar.unat as well. The check for existing liveness allows inline asm
2233 to touch ar.unat. */
2234 if (spilled_gr_p || cfun->machine->n_varargs
2235 || regs_ever_live[AR_UNAT_REGNUM])
2237 regs_ever_live[AR_UNAT_REGNUM] = 1;
2238 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2239 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2240 if (current_frame_info.reg_save_ar_unat == 0)
2242 extra_spill_size += 8;
2243 n_spilled += 1;
2247 if (regs_ever_live[AR_LC_REGNUM])
2249 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2250 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2251 if (current_frame_info.reg_save_ar_lc == 0)
2253 extra_spill_size += 8;
2254 n_spilled += 1;
2258 /* If we have an odd number of words of pretend arguments written to
2259 the stack, then the FR save area will be unaligned. We round the
2260 size of this area up to keep things 16 byte aligned. */
2261 if (spilled_fr_p)
2262 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2263 else
2264 pretend_args_size = current_function_pretend_args_size;
2266 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2267 + current_function_outgoing_args_size);
2268 total_size = IA64_STACK_ALIGN (total_size);
2270 /* We always use the 16-byte scratch area provided by the caller, but
2271 if we are a leaf function, there's no one to which we need to provide
2272 a scratch area. */
2273 if (current_function_is_leaf)
2274 total_size = MAX (0, total_size - 16);
2276 current_frame_info.total_size = total_size;
2277 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2278 current_frame_info.spill_size = spill_size;
2279 current_frame_info.extra_spill_size = extra_spill_size;
2280 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2281 current_frame_info.n_spilled = n_spilled;
2282 current_frame_info.initialized = reload_completed;
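/* Worked example (illustrative only, not from the original source): a
   non-leaf function with 40 bytes of locals, two call-saved FR registers
   to spill (2 * 16 = 32 bytes of spill_size), no pretend args and 16
   bytes of outgoing arguments gets
   total_size = IA64_STACK_ALIGN (32 + 40 + 16) = 96; a leaf function
   would additionally drop the caller-provided 16-byte scratch area.  */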
2285 /* Compute the initial difference between the specified pair of registers. */
2287 HOST_WIDE_INT
2288 ia64_initial_elimination_offset (int from, int to)
2290 HOST_WIDE_INT offset;
2292 ia64_compute_frame_size (get_frame_size ());
2293 switch (from)
2295 case FRAME_POINTER_REGNUM:
2296 switch (to)
2298 case HARD_FRAME_POINTER_REGNUM:
2299 if (current_function_is_leaf)
2300 offset = -current_frame_info.total_size;
2301 else
2302 offset = -(current_frame_info.total_size
2303 - current_function_outgoing_args_size - 16);
2304 break;
2306 case STACK_POINTER_REGNUM:
2307 if (current_function_is_leaf)
2308 offset = 0;
2309 else
2310 offset = 16 + current_function_outgoing_args_size;
2311 break;
2313 default:
2314 gcc_unreachable ();
2316 break;
2318 case ARG_POINTER_REGNUM:
2319 /* Arguments start above the 16 byte save area, unless stdarg,
2320 in which case we store through the 16 byte save area. */
2321 switch (to)
2323 case HARD_FRAME_POINTER_REGNUM:
2324 offset = 16 - current_function_pretend_args_size;
2325 break;
2327 case STACK_POINTER_REGNUM:
2328 offset = (current_frame_info.total_size
2329 + 16 - current_function_pretend_args_size);
2330 break;
2332 default:
2333 gcc_unreachable ();
2335 break;
2337 default:
2338 gcc_unreachable ();
2341 return offset;
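/* Example (illustrative, continuing the sketch above with total_size 96,
   16 bytes of outgoing args and no pretend args): eliminating
   ARG_POINTER_REGNUM to STACK_POINTER_REGNUM yields 96 + 16 = 112, while
   FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM in a non-leaf function
   yields 16 + 16 = 32.  */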
2344 /* If there are more than a trivial number of register spills, we use
2345 two interleaved iterators so that we can get two memory references
2346 per insn group.
2348 In order to simplify things in the prologue and epilogue expanders,
2349 we use helper functions to fix up the memory references after the
2350 fact with the appropriate offsets to a POST_MODIFY memory mode.
2351 The following data structure tracks the state of the two iterators
2352 while insns are being emitted. */
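/* Added illustration (not in the original source): with more than two
   spills, two iterator registers are used and successive do_spill /
   do_restore calls alternate between them, so that adjacent saves can
   land in the same insn group, roughly:

       st8.spill [iter0] = r4, 16    // POST_MODIFY patched in afterward
       st8.spill [iter1] = r5, 16
       st8.spill [iter0] = r6, 16

   Register choices and offsets above are purely for exposition.  */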
2354 struct spill_fill_data
2356 rtx init_after; /* point at which to emit initializations */
2357 rtx init_reg[2]; /* initial base register */
2358 rtx iter_reg[2]; /* the iterator registers */
2359 rtx *prev_addr[2]; /* address of last memory use */
2360 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2361 HOST_WIDE_INT prev_off[2]; /* last offset */
2362 int n_iter; /* number of iterators in use */
2363 int next_iter; /* next iterator to use */
2364 unsigned int save_gr_used_mask;
2367 static struct spill_fill_data spill_fill_data;
2369 static void
2370 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2372 int i;
2374 spill_fill_data.init_after = get_last_insn ();
2375 spill_fill_data.init_reg[0] = init_reg;
2376 spill_fill_data.init_reg[1] = init_reg;
2377 spill_fill_data.prev_addr[0] = NULL;
2378 spill_fill_data.prev_addr[1] = NULL;
2379 spill_fill_data.prev_insn[0] = NULL;
2380 spill_fill_data.prev_insn[1] = NULL;
2381 spill_fill_data.prev_off[0] = cfa_off;
2382 spill_fill_data.prev_off[1] = cfa_off;
2383 spill_fill_data.next_iter = 0;
2384 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2386 spill_fill_data.n_iter = 1 + (n_spills > 2);
2387 for (i = 0; i < spill_fill_data.n_iter; ++i)
2389 int regno = next_scratch_gr_reg ();
2390 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2391 current_frame_info.gr_used_mask |= 1 << regno;
2395 static void
2396 finish_spill_pointers (void)
2398 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2401 static rtx
2402 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2404 int iter = spill_fill_data.next_iter;
2405 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2406 rtx disp_rtx = GEN_INT (disp);
2407 rtx mem;
2409 if (spill_fill_data.prev_addr[iter])
2411 if (CONST_OK_FOR_N (disp))
2413 *spill_fill_data.prev_addr[iter]
2414 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2415 gen_rtx_PLUS (DImode,
2416 spill_fill_data.iter_reg[iter],
2417 disp_rtx));
2418 REG_NOTES (spill_fill_data.prev_insn[iter])
2419 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2420 REG_NOTES (spill_fill_data.prev_insn[iter]));
2422 else
2424 /* ??? Could use register post_modify for loads. */
2425 if (! CONST_OK_FOR_I (disp))
2427 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2428 emit_move_insn (tmp, disp_rtx);
2429 disp_rtx = tmp;
2431 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2432 spill_fill_data.iter_reg[iter], disp_rtx));
2435 /* Micro-optimization: if we've created a frame pointer, it's at
2436 CFA 0, which may allow the real iterator to be initialized lower,
2437 slightly increasing parallelism. Also, if there are few saves
2438 it may eliminate the iterator entirely. */
2439 else if (disp == 0
2440 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2441 && frame_pointer_needed)
2443 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2444 set_mem_alias_set (mem, get_varargs_alias_set ());
2445 return mem;
2447 else
2449 rtx seq, insn;
2451 if (disp == 0)
2452 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2453 spill_fill_data.init_reg[iter]);
2454 else
2456 start_sequence ();
2458 if (! CONST_OK_FOR_I (disp))
2460 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2461 emit_move_insn (tmp, disp_rtx);
2462 disp_rtx = tmp;
2465 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2466 spill_fill_data.init_reg[iter],
2467 disp_rtx));
2469 seq = get_insns ();
2470 end_sequence ();
2473 /* Careful for being the first insn in a sequence. */
2474 if (spill_fill_data.init_after)
2475 insn = emit_insn_after (seq, spill_fill_data.init_after);
2476 else
2478 rtx first = get_insns ();
2479 if (first)
2480 insn = emit_insn_before (seq, first);
2481 else
2482 insn = emit_insn (seq);
2484 spill_fill_data.init_after = insn;
2486 /* If DISP is 0, we may or may not have a further adjustment
2487 afterward. If we do, then the load/store insn may be modified
2488 to be a post-modify. If we don't, then this copy may be
2489 eliminated by copyprop_hardreg_forward, which makes this
2490 insn garbage, which runs afoul of the sanity check in
2491 propagate_one_insn. So mark this insn as legal to delete. */
2492 if (disp == 0)
2493 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2494 REG_NOTES (insn));
2497 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2499 /* ??? Not all of the spills are for varargs, but some of them are.
2500 The rest of the spills belong in an alias set of their own. But
2501 it doesn't actually hurt to include them here. */
2502 set_mem_alias_set (mem, get_varargs_alias_set ());
2504 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2505 spill_fill_data.prev_off[iter] = cfa_off;
2507 if (++iter >= spill_fill_data.n_iter)
2508 iter = 0;
2509 spill_fill_data.next_iter = iter;
2511 return mem;
2514 static void
2515 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2516 rtx frame_reg)
2518 int iter = spill_fill_data.next_iter;
2519 rtx mem, insn;
2521 mem = spill_restore_mem (reg, cfa_off);
2522 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2523 spill_fill_data.prev_insn[iter] = insn;
2525 if (frame_reg)
2527 rtx base;
2528 HOST_WIDE_INT off;
2530 RTX_FRAME_RELATED_P (insn) = 1;
2532 /* Don't even pretend that the unwind code can intuit its way
2533 through a pair of interleaved post_modify iterators. Just
2534 provide the correct answer. */
2536 if (frame_pointer_needed)
2538 base = hard_frame_pointer_rtx;
2539 off = - cfa_off;
2541 else
2543 base = stack_pointer_rtx;
2544 off = current_frame_info.total_size - cfa_off;
2547 REG_NOTES (insn)
2548 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2549 gen_rtx_SET (VOIDmode,
2550 gen_rtx_MEM (GET_MODE (reg),
2551 plus_constant (base, off)),
2552 frame_reg),
2553 REG_NOTES (insn));
2557 static void
2558 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2560 int iter = spill_fill_data.next_iter;
2561 rtx insn;
2563 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2564 GEN_INT (cfa_off)));
2565 spill_fill_data.prev_insn[iter] = insn;
2568 /* Wrapper functions that discard the CONST_INT spill offset. These
2569 exist so that we can give gr_spill/gr_fill the offset they need and
2570 use a consistent function interface. */
2572 static rtx
2573 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2575 return gen_movdi (dest, src);
2578 static rtx
2579 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2581 return gen_fr_spill (dest, src);
2584 static rtx
2585 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2587 return gen_fr_restore (dest, src);
2590 /* Called after register allocation to add any instructions needed for the
2591 prologue. Using a prologue insn is favored compared to putting all of the
2592 instructions in output_function_prologue(), since it allows the scheduler
2593 to intermix instructions with the saves of the caller saved registers. In
2594 some cases, it might be necessary to emit a barrier instruction as the last
2595 insn to prevent such scheduling.
2597 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2598 so that the debug info generation code can handle them properly.
2600 The register save area is laid out like so:
2601 cfa+16
2602 [ varargs spill area ]
2603 [ fr register spill area ]
2604 [ br register spill area ]
2605 [ ar register spill area ]
2606 [ pr register spill area ]
2607 [ gr register spill area ] */
2609 /* ??? Get inefficient code when the frame size is larger than can fit in an
2610 adds instruction. */
2612 void
2613 ia64_expand_prologue (void)
2615 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2616 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2617 rtx reg, alt_reg;
2619 ia64_compute_frame_size (get_frame_size ());
2620 last_scratch_gr_reg = 15;
2622 /* If there is no epilogue, then we don't need some prologue insns.
2623 We need to avoid emitting the dead prologue insns, because flow
2624 will complain about them. */
2625 if (optimize)
2627 edge e;
2628 edge_iterator ei;
2630 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2631 if ((e->flags & EDGE_FAKE) == 0
2632 && (e->flags & EDGE_FALLTHRU) != 0)
2633 break;
2634 epilogue_p = (e != NULL);
2636 else
2637 epilogue_p = 1;
2639 /* Set the local, input, and output register names. We need to do this
2640 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2641 half. If we use in/loc/out register names, then we get assembler errors
2642 in crtn.S because there is no alloc insn or regstk directive in there. */
2643 if (! TARGET_REG_NAMES)
2645 int inputs = current_frame_info.n_input_regs;
2646 int locals = current_frame_info.n_local_regs;
2647 int outputs = current_frame_info.n_output_regs;
2649 for (i = 0; i < inputs; i++)
2650 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2651 for (i = 0; i < locals; i++)
2652 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2653 for (i = 0; i < outputs; i++)
2654 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2657 /* Set the frame pointer register name. The regnum is logically loc79,
2658 but of course we'll not have allocated that many locals. Rather than
2659 worrying about renumbering the existing rtxs, we adjust the name. */
2660 /* ??? This code means that we can never use one local register when
2661 there is a frame pointer. loc79 gets wasted in this case, as it is
2662 renamed to a register that will never be used. See also the try_locals
2663 code in find_gr_spill. */
2664 if (current_frame_info.reg_fp)
2666 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2667 reg_names[HARD_FRAME_POINTER_REGNUM]
2668 = reg_names[current_frame_info.reg_fp];
2669 reg_names[current_frame_info.reg_fp] = tmp;
2672 /* We don't need an alloc instruction if we've used no outputs or locals. */
2673 if (current_frame_info.n_local_regs == 0
2674 && current_frame_info.n_output_regs == 0
2675 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2676 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2678 /* If there is no alloc, but there are input registers used, then we
2679 need a .regstk directive. */
2680 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2681 ar_pfs_save_reg = NULL_RTX;
2683 else
2685 current_frame_info.need_regstk = 0;
2687 if (current_frame_info.reg_save_ar_pfs)
2688 regno = current_frame_info.reg_save_ar_pfs;
2689 else
2690 regno = next_scratch_gr_reg ();
2691 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2693 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2694 GEN_INT (current_frame_info.n_input_regs),
2695 GEN_INT (current_frame_info.n_local_regs),
2696 GEN_INT (current_frame_info.n_output_regs),
2697 GEN_INT (current_frame_info.n_rotate_regs)));
2698 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2701 /* Set up frame pointer, stack pointer, and spill iterators. */
2703 n_varargs = cfun->machine->n_varargs;
2704 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2705 stack_pointer_rtx, 0);
2707 if (frame_pointer_needed)
2709 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2710 RTX_FRAME_RELATED_P (insn) = 1;
2713 if (current_frame_info.total_size != 0)
2715 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2716 rtx offset;
2718 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2719 offset = frame_size_rtx;
2720 else
2722 regno = next_scratch_gr_reg ();
2723 offset = gen_rtx_REG (DImode, regno);
2724 emit_move_insn (offset, frame_size_rtx);
2727 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2728 stack_pointer_rtx, offset));
2730 if (! frame_pointer_needed)
2732 RTX_FRAME_RELATED_P (insn) = 1;
2733 if (GET_CODE (offset) != CONST_INT)
2735 REG_NOTES (insn)
2736 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2737 gen_rtx_SET (VOIDmode,
2738 stack_pointer_rtx,
2739 gen_rtx_PLUS (DImode,
2740 stack_pointer_rtx,
2741 frame_size_rtx)),
2742 REG_NOTES (insn));
2746 /* ??? At this point we must generate a magic insn that appears to
2747 modify the stack pointer, the frame pointer, and all spill
2748 iterators. This would allow the most scheduling freedom. For
2749 now, just hard stop. */
2750 emit_insn (gen_blockage ());
2753 /* Must copy out ar.unat before doing any integer spills. */
2754 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2756 if (current_frame_info.reg_save_ar_unat)
2757 ar_unat_save_reg
2758 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2759 else
2761 alt_regno = next_scratch_gr_reg ();
2762 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2763 current_frame_info.gr_used_mask |= 1 << alt_regno;
2766 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2767 insn = emit_move_insn (ar_unat_save_reg, reg);
2768 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2770 /* Even if we're not going to generate an epilogue, we still
2771 need to save the register so that EH works. */
2772 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2773 emit_insn (gen_prologue_use (ar_unat_save_reg));
2775 else
2776 ar_unat_save_reg = NULL_RTX;
2778 /* Spill all varargs registers. Do this before spilling any GR registers,
2779 since we want the UNAT bits for the GR registers to override the UNAT
2780 bits from varargs, which we don't care about. */
2782 cfa_off = -16;
2783 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2785 reg = gen_rtx_REG (DImode, regno);
2786 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2789 /* Locate the bottom of the register save area. */
2790 cfa_off = (current_frame_info.spill_cfa_off
2791 + current_frame_info.spill_size
2792 + current_frame_info.extra_spill_size);
2794 /* Save the predicate register block either in a register or in memory. */
2795 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2797 reg = gen_rtx_REG (DImode, PR_REG (0));
2798 if (current_frame_info.reg_save_pr != 0)
2800 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2801 insn = emit_move_insn (alt_reg, reg);
2803 /* ??? Denote pr spill/fill by a DImode move that modifies all
2804 64 hard registers. */
2805 RTX_FRAME_RELATED_P (insn) = 1;
2806 REG_NOTES (insn)
2807 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2808 gen_rtx_SET (VOIDmode, alt_reg, reg),
2809 REG_NOTES (insn));
2811 /* Even if we're not going to generate an epilogue, we still
2812 need to save the register so that EH works. */
2813 if (! epilogue_p)
2814 emit_insn (gen_prologue_use (alt_reg));
2816 else
2818 alt_regno = next_scratch_gr_reg ();
2819 alt_reg = gen_rtx_REG (DImode, alt_regno);
2820 insn = emit_move_insn (alt_reg, reg);
2821 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2822 cfa_off -= 8;
2826 /* Handle AR regs in numerical order. All of them get special handling. */
2827 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2828 && current_frame_info.reg_save_ar_unat == 0)
2830 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2831 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2832 cfa_off -= 8;
2835 /* The alloc insn already copied ar.pfs into a general register. The
2836 only thing we have to do now is copy that register to a stack slot
2837 if we'd not allocated a local register for the job. */
2838 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2839 && current_frame_info.reg_save_ar_pfs == 0)
2841 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2842 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2843 cfa_off -= 8;
2846 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2848 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2849 if (current_frame_info.reg_save_ar_lc != 0)
2851 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2852 insn = emit_move_insn (alt_reg, reg);
2853 RTX_FRAME_RELATED_P (insn) = 1;
2855 /* Even if we're not going to generate an epilogue, we still
2856 need to save the register so that EH works. */
2857 if (! epilogue_p)
2858 emit_insn (gen_prologue_use (alt_reg));
2860 else
2862 alt_regno = next_scratch_gr_reg ();
2863 alt_reg = gen_rtx_REG (DImode, alt_regno);
2864 emit_move_insn (alt_reg, reg);
2865 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2866 cfa_off -= 8;
2870 if (current_frame_info.reg_save_gp)
2872 insn = emit_move_insn (gen_rtx_REG (DImode,
2873 current_frame_info.reg_save_gp),
2874 pic_offset_table_rtx);
2875 /* We don't know for sure yet if this is actually needed, since
2876 we've not split the PIC call patterns. If all of the calls
2877 are indirect, and not followed by any uses of the gp, then
2878 this save is dead. Allow it to go away. */
2879 REG_NOTES (insn)
2880 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2883 /* We should now be at the base of the gr/br/fr spill area. */
2884 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
2885 + current_frame_info.spill_size));
2887 /* Spill all general registers. */
2888 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2889 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2891 reg = gen_rtx_REG (DImode, regno);
2892 do_spill (gen_gr_spill, reg, cfa_off, reg);
2893 cfa_off -= 8;
2896 /* Handle BR0 specially -- it may be getting stored permanently in
2897 some GR register. */
2898 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2900 reg = gen_rtx_REG (DImode, BR_REG (0));
2901 if (current_frame_info.reg_save_b0 != 0)
2903 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2904 insn = emit_move_insn (alt_reg, reg);
2905 RTX_FRAME_RELATED_P (insn) = 1;
2907 /* Even if we're not going to generate an epilogue, we still
2908 need to save the register so that EH works. */
2909 if (! epilogue_p)
2910 emit_insn (gen_prologue_use (alt_reg));
2912 else
2914 alt_regno = next_scratch_gr_reg ();
2915 alt_reg = gen_rtx_REG (DImode, alt_regno);
2916 emit_move_insn (alt_reg, reg);
2917 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2918 cfa_off -= 8;
2922 /* Spill the rest of the BR registers. */
2923 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2924 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2926 alt_regno = next_scratch_gr_reg ();
2927 alt_reg = gen_rtx_REG (DImode, alt_regno);
2928 reg = gen_rtx_REG (DImode, regno);
2929 emit_move_insn (alt_reg, reg);
2930 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2931 cfa_off -= 8;
2934 /* Align the frame and spill all FR registers. */
2935 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2936 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2938 gcc_assert (!(cfa_off & 15));
2939 reg = gen_rtx_REG (XFmode, regno);
2940 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2941 cfa_off -= 16;
2944 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
2946 finish_spill_pointers ();
2949 /* Called after register allocation to add any instructions needed for the
2950 epilogue. Using an epilogue insn is favored compared to putting all of the
2951 instructions in output_function_prologue(), since it allows the scheduler
2952 to intermix instructions with the saves of the caller saved registers. In
2953 some cases, it might be necessary to emit a barrier instruction as the last
2954 insn to prevent such scheduling. */
2956 void
2957 ia64_expand_epilogue (int sibcall_p)
2959 rtx insn, reg, alt_reg, ar_unat_save_reg;
2960 int regno, alt_regno, cfa_off;
2962 ia64_compute_frame_size (get_frame_size ());
2964 /* If there is a frame pointer, then we use it instead of the stack
2965 pointer, so that the stack pointer does not need to be valid when
2966 the epilogue starts. See EXIT_IGNORE_STACK. */
2967 if (frame_pointer_needed)
2968 setup_spill_pointers (current_frame_info.n_spilled,
2969 hard_frame_pointer_rtx, 0);
2970 else
2971 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2972 current_frame_info.total_size);
2974 if (current_frame_info.total_size != 0)
2976 /* ??? At this point we must generate a magic insn that appears to
2977 modify the spill iterators and the frame pointer. This would
2978 allow the most scheduling freedom. For now, just hard stop. */
2979 emit_insn (gen_blockage ());
2982 /* Locate the bottom of the register save area. */
2983 cfa_off = (current_frame_info.spill_cfa_off
2984 + current_frame_info.spill_size
2985 + current_frame_info.extra_spill_size);
2987 /* Restore the predicate registers. */
2988 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2990 if (current_frame_info.reg_save_pr != 0)
2991 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2992 else
2994 alt_regno = next_scratch_gr_reg ();
2995 alt_reg = gen_rtx_REG (DImode, alt_regno);
2996 do_restore (gen_movdi_x, alt_reg, cfa_off);
2997 cfa_off -= 8;
2999 reg = gen_rtx_REG (DImode, PR_REG (0));
3000 emit_move_insn (reg, alt_reg);
3003 /* Restore the application registers. */
3005 /* Load the saved unat from the stack, but do not restore it until
3006 after the GRs have been restored. */
3007 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3009 if (current_frame_info.reg_save_ar_unat != 0)
3010 ar_unat_save_reg
3011 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3012 else
3014 alt_regno = next_scratch_gr_reg ();
3015 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3016 current_frame_info.gr_used_mask |= 1 << alt_regno;
3017 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3018 cfa_off -= 8;
3021 else
3022 ar_unat_save_reg = NULL_RTX;
3024 if (current_frame_info.reg_save_ar_pfs != 0)
3026 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3027 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3028 emit_move_insn (reg, alt_reg);
3030 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3032 alt_regno = next_scratch_gr_reg ();
3033 alt_reg = gen_rtx_REG (DImode, alt_regno);
3034 do_restore (gen_movdi_x, alt_reg, cfa_off);
3035 cfa_off -= 8;
3036 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3037 emit_move_insn (reg, alt_reg);
3040 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3042 if (current_frame_info.reg_save_ar_lc != 0)
3043 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3044 else
3046 alt_regno = next_scratch_gr_reg ();
3047 alt_reg = gen_rtx_REG (DImode, alt_regno);
3048 do_restore (gen_movdi_x, alt_reg, cfa_off);
3049 cfa_off -= 8;
3051 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3052 emit_move_insn (reg, alt_reg);
3055 /* We should now be at the base of the gr/br/fr spill area. */
3056 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3057 + current_frame_info.spill_size));
3059 /* The GP may be stored on the stack in the prologue, but it's
3060 never restored in the epilogue. Skip the stack slot. */
3061 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3062 cfa_off -= 8;
3064 /* Restore all general registers. */
3065 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3066 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3068 reg = gen_rtx_REG (DImode, regno);
3069 do_restore (gen_gr_restore, reg, cfa_off);
3070 cfa_off -= 8;
3073 /* Restore the branch registers. Handle B0 specially, as it may
3074 have gotten stored in some GR register. */
3075 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3077 if (current_frame_info.reg_save_b0 != 0)
3078 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3079 else
3081 alt_regno = next_scratch_gr_reg ();
3082 alt_reg = gen_rtx_REG (DImode, alt_regno);
3083 do_restore (gen_movdi_x, alt_reg, cfa_off);
3084 cfa_off -= 8;
3086 reg = gen_rtx_REG (DImode, BR_REG (0));
3087 emit_move_insn (reg, alt_reg);
3090 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3091 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3093 alt_regno = next_scratch_gr_reg ();
3094 alt_reg = gen_rtx_REG (DImode, alt_regno);
3095 do_restore (gen_movdi_x, alt_reg, cfa_off);
3096 cfa_off -= 8;
3097 reg = gen_rtx_REG (DImode, regno);
3098 emit_move_insn (reg, alt_reg);
3101 /* Restore floating point registers. */
3102 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3103 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3105 gcc_assert (!(cfa_off & 15));
3106 reg = gen_rtx_REG (XFmode, regno);
3107 do_restore (gen_fr_restore_x, reg, cfa_off);
3108 cfa_off -= 16;
3111 /* Restore ar.unat for real. */
3112 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3114 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3115 emit_move_insn (reg, ar_unat_save_reg);
3118 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3120 finish_spill_pointers ();
3122 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3124 /* ??? At this point we must generate a magic insn that appears to
3125 modify the spill iterators, the stack pointer, and the frame
3126 pointer. This would allow the most scheduling freedom. For now,
3127 just hard stop. */
3128 emit_insn (gen_blockage ());
3131 if (cfun->machine->ia64_eh_epilogue_sp)
3132 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3133 else if (frame_pointer_needed)
3135 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3136 RTX_FRAME_RELATED_P (insn) = 1;
3138 else if (current_frame_info.total_size)
3140 rtx offset, frame_size_rtx;
3142 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3143 if (CONST_OK_FOR_I (current_frame_info.total_size))
3144 offset = frame_size_rtx;
3145 else
3147 regno = next_scratch_gr_reg ();
3148 offset = gen_rtx_REG (DImode, regno);
3149 emit_move_insn (offset, frame_size_rtx);
3152 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3153 offset));
3155 RTX_FRAME_RELATED_P (insn) = 1;
3156 if (GET_CODE (offset) != CONST_INT)
3158 REG_NOTES (insn)
3159 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3160 gen_rtx_SET (VOIDmode,
3161 stack_pointer_rtx,
3162 gen_rtx_PLUS (DImode,
3163 stack_pointer_rtx,
3164 frame_size_rtx)),
3165 REG_NOTES (insn));
3169 if (cfun->machine->ia64_eh_epilogue_bsp)
3170 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3172 if (! sibcall_p)
3173 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3174 else
3176 int fp = GR_REG (2);
3177 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
3178 first available call-clobbered register. If there was a frame pointer
3179 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3180 so we have to make sure we're using the string "r2" when emitting
3181 the register name for the assembler. */
3182 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3183 fp = HARD_FRAME_POINTER_REGNUM;
3185 /* We must emit an alloc to force the input registers to become output
3186 registers. Otherwise, if the callee tries to pass its parameters
3187 through to another call without an intervening alloc, then these
3188 values get lost. */
3189 /* ??? We don't need to preserve all input registers. We only need to
3190 preserve those input registers used as arguments to the sibling call.
3191 It is unclear how to compute that number here. */
3192 if (current_frame_info.n_input_regs != 0)
3194 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3195 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3196 const0_rtx, const0_rtx,
3197 n_inputs, const0_rtx));
3198 RTX_FRAME_RELATED_P (insn) = 1;
3203 /* Return 1 if br.ret can do all the work required to return from a
3204 function. */
3206 int
3207 ia64_direct_return (void)
3209 if (reload_completed && ! frame_pointer_needed)
3211 ia64_compute_frame_size (get_frame_size ());
3213 return (current_frame_info.total_size == 0
3214 && current_frame_info.n_spilled == 0
3215 && current_frame_info.reg_save_b0 == 0
3216 && current_frame_info.reg_save_pr == 0
3217 && current_frame_info.reg_save_ar_pfs == 0
3218 && current_frame_info.reg_save_ar_unat == 0
3219 && current_frame_info.reg_save_ar_lc == 0);
3221 return 0;
3224 /* Return the magic cookie that we use to hold the return address
3225 during early compilation. */
3227 rtx
3228 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3230 if (count != 0)
3231 return NULL;
3232 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3235 /* Split this value after reload, now that we know where the return
3236 address is saved. */
3238 void
3239 ia64_split_return_addr_rtx (rtx dest)
3241 rtx src;
3243 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3245 if (current_frame_info.reg_save_b0 != 0)
3246 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3247 else
3249 HOST_WIDE_INT off;
3250 unsigned int regno;
3252 /* Compute offset from CFA for BR0. */
3253 /* ??? Must be kept in sync with ia64_expand_prologue. */
3254 off = (current_frame_info.spill_cfa_off
3255 + current_frame_info.spill_size);
3256 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3257 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3258 off -= 8;
3260 /* Convert CFA offset to a register based offset. */
3261 if (frame_pointer_needed)
3262 src = hard_frame_pointer_rtx;
3263 else
3265 src = stack_pointer_rtx;
3266 off += current_frame_info.total_size;
3269 /* Load address into scratch register. */
3270 if (CONST_OK_FOR_I (off))
3271 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3272 else
3274 emit_move_insn (dest, GEN_INT (off));
3275 emit_insn (gen_adddi3 (dest, src, dest));
3278 src = gen_rtx_MEM (Pmode, dest);
3281 else
3282 src = gen_rtx_REG (DImode, BR_REG (0));
3284 emit_move_insn (dest, src);
3287 int
3288 ia64_hard_regno_rename_ok (int from, int to)
3290 /* Don't clobber any of the registers we reserved for the prologue. */
3291 if (to == current_frame_info.reg_fp
3292 || to == current_frame_info.reg_save_b0
3293 || to == current_frame_info.reg_save_pr
3294 || to == current_frame_info.reg_save_ar_pfs
3295 || to == current_frame_info.reg_save_ar_unat
3296 || to == current_frame_info.reg_save_ar_lc)
3297 return 0;
3299 if (from == current_frame_info.reg_fp
3300 || from == current_frame_info.reg_save_b0
3301 || from == current_frame_info.reg_save_pr
3302 || from == current_frame_info.reg_save_ar_pfs
3303 || from == current_frame_info.reg_save_ar_unat
3304 || from == current_frame_info.reg_save_ar_lc)
3305 return 0;
3307 /* Don't use output registers outside the register frame. */
3308 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3309 return 0;
3311 /* Retain even/oddness on predicate register pairs. */
3312 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3313 return (from & 1) == (to & 1);
3315 return 1;
3318 /* Target hook for assembling integer objects. Handle word-sized
3319 aligned objects and detect the cases when @fptr is needed. */
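/* Example (illustrative): for an aligned 64-bit pointer to the function
   "foo" this emits "\tdata8\t@fptr(foo)"; the @fptr relocation makes the
   linker supply an official function descriptor for the symbol.  */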
3321 static bool
3322 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3324 if (size == POINTER_SIZE / BITS_PER_UNIT
3325 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3326 && GET_CODE (x) == SYMBOL_REF
3327 && SYMBOL_REF_FUNCTION_P (x))
3329 static const char * const directive[2][2] = {
3330 /* 64-bit pointer */ /* 32-bit pointer */
3331 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3332 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3334 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3335 output_addr_const (asm_out_file, x);
3336 fputs (")\n", asm_out_file);
3337 return true;
3339 return default_assemble_integer (x, size, aligned_p);
3342 /* Emit the function prologue. */
3344 static void
3345 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3347 int mask, grsave, grsave_prev;
3349 if (current_frame_info.need_regstk)
3350 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3351 current_frame_info.n_input_regs,
3352 current_frame_info.n_local_regs,
3353 current_frame_info.n_output_regs,
3354 current_frame_info.n_rotate_regs);
3356 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3357 return;
3359 /* Emit the .prologue directive. */
3361 mask = 0;
3362 grsave = grsave_prev = 0;
3363 if (current_frame_info.reg_save_b0 != 0)
3365 mask |= 8;
3366 grsave = grsave_prev = current_frame_info.reg_save_b0;
3368 if (current_frame_info.reg_save_ar_pfs != 0
3369 && (grsave_prev == 0
3370 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3372 mask |= 4;
3373 if (grsave_prev == 0)
3374 grsave = current_frame_info.reg_save_ar_pfs;
3375 grsave_prev = current_frame_info.reg_save_ar_pfs;
3377 if (current_frame_info.reg_fp != 0
3378 && (grsave_prev == 0
3379 || current_frame_info.reg_fp == grsave_prev + 1))
3381 mask |= 2;
3382 if (grsave_prev == 0)
3383 grsave = HARD_FRAME_POINTER_REGNUM;
3384 grsave_prev = current_frame_info.reg_fp;
3386 if (current_frame_info.reg_save_pr != 0
3387 && (grsave_prev == 0
3388 || current_frame_info.reg_save_pr == grsave_prev + 1))
3390 mask |= 1;
3391 if (grsave_prev == 0)
3392 grsave = current_frame_info.reg_save_pr;
3395 if (mask && TARGET_GNU_AS)
3396 fprintf (file, "\t.prologue %d, %d\n", mask,
3397 ia64_dbx_register_number (grsave));
3398 else
3399 fputs ("\t.prologue\n", file);
3401 /* Emit a .spill directive, if necessary, to relocate the base of
3402 the register spill area. */
3403 if (current_frame_info.spill_cfa_off != -16)
3404 fprintf (file, "\t.spill %ld\n",
3405 (long) (current_frame_info.spill_cfa_off
3406 + current_frame_info.spill_size));
3409 /* Emit the .body directive at the scheduled end of the prologue. */
3411 static void
3412 ia64_output_function_end_prologue (FILE *file)
3414 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3415 return;
3417 fputs ("\t.body\n", file);
3420 /* Emit the function epilogue. */
3422 static void
3423 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3424 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3426 int i;
3428 if (current_frame_info.reg_fp)
3430 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3431 reg_names[HARD_FRAME_POINTER_REGNUM]
3432 = reg_names[current_frame_info.reg_fp];
3433 reg_names[current_frame_info.reg_fp] = tmp;
3435 if (! TARGET_REG_NAMES)
3437 for (i = 0; i < current_frame_info.n_input_regs; i++)
3438 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3439 for (i = 0; i < current_frame_info.n_local_regs; i++)
3440 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3441 for (i = 0; i < current_frame_info.n_output_regs; i++)
3442 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3445 current_frame_info.initialized = 0;
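/* Example (illustrative): with 2 input and 3 local registers, in0-in1 map
   to DBX numbers 32-33, loc0-loc2 to 34-36, and out0 onward to 37 and up,
   matching the in/loc/out layout of the renamed register frame.  */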
3448 int
3449 ia64_dbx_register_number (int regno)
3451 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3452 from its home at loc79 to something inside the register frame. We
3453 must perform the same renumbering here for the debug info. */
3454 if (current_frame_info.reg_fp)
3456 if (regno == HARD_FRAME_POINTER_REGNUM)
3457 regno = current_frame_info.reg_fp;
3458 else if (regno == current_frame_info.reg_fp)
3459 regno = HARD_FRAME_POINTER_REGNUM;
3462 if (IN_REGNO_P (regno))
3463 return 32 + regno - IN_REG (0);
3464 else if (LOC_REGNO_P (regno))
3465 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3466 else if (OUT_REGNO_P (regno))
3467 return (32 + current_frame_info.n_input_regs
3468 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3469 else
3470 return regno;
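/* Summary added for clarity (derived from the code below): the trampoline
   at ADDR is four 8-byte words:
     word 0: the address of the shared __ia64_trampoline entry stub
     word 1: ADDR + 16, used as the fake descriptor's gp
     word 2: FNADDR, the target function descriptor address
     word 3: the STATIC_CHAIN value
   The stores below walk these slots through an iterator advanced by 8.  */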
3473 void
3474 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3476 rtx addr_reg, eight = GEN_INT (8);
3478 /* The Intel assembler requires that the global __ia64_trampoline symbol
3479 be declared explicitly. */
3480 if (!TARGET_GNU_AS)
3482 static bool declared_ia64_trampoline = false;
3484 if (!declared_ia64_trampoline)
3486 declared_ia64_trampoline = true;
3487 (*targetm.asm_out.globalize_label) (asm_out_file,
3488 "__ia64_trampoline");
3492 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3493 addr = convert_memory_address (Pmode, addr);
3494 fnaddr = convert_memory_address (Pmode, fnaddr);
3495 static_chain = convert_memory_address (Pmode, static_chain);
3497 /* Load up our iterator. */
3498 addr_reg = gen_reg_rtx (Pmode);
3499 emit_move_insn (addr_reg, addr);
3501 /* The first two words are the fake descriptor:
3502 __ia64_trampoline, ADDR+16. */
3503 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3504 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3505 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3507 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3508 copy_to_reg (plus_constant (addr, 16)));
3509 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3511 /* The third word is the target descriptor. */
3512 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3513 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3515 /* The fourth word is the static chain. */
3516 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3519 /* Do any needed setup for a variadic function. CUM has not been updated
3520 for the last named argument which has type TYPE and mode MODE.
3522 We generate the actual spill instructions during prologue generation. */
3524 static void
3525 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3526 tree type, int * pretend_size,
3527 int second_time ATTRIBUTE_UNUSED)
3529 CUMULATIVE_ARGS next_cum = *cum;
3531 /* Skip the current argument. */
3532 ia64_function_arg_advance (&next_cum, mode, type, 1);
3534 if (next_cum.words < MAX_ARGUMENT_SLOTS)
3536 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3537 *pretend_size = n * UNITS_PER_WORD;
3538 cfun->machine->n_varargs = n;
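/* Example (illustrative): if the named arguments occupy three slots,
   n = MAX_ARGUMENT_SLOTS - 3 = 5, so *pretend_size becomes 40 bytes and
   the prologue later spills the last five incoming argument registers.  */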
3542 /* Check whether TYPE is a homogeneous floating point aggregate. If
3543 it is, return the mode of the floating point type that appears
3544 in all leaves. If it is not, return VOIDmode.
3546 An aggregate is a homogeneous floating point aggregate if all
3547 fields/elements in it have the same floating point type (e.g.,
3548 SFmode). 128-bit quad-precision floats are excluded.
3550 Variable sized aggregates should never arrive here, since we should
3551 have already decided to pass them by reference. Top-level zero-sized
3552 aggregates are excluded because our parallels crash the middle-end. */
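/* Examples (illustrative): "struct { float x, y, z; }" is an HFA and
   yields SFmode; "struct { double d; float f; }" and
   "struct { double d; int i; }" are not HFAs and yield VOIDmode;
   "_Complex double" yields DFmode.  */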
3554 static enum machine_mode
3555 hfa_element_mode (tree type, bool nested)
3557 enum machine_mode element_mode = VOIDmode;
3558 enum machine_mode mode;
3559 enum tree_code code = TREE_CODE (type);
3560 int know_element_mode = 0;
3561 tree t;
3563 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3564 return VOIDmode;
3566 switch (code)
3568 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3569 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3570 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3571 case LANG_TYPE: case FUNCTION_TYPE:
3572 return VOIDmode;
3574 /* Fortran complex types are supposed to be HFAs, so we need to handle
3575 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3576 types though. */
3577 case COMPLEX_TYPE:
3578 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3579 && TYPE_MODE (type) != TCmode)
3580 return GET_MODE_INNER (TYPE_MODE (type));
3581 else
3582 return VOIDmode;
3584 case REAL_TYPE:
3585 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3586 mode if this is contained within an aggregate. */
3587 if (nested && TYPE_MODE (type) != TFmode)
3588 return TYPE_MODE (type);
3589 else
3590 return VOIDmode;
3592 case ARRAY_TYPE:
3593 return hfa_element_mode (TREE_TYPE (type), 1);
3595 case RECORD_TYPE:
3596 case UNION_TYPE:
3597 case QUAL_UNION_TYPE:
3598 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3600 if (TREE_CODE (t) != FIELD_DECL)
3601 continue;
3603 mode = hfa_element_mode (TREE_TYPE (t), 1);
3604 if (know_element_mode)
3606 if (mode != element_mode)
3607 return VOIDmode;
3609 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3610 return VOIDmode;
3611 else
3613 know_element_mode = 1;
3614 element_mode = mode;
3617 return element_mode;
3619 default:
3620 /* If we reach here, we probably have some front-end specific type
3621 that the backend doesn't know about. This can happen via the
3622 aggregate_value_p call in init_function_start. All we can do is
3623 ignore unknown tree types. */
3624 return VOIDmode;
3627 return VOIDmode;
3630 /* Return the number of words required to hold a quantity of TYPE and MODE
3631 when passed as an argument. */
3632 static int
3633 ia64_function_arg_words (tree type, enum machine_mode mode)
3635 int words;
3637 if (mode == BLKmode)
3638 words = int_size_in_bytes (type);
3639 else
3640 words = GET_MODE_SIZE (mode);
3642 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
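/* Example (illustrative): a 12-byte BLKmode aggregate needs
   (12 + 8 - 1) / 8 = 2 argument words; a DImode scalar needs 1.  */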
3645 /* Return the number of registers that should be skipped so the current
3646 argument (described by TYPE and WORDS) will be properly aligned.
3648 Integer and float arguments larger than 8 bytes start at the next
3649 even boundary. Aggregates larger than 8 bytes start at the next
3650 even boundary if the aggregate has 16 byte alignment. Note that
3651 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3652 but are still to be aligned in registers.
3654 ??? The ABI does not specify how to handle aggregates with
3655 alignment from 9 to 15 bytes, or greater than 16. We handle them
3656 all as if they had 16 byte alignment. Such aggregates can occur
3657 only if gcc extensions are used. */
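/* Example (illustrative): when cum->words is odd, an aggregate with
   16-byte alignment or a TFmode long double skips one slot so that it
   starts on an even slot boundary; a plain 8-byte integer does not.  */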
3658 static int
3659 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3661 if ((cum->words & 1) == 0)
3662 return 0;
3664 if (type
3665 && TREE_CODE (type) != INTEGER_TYPE
3666 && TREE_CODE (type) != REAL_TYPE)
3667 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3668 else
3669 return words > 1;
3672 /* Return rtx for register where argument is passed, or zero if it is passed
3673 on the stack. */
3674 /* ??? 128-bit quad-precision floats are always passed in general
3675 registers. */
3677 rtx
3678 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3679 int named, int incoming)
3681 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3682 int words = ia64_function_arg_words (type, mode);
3683 int offset = ia64_function_arg_offset (cum, type, words);
3684 enum machine_mode hfa_mode = VOIDmode;
3686 /* If all argument slots are used, then it must go on the stack. */
3687 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3688 return 0;
3690 /* Check for and handle homogeneous FP aggregates. */
3691 if (type)
3692 hfa_mode = hfa_element_mode (type, 0);
3694 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3695 and unprototyped hfas are passed specially. */
3696 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3698 rtx loc[16];
3699 int i = 0;
3700 int fp_regs = cum->fp_regs;
3701 int int_regs = cum->words + offset;
3702 int hfa_size = GET_MODE_SIZE (hfa_mode);
3703 int byte_size;
3704 int args_byte_size;
3706 /* If prototyped, pass it in FR regs then GR regs.
3707 If not prototyped, pass it in both FR and GR regs.
3709 If this is an SFmode aggregate, then it is possible to run out of
3710 FR regs while GR regs are still left. In that case, we pass the
3711 remaining part in the GR regs. */
3713 /* Fill the FP regs. We do this always. We stop if we reach the end
3714 of the argument, the last FP register, or the last argument slot. */
3716 byte_size = ((mode == BLKmode)
3717 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3718 args_byte_size = int_regs * UNITS_PER_WORD;
3719 offset = 0;
3720 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3721 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3723 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3724 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3725 + fp_regs)),
3726 GEN_INT (offset));
3727 offset += hfa_size;
3728 args_byte_size += hfa_size;
3729 fp_regs++;
3732 /* If no prototype, then the whole thing must go in GR regs. */
3733 if (! cum->prototype)
3734 offset = 0;
3735 /* If this is an SFmode aggregate, then we might have some left over
3736 that needs to go in GR regs. */
3737 else if (byte_size != offset)
3738 int_regs += offset / UNITS_PER_WORD;
3740 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3742 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3744 enum machine_mode gr_mode = DImode;
3745 unsigned int gr_size;
3747 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3748 then this goes in a GR reg left adjusted/little endian, right
3749 adjusted/big endian. */
3750 /* ??? Currently this is handled wrong, because 4-byte hunks are
3751 always right adjusted/little endian. */
3752 if (offset & 0x4)
3753 gr_mode = SImode;
3754 /* If we have an even 4 byte hunk because the aggregate is a
3755 multiple of 4 bytes in size, then this goes in a GR reg right
3756 adjusted/little endian. */
3757 else if (byte_size - offset == 4)
3758 gr_mode = SImode;
3760 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3761 gen_rtx_REG (gr_mode, (basereg
3762 + int_regs)),
3763 GEN_INT (offset));
3765 gr_size = GET_MODE_SIZE (gr_mode);
3766 offset += gr_size;
3767 if (gr_size == UNITS_PER_WORD
3768 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3769 int_regs++;
3770 else if (gr_size > UNITS_PER_WORD)
3771 int_regs += gr_size / UNITS_PER_WORD;
3773 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3776 /* Integral types and aggregates go in general registers. If we have run
3777 out of FR registers, then FP values must also go in general registers.
3778 This can happen when we have an SFmode HFA. */
3779 else if (mode == TFmode || mode == TCmode
3780 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3782 int byte_size = ((mode == BLKmode)
3783 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3784 if (BYTES_BIG_ENDIAN
3785 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3786 && byte_size < UNITS_PER_WORD
3787 && byte_size > 0)
3789 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3790 gen_rtx_REG (DImode,
3791 (basereg + cum->words
3792 + offset)),
3793 const0_rtx);
3794 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3796 else
3797 return gen_rtx_REG (mode, basereg + cum->words + offset);
3801 /* If there is a prototype, then FP values go in a FR register when
3802 named, and in a GR register when unnamed. */
3803 else if (cum->prototype)
3805 if (named)
3806 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3807 /* In big-endian mode, an anonymous SFmode value must be represented
3808 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3809 the value into the high half of the general register. */
3810 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3811 return gen_rtx_PARALLEL (mode,
3812 gen_rtvec (1,
3813 gen_rtx_EXPR_LIST (VOIDmode,
3814 gen_rtx_REG (DImode, basereg + cum->words + offset),
3815 const0_rtx)));
3816 else
3817 return gen_rtx_REG (mode, basereg + cum->words + offset);
3819 /* If there is no prototype, then FP values go in both FR and GR
3820 registers. */
3821 else
3823 /* See comment above. */
3824 enum machine_mode inner_mode =
3825 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3827 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3828 gen_rtx_REG (mode, (FR_ARG_FIRST
3829 + cum->fp_regs)),
3830 const0_rtx);
3831 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3832 gen_rtx_REG (inner_mode,
3833 (basereg + cum->words
3834 + offset)),
3835 const0_rtx);
3837 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
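/* Editor's note: a simplified, counts-only sketch (not part of ia64.c)
   of the SFmode HFA split performed above.  It ignores the rtl, the
   unprototyped case, and the 64-byte total argument-area cap, and only
   reports how many 4-byte elements land in FR registers and how many
   8-byte GR slots take the remainder; all names are hypothetical.  */
struct sketch_hfa_split { int fr_elems; int gr_slots; };
static struct sketch_hfa_split
sketch_split_sf_hfa (int n_elems, int free_fr_regs)
{
  struct sketch_hfa_split s;
  int leftover;
  s.fr_elems = n_elems < free_fr_regs ? n_elems : free_fr_regs;
  leftover = n_elems - s.fr_elems;
  s.gr_slots = (leftover * 4 + 7) / 8; /* two floats pack per GR slot */
  return s;
}
/* Example: a prototyped, named float[12] HFA with all 8 FR argument
   registers free puts 8 elements in f8-f15 and needs (4*4+7)/8 == 2
   GR slots for the remaining 16 bytes, matching the two loops above.  */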
3841 /* Return the number of bytes, at the beginning of the argument, that must be
3842 put in registers. 0 means the argument is entirely in registers or entirely
3843 in memory. */
3845 static int
3846 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3847 tree type, bool named ATTRIBUTE_UNUSED)
3849 int words = ia64_function_arg_words (type, mode);
3850 int offset = ia64_function_arg_offset (cum, type, words);
3852 /* If all argument slots are used, then it must go on the stack. */
3853 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3854 return 0;
3856 /* It doesn't matter whether the argument goes in FR or GR regs. If
3857 it fits within the 8 argument slots, then it goes entirely in
3858 registers. If it extends past the last argument slot, then the rest
3859 goes on the stack. */
3861 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3862 return 0;
3864 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
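/* Editor's note: the straddling computation above on concrete numbers;
   a sketch only, not part of ia64.c, assuming MAX_ARGUMENT_SLOTS == 8
   and 8-byte words.  The helper name is hypothetical.  */
static int
sketch_partial_bytes_in_regs (int start_slot, int arg_words)
{
  const int max_slots = 8, word_bytes = 8;
  if (start_slot >= max_slots)
    return 0;                          /* entirely on the stack */
  if (start_slot + arg_words <= max_slots)
    return 0;                          /* entirely in registers */
  return (max_slots - start_slot) * word_bytes;
}
/* Example: a 24-byte struct starting in slot 6 gets
   sketch_partial_bytes_in_regs (6, 3) == 16 bytes passed in registers
   and its final 8 bytes on the stack.  */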
3867 /* Update CUM to point after this argument. This is patterned after
3868 ia64_function_arg. */
3870 void
3871 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3872 tree type, int named)
3874 int words = ia64_function_arg_words (type, mode);
3875 int offset = ia64_function_arg_offset (cum, type, words);
3876 enum machine_mode hfa_mode = VOIDmode;
3878 /* If all arg slots are already full, then there is nothing to do. */
3879 if (cum->words >= MAX_ARGUMENT_SLOTS)
3880 return;
3882 cum->words += words + offset;
3884 /* Check for and handle homogeneous FP aggregates. */
3885 if (type)
3886 hfa_mode = hfa_element_mode (type, 0);
3888 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3889 and unprototyped hfas are passed specially. */
3890 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3892 int fp_regs = cum->fp_regs;
3893 /* This is the original value of cum->words + offset. */
3894 int int_regs = cum->words - words;
3895 int hfa_size = GET_MODE_SIZE (hfa_mode);
3896 int byte_size;
3897 int args_byte_size;
3899 /* If prototyped, pass it in FR regs then GR regs.
3900 If not prototyped, pass it in both FR and GR regs.
3902 If this is an SFmode aggregate, then it is possible to run out of
3903 FR regs while GR regs are still left. In that case, we pass the
3904 remaining part in the GR regs. */
3906 /* Fill the FP regs. We do this always. We stop if we reach the end
3907 of the argument, the last FP register, or the last argument slot. */
3909 byte_size = ((mode == BLKmode)
3910 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3911 args_byte_size = int_regs * UNITS_PER_WORD;
3912 offset = 0;
3913 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3914 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3916 offset += hfa_size;
3917 args_byte_size += hfa_size;
3918 fp_regs++;
3921 cum->fp_regs = fp_regs;
3924 /* Integral types and aggregates go in general registers. So do TFmode FP
3925 values. If we have run out of FR registers, then other FP values must
3926 also go in general registers. This can happen when we have an SFmode HFA. */
3927 else if (mode == TFmode || mode == TCmode
3928 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3929 cum->int_regs = cum->words;
3931 /* If there is a prototype, then FP values go in a FR register when
3932 named, and in a GR register when unnamed. */
3933 else if (cum->prototype)
3935 if (! named)
3936 cum->int_regs = cum->words;
3937 else
3938 /* ??? Complex types should not reach here. */
3939 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3941 /* If there is no prototype, then FP values go in both FR and GR
3942 registers. */
3943 else
3945 /* ??? Complex types should not reach here. */
3946 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3947 cum->int_regs = cum->words;
3951 /* Arguments with alignment larger than 8 bytes start at the next even
3952 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
3953 even though their normal alignment is 8 bytes. See ia64_function_arg. */
3956 ia64_function_arg_boundary (enum machine_mode mode, tree type)
3959 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
3960 return PARM_BOUNDARY * 2;
3962 if (type)
3964 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
3965 return PARM_BOUNDARY * 2;
3966 else
3967 return PARM_BOUNDARY;
3970 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
3971 return PARM_BOUNDARY * 2;
3972 else
3973 return PARM_BOUNDARY;
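/* Editor's note: a sketch (not part of ia64.c) of the doubling rule
   above, assuming PARM_BOUNDARY is 64 bits, i.e. one 8-byte slot; the
   helper and its parameters are hypothetical.  */
static int
sketch_arg_boundary_bits (int type_align_bits, int ilp32_hpux_tfmode)
{
  const int parm_boundary = 64;        /* assumption: one slot */
  if (ilp32_hpux_tfmode)
    return parm_boundary * 2;          /* the HP-UX special case above */
  return type_align_bits > parm_boundary ? parm_boundary * 2
                                         : parm_boundary;
}
/* Example: a struct declared __attribute__((aligned(16))) has
   TYPE_ALIGN == 128 and is placed on a 128-bit (even slot) boundary,
   while an ordinary 8-byte long stays on the 64-bit boundary.  */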
3976 /* Variable sized types are passed by reference. */
3977 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3979 static bool
3980 ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3981 enum machine_mode mode ATTRIBUTE_UNUSED,
3982 tree type, bool named ATTRIBUTE_UNUSED)
3984 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3987 /* True if it is OK to do sibling call optimization for the specified
3988 call expression EXP. DECL will be the called function, or NULL if
3989 this is an indirect call. */
3990 static bool
3991 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3993 /* We can't perform a sibcall if the current function has the syscall_linkage
3994 attribute. */
3995 if (lookup_attribute ("syscall_linkage",
3996 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
3997 return false;
3999 /* We must always return with our current GP. This means we can
4000 only sibcall to functions defined in the current module. */
4001 return decl && (*targetm.binds_local_p) (decl);
4005 /* Implement va_arg. */
4007 static tree
4008 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4010 /* Variable sized types are passed by reference. */
4011 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4013 tree ptrtype = build_pointer_type (type);
4014 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4015 return build_va_arg_indirect_ref (addr);
4018 /* Aggregate arguments with alignment larger than 8 bytes start at
4019 the next even boundary. Integer and floating point arguments
4020 do so if they are larger than 8 bytes, whether or not they are
4021 also aligned larger than 8 bytes. */
4022 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4023 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4025 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
4026 build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
4027 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4028 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
4029 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4030 gimplify_and_add (t, pre_p);
4033 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
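/* Editor's note: a sketch (not part of ia64.c) of the pointer round-up
   built above, written out on a plain unsigned long (assuming an LP64
   host for illustration).  With UNITS_PER_WORD == 8 the PLUS_EXPR /
   BIT_AND_EXPR pair aligns the va_list pointer to 16 bytes.  */
static unsigned long
sketch_align_va_ptr (unsigned long p)
{
  return (p + 2 * 8 - 1) & ~(unsigned long) (2 * 8 - 1);
}
/* Example: sketch_align_va_ptr (0x1008) == 0x1010, while an already
   16-byte-aligned 0x1010 is returned unchanged.  */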
4036 /* Return 1 if the function return value is returned in memory. Return 0
4037 if it is returned in a register. */
4039 static bool
4040 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
4042 enum machine_mode mode;
4043 enum machine_mode hfa_mode;
4044 HOST_WIDE_INT byte_size;
4046 mode = TYPE_MODE (valtype);
4047 byte_size = GET_MODE_SIZE (mode);
4048 if (mode == BLKmode)
4050 byte_size = int_size_in_bytes (valtype);
4051 if (byte_size < 0)
4052 return true;
4055 /* HFAs with up to 8 elements are returned in the FP argument registers. */
4057 hfa_mode = hfa_element_mode (valtype, 0);
4058 if (hfa_mode != VOIDmode)
4060 int hfa_size = GET_MODE_SIZE (hfa_mode);
4062 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4063 return true;
4064 else
4065 return false;
4067 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4068 return true;
4069 else
4070 return false;
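/* Editor's note: a sketch (not part of ia64.c) of the decision above
   on concrete sizes, assuming MAX_INT_RETURN_SLOTS == 4 (32 bytes of
   GR return slots) and 8-byte words; the helper is hypothetical and
   ignores the variable-sized (byte_size < 0) case.  */
static int
sketch_returns_in_memory (int byte_size, int hfa_elem_size)
{
  if (hfa_elem_size > 0)               /* homogeneous FP aggregate */
    return byte_size / hfa_elem_size > 8;
  return byte_size > 4 * 8;
}
/* Examples: struct { double a, b, c, d; } is an HFA of four doubles
   and comes back in FP registers; struct { long x[5]; } is 40 bytes
   and is returned through a hidden memory buffer.  */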
4073 /* Return rtx for register that holds the function return value. */
4076 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4078 enum machine_mode mode;
4079 enum machine_mode hfa_mode;
4081 mode = TYPE_MODE (valtype);
4082 hfa_mode = hfa_element_mode (valtype, 0);
4084 if (hfa_mode != VOIDmode)
4086 rtx loc[8];
4087 int i;
4088 int hfa_size;
4089 int byte_size;
4090 int offset;
4092 hfa_size = GET_MODE_SIZE (hfa_mode);
4093 byte_size = ((mode == BLKmode)
4094 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4095 offset = 0;
4096 for (i = 0; offset < byte_size; i++)
4098 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4099 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4100 GEN_INT (offset));
4101 offset += hfa_size;
4103 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4105 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4106 return gen_rtx_REG (mode, FR_ARG_FIRST);
4107 else
4109 bool need_parallel = false;
4111 /* In big-endian mode, we need to manage the layout of aggregates
4112 in the registers so that we get the bits properly aligned in
4113 the highpart of the registers. */
4114 if (BYTES_BIG_ENDIAN
4115 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4116 need_parallel = true;
4118 /* Something like struct S { long double x; char a[0] } is not an
4119 HFA structure, and therefore doesn't go in fp registers. But
4120 the middle-end will give it XFmode anyway, and XFmode values
4121 don't normally fit in integer registers. So we need to smuggle
4122 the value inside a parallel. */
4123 else if (mode == XFmode || mode == XCmode)
4124 need_parallel = true;
4126 if (need_parallel)
4128 rtx loc[8];
4129 int offset;
4130 int bytesize;
4131 int i;
4133 offset = 0;
4134 bytesize = int_size_in_bytes (valtype);
4135 /* An empty PARALLEL is invalid here, but the return value
4136 doesn't matter for empty structs. */
4137 if (bytesize == 0)
4138 return gen_rtx_REG (mode, GR_RET_FIRST);
4139 for (i = 0; offset < bytesize; i++)
4141 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4142 gen_rtx_REG (DImode,
4143 GR_RET_FIRST + i),
4144 GEN_INT (offset));
4145 offset += UNITS_PER_WORD;
4147 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4150 return gen_rtx_REG (mode, GR_RET_FIRST);
4154 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
4155 We need to emit DTP-relative relocations. */
4157 void
4158 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4160 gcc_assert (size == 8);
4161 fputs ("\tdata8.ua\t@dtprel(", file);
4162 output_addr_const (file, x);
4163 fputs (")", file);
4166 /* Print a memory address as an operand to reference that memory location. */
4168 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4169 also call this from ia64_print_operand for memory addresses. */
4171 void
4172 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4173 rtx address ATTRIBUTE_UNUSED)
4177 /* Print an operand to an assembler instruction.
4178 C Swap and print a comparison operator.
4179 D Print an FP comparison operator.
4180 E Print 32 - constant, for SImode shifts as extract.
4181 e Print 64 - constant, for DImode rotates.
4182 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4183 a floating point register emitted normally.
4184 I Invert a predicate register by adding 1.
4185 J Select the proper predicate register for a condition.
4186 j Select the inverse predicate register for a condition.
4187 O Append .acq for volatile load.
4188 P Postincrement of a MEM.
4189 Q Append .rel for volatile store.
4190 S Shift amount for shladd instruction.
4191 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4192 for Intel assembler.
4193 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4194 for Intel assembler.
4195 r Print register name, or constant 0 as r0. HP compatibility for
4196 Linux kernel.
4197 v Print vector constant value as an 8-byte integer value. */
4199 void
4200 ia64_print_operand (FILE * file, rtx x, int code)
4202 const char *str;
4204 switch (code)
4206 case 0:
4207 /* Handled below. */
4208 break;
4210 case 'C':
4212 enum rtx_code c = swap_condition (GET_CODE (x));
4213 fputs (GET_RTX_NAME (c), file);
4214 return;
4217 case 'D':
4218 switch (GET_CODE (x))
4220 case NE:
4221 str = "neq";
4222 break;
4223 case UNORDERED:
4224 str = "unord";
4225 break;
4226 case ORDERED:
4227 str = "ord";
4228 break;
4229 default:
4230 str = GET_RTX_NAME (GET_CODE (x));
4231 break;
4233 fputs (str, file);
4234 return;
4236 case 'E':
4237 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4238 return;
4240 case 'e':
4241 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4242 return;
4244 case 'F':
4245 if (x == CONST0_RTX (GET_MODE (x)))
4246 str = reg_names [FR_REG (0)];
4247 else if (x == CONST1_RTX (GET_MODE (x)))
4248 str = reg_names [FR_REG (1)];
4249 else
4251 gcc_assert (GET_CODE (x) == REG);
4252 str = reg_names [REGNO (x)];
4254 fputs (str, file);
4255 return;
4257 case 'I':
4258 fputs (reg_names [REGNO (x) + 1], file);
4259 return;
4261 case 'J':
4262 case 'j':
4264 unsigned int regno = REGNO (XEXP (x, 0));
4265 if (GET_CODE (x) == EQ)
4266 regno += 1;
4267 if (code == 'j')
4268 regno ^= 1;
4269 fputs (reg_names [regno], file);
4271 return;
4273 case 'O':
4274 if (MEM_VOLATILE_P (x))
4275 fputs(".acq", file);
4276 return;
4278 case 'P':
4280 HOST_WIDE_INT value;
4282 switch (GET_CODE (XEXP (x, 0)))
4284 default:
4285 return;
4287 case POST_MODIFY:
4288 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4289 if (GET_CODE (x) == CONST_INT)
4290 value = INTVAL (x);
4291 else
4293 gcc_assert (GET_CODE (x) == REG);
4294 fprintf (file, ", %s", reg_names[REGNO (x)]);
4295 return;
4297 break;
4299 case POST_INC:
4300 value = GET_MODE_SIZE (GET_MODE (x));
4301 break;
4303 case POST_DEC:
4304 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4305 break;
4308 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4309 return;
4312 case 'Q':
4313 if (MEM_VOLATILE_P (x))
4314 fputs(".rel", file);
4315 return;
4317 case 'S':
4318 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4319 return;
4321 case 'T':
4322 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4324 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4325 return;
4327 break;
4329 case 'U':
4330 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4332 const char *prefix = "0x";
4333 if (INTVAL (x) & 0x80000000)
4335 fprintf (file, "0xffffffff");
4336 prefix = "";
4338 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4339 return;
4341 break;
4343 case 'r':
4344 /* If this operand is the constant zero, write it as register zero.
4345 Any register, zero, or CONST_INT value is OK here. */
4346 if (GET_CODE (x) == REG)
4347 fputs (reg_names[REGNO (x)], file);
4348 else if (x == CONST0_RTX (GET_MODE (x)))
4349 fputs ("r0", file);
4350 else if (GET_CODE (x) == CONST_INT)
4351 output_addr_const (file, x);
4352 else
4353 output_operand_lossage ("invalid %%r value");
4354 return;
4356 case 'v':
4357 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4358 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4359 break;
4361 case '+':
4363 const char *which;
4365 /* For conditional branches, returns or calls, substitute
4366 sptk, dptk, dpnt, or spnt for %s. */
4367 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4368 if (x)
4370 int pred_val = INTVAL (XEXP (x, 0));
4372 /* Guess top and bottom 10% statically predicted. */
4373 if (pred_val < REG_BR_PROB_BASE / 50)
4374 which = ".spnt";
4375 else if (pred_val < REG_BR_PROB_BASE / 2)
4376 which = ".dpnt";
4377 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4378 which = ".dptk";
4379 else
4380 which = ".sptk";
4382 else if (GET_CODE (current_output_insn) == CALL_INSN)
4383 which = ".sptk";
4384 else
4385 which = ".dptk";
4387 fputs (which, file);
4388 return;
4391 case ',':
4392 x = current_insn_predicate;
4393 if (x)
4395 unsigned int regno = REGNO (XEXP (x, 0));
4396 if (GET_CODE (x) == EQ)
4397 regno += 1;
4398 fprintf (file, "(%s) ", reg_names [regno]);
4400 return;
4402 default:
4403 output_operand_lossage ("ia64_print_operand: unknown code");
4404 return;
4407 switch (GET_CODE (x))
4409 /* This happens for the spill/restore instructions. */
4410 case POST_INC:
4411 case POST_DEC:
4412 case POST_MODIFY:
4413 x = XEXP (x, 0);
4414 /* ... fall through ... */
4416 case REG:
4417 fputs (reg_names [REGNO (x)], file);
4418 break;
4420 case MEM:
4422 rtx addr = XEXP (x, 0);
4423 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4424 addr = XEXP (addr, 0);
4425 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4426 break;
4429 default:
4430 output_addr_const (file, x);
4431 break;
4434 return;
4437 /* Compute a (partial) cost for rtx X. Return true if the complete
4438 cost has been computed, and false if subexpressions should be
4439 scanned. In either case, *TOTAL contains the cost result. */
4440 /* ??? This is incomplete. */
4442 static bool
4443 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4445 switch (code)
4447 case CONST_INT:
4448 switch (outer_code)
4450 case SET:
4451 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4452 return true;
4453 case PLUS:
4454 if (CONST_OK_FOR_I (INTVAL (x)))
4455 *total = 0;
4456 else if (CONST_OK_FOR_J (INTVAL (x)))
4457 *total = 1;
4458 else
4459 *total = COSTS_N_INSNS (1);
4460 return true;
4461 default:
4462 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4463 *total = 0;
4464 else
4465 *total = COSTS_N_INSNS (1);
4466 return true;
4469 case CONST_DOUBLE:
4470 *total = COSTS_N_INSNS (1);
4471 return true;
4473 case CONST:
4474 case SYMBOL_REF:
4475 case LABEL_REF:
4476 *total = COSTS_N_INSNS (3);
4477 return true;
4479 case MULT:
4480 /* For multiplies wider than HImode, we have to go to the FPU,
4481 which normally involves copies. Plus there's the latency
4482 of the multiply itself, and the latency of the instructions to
4483 transfer integer regs to FP regs. */
4484 /* ??? Check for FP mode. */
4485 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4486 *total = COSTS_N_INSNS (10);
4487 else
4488 *total = COSTS_N_INSNS (2);
4489 return true;
4491 case PLUS:
4492 case MINUS:
4493 case ASHIFT:
4494 case ASHIFTRT:
4495 case LSHIFTRT:
4496 *total = COSTS_N_INSNS (1);
4497 return true;
4499 case DIV:
4500 case UDIV:
4501 case MOD:
4502 case UMOD:
4503 /* We make divide expensive, so that divide-by-constant will be
4504 optimized to a multiply. */
4505 *total = COSTS_N_INSNS (60);
4506 return true;
4508 default:
4509 return false;
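/* Editor's note: a sketch, not part of ia64.c, of why the large DIV
   cost above pays off.  With DIV costed at 60 insns and a wide MULT at
   10, the middle end prefers the multiply-high-and-shift expansion of
   a divide by a constant.  The magic value below is the standard one
   for an unsigned 32-bit divide by 3.  */
static unsigned int
sketch_udiv3 (unsigned int x)
{
  return (unsigned int) (((unsigned long long) x * 0xAAAAAAABULL) >> 33);
}
/* sketch_udiv3 (x) == x / 3 for every 32-bit x: one multiply and one
   shift, instead of a divide for which ia64 has no hardware
   instruction in the first place.  */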
4513 /* Calculate the cost of moving data from a register in class FROM to
4514 one in class TO, using MODE. */
4517 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4518 enum reg_class to)
4520 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4521 if (to == ADDL_REGS)
4522 to = GR_REGS;
4523 if (from == ADDL_REGS)
4524 from = GR_REGS;
4526 /* All costs are symmetric, so reduce cases by putting the
4527 lower number class as the destination. */
4528 if (from < to)
4530 enum reg_class tmp = to;
4531 to = from, from = tmp;
4534 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4535 so that we get secondary memory reloads. Between FR_REGS,
4536 we have to make this at least as expensive as MEMORY_MOVE_COST
4537 to avoid spectacularly poor register class preferencing. */
4538 if (mode == XFmode)
4540 if (to != GR_REGS || from != GR_REGS)
4541 return MEMORY_MOVE_COST (mode, to, 0);
4542 else
4543 return 3;
4546 switch (to)
4548 case PR_REGS:
4549 /* Moving between PR registers takes two insns. */
4550 if (from == PR_REGS)
4551 return 3;
4552 /* Moving between PR and anything but GR is impossible. */
4553 if (from != GR_REGS)
4554 return MEMORY_MOVE_COST (mode, to, 0);
4555 break;
4557 case BR_REGS:
4558 /* Moving between BR and anything but GR is impossible. */
4559 if (from != GR_REGS && from != GR_AND_BR_REGS)
4560 return MEMORY_MOVE_COST (mode, to, 0);
4561 break;
4563 case AR_I_REGS:
4564 case AR_M_REGS:
4565 /* Moving between AR and anything but GR is impossible. */
4566 if (from != GR_REGS)
4567 return MEMORY_MOVE_COST (mode, to, 0);
4568 break;
4570 case GR_REGS:
4571 case FR_REGS:
4572 case GR_AND_FR_REGS:
4573 case GR_AND_BR_REGS:
4574 case ALL_REGS:
4575 break;
4577 default:
4578 gcc_unreachable ();
4581 return 2;
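/* Editor's note: the symmetry trick used above, reduced to a sketch
   that is not part of ia64.c.  Because the cost of moving A->B equals
   the cost of moving B->A, the pair is canonicalized so a single
   switch covers both orderings.  */
static int
sketch_symmetric_cost (int from, int to)
{
  if (from < to)
    {
      int tmp = to;
      to = from;
      from = tmp;
    }
  /* ... a single switch on the canonical (from, to) pair goes here;
     2 mirrors the default cost returned above.  */
  return 2;
}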
4584 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS
4585 to use when copying X into that class. */
4587 enum reg_class
4588 ia64_preferred_reload_class (rtx x, enum reg_class class)
4590 switch (class)
4592 case FR_REGS:
4593 /* Don't allow volatile mem reloads into floating point registers.
4594 This is defined to force reload to choose the r/m case instead
4595 of the f/f case when reloading (set (reg fX) (mem/v)). */
4596 if (MEM_P (x) && MEM_VOLATILE_P (x))
4597 return NO_REGS;
4599 /* Force all unrecognized constants into the constant pool. */
4600 if (CONSTANT_P (x))
4601 return NO_REGS;
4602 break;
4604 case AR_M_REGS:
4605 case AR_I_REGS:
4606 if (!OBJECT_P (x))
4607 return NO_REGS;
4608 break;
4610 default:
4611 break;
4614 return class;
4617 /* This function returns the register class required for a secondary
4618 register when copying between one of the registers in CLASS, and X,
4619 using MODE. A return value of NO_REGS means that no secondary register
4620 is required. */
4622 enum reg_class
4623 ia64_secondary_reload_class (enum reg_class class,
4624 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4626 int regno = -1;
4628 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4629 regno = true_regnum (x);
4631 switch (class)
4633 case BR_REGS:
4634 case AR_M_REGS:
4635 case AR_I_REGS:
4636 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4637 interaction. We end up with two pseudos with overlapping lifetimes,
4638 both of which are equiv to the same constant, and both of which need
4639 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4640 changes depending on the path length, which means the qty_first_reg
4641 check in make_regs_eqv can give different answers at different times.
4642 At some point I'll probably need a reload_indi pattern to handle
4643 this.
4645 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4646 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4647 non-general registers for good measure. */
4648 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4649 return GR_REGS;
4651 /* This is needed if a pseudo used as a call_operand gets spilled to a
4652 stack slot. */
4653 if (GET_CODE (x) == MEM)
4654 return GR_REGS;
4655 break;
4657 case FR_REGS:
4658 /* Need to go through general registers to get to other class regs. */
4659 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4660 return GR_REGS;
4662 /* This can happen when a paradoxical subreg is an operand to the
4663 muldi3 pattern. */
4664 /* ??? This shouldn't be necessary after instruction scheduling is
4665 enabled, because paradoxical subregs are not accepted by
4666 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4667 stop the paradoxical subreg stupidity in the *_operand functions
4668 in recog.c. */
4669 if (GET_CODE (x) == MEM
4670 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4671 || GET_MODE (x) == QImode))
4672 return GR_REGS;
4674 /* This can happen because of the ior/and/etc patterns that accept FP
4675 registers as operands. If the third operand is a constant, then it
4676 needs to be reloaded into a FP register. */
4677 if (GET_CODE (x) == CONST_INT)
4678 return GR_REGS;
4680 /* This can happen because of register elimination in a muldi3 insn.
4681 E.g. `26107 * (unsigned long)&u'. */
4682 if (GET_CODE (x) == PLUS)
4683 return GR_REGS;
4684 break;
4686 case PR_REGS:
4687 /* ??? This happens if we cse/gcse a BImode value across a call,
4688 and the function has a nonlocal goto. This is because global
4689 does not allocate call crossing pseudos to hard registers when
4690 current_function_has_nonlocal_goto is true. This is relatively
4691 common for C++ programs that use exceptions. To reproduce,
4692 return NO_REGS and compile libstdc++. */
4693 if (GET_CODE (x) == MEM)
4694 return GR_REGS;
4696 /* This can happen when we take a BImode subreg of a DImode value,
4697 and that DImode value winds up in some non-GR register. */
4698 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4699 return GR_REGS;
4700 break;
4702 default:
4703 break;
4706 return NO_REGS;
4710 /* Emit text to declare externally defined variables and functions, because
4711 the Intel assembler does not support undefined externals. */
4713 void
4714 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4716 int save_referenced;
4718 /* GNU as does not need anything here, but the HP linker does need
4719 something for external functions. */
4721 if (TARGET_GNU_AS
4722 && (!TARGET_HPUX_LD
4723 || TREE_CODE (decl) != FUNCTION_DECL
4724 || strstr (name, "__builtin_") == name))
4725 return;
4727 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4728 the linker when we do this, so we need to be careful not to do this for
4729 builtin functions which have no library equivalent. Unfortunately, we
4730 can't tell here whether or not a function will actually be called by
4731 expand_expr, so we pull in library functions even if we may not need
4732 them later. */
4733 if (! strcmp (name, "__builtin_next_arg")
4734 || ! strcmp (name, "alloca")
4735 || ! strcmp (name, "__builtin_constant_p")
4736 || ! strcmp (name, "__builtin_args_info"))
4737 return;
4739 if (TARGET_HPUX_LD)
4740 ia64_hpux_add_extern_decl (decl);
4741 else
4743 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4744 restore it. */
4745 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4746 if (TREE_CODE (decl) == FUNCTION_DECL)
4747 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4748 (*targetm.asm_out.globalize_label) (file, name);
4749 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4753 /* Parse the -mfixed-range= option string. */
4755 static void
4756 fix_range (const char *const_str)
4758 int i, first, last;
4759 char *str, *dash, *comma;
4761 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4762 REG2 are either register names or register numbers. The effect
4763 of this option is to mark the registers in the range from REG1 to
4764 REG2 as ``fixed'' so they won't be used by the compiler. This is
4765 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4767 i = strlen (const_str);
4768 str = (char *) alloca (i + 1);
4769 memcpy (str, const_str, i + 1);
4771 while (1)
4773 dash = strchr (str, '-');
4774 if (!dash)
4776 warning (0, "value of -mfixed-range must have form REG1-REG2");
4777 return;
4779 *dash = '\0';
4781 comma = strchr (dash + 1, ',');
4782 if (comma)
4783 *comma = '\0';
4785 first = decode_reg_name (str);
4786 if (first < 0)
4788 warning (0, "unknown register name: %s", str);
4789 return;
4792 last = decode_reg_name (dash + 1);
4793 if (last < 0)
4795 warning (0, "unknown register name: %s", dash + 1);
4796 return;
4799 *dash = '-';
4801 if (first > last)
4803 warning (0, "%s-%s is an empty range", str, dash + 1);
4804 return;
4807 for (i = first; i <= last; ++i)
4808 fixed_regs[i] = call_used_regs[i] = 1;
4810 if (!comma)
4811 break;
4813 *comma = ',';
4814 str = comma + 1;
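/* Editor's note: a sketch, not part of ia64.c, of the grammar accepted
   above, e.g. -mfixed-range=f32-f127 or -mfixed-range=f12-f15,f32-f127.
   It performs the same strchr/'\0' splitting, but hands each name pair
   to a caller-supplied callback instead of decode_reg_name; all names
   here are hypothetical.  */
static void
sketch_parse_ranges (char *str, void (*mark) (const char *, const char *))
{
  char *comma, *dash;
  while (str)
    {
      comma = strchr (str, ',');
      if (comma)
        *comma++ = '\0';
      dash = strchr (str, '-');
      if (dash)
        {
          *dash = '\0';
          mark (str, dash + 1);        /* e.g. ("f32", "f127") */
        }
      str = comma;
    }
}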
4818 /* Implement TARGET_HANDLE_OPTION. */
4820 static bool
4821 ia64_handle_option (size_t code, const char *arg, int value)
4823 switch (code)
4825 case OPT_mfixed_range_:
4826 fix_range (arg);
4827 return true;
4829 case OPT_mtls_size_:
4830 if (value != 14 && value != 22 && value != 64)
4831 error ("bad value %<%s%> for -mtls-size= switch", arg);
4832 return true;
4834 case OPT_mtune_:
4836 static struct pta
4838 const char *name; /* processor name or nickname. */
4839 enum processor_type processor;
4841 const processor_alias_table[] =
4843 {"itanium", PROCESSOR_ITANIUM},
4844 {"itanium1", PROCESSOR_ITANIUM},
4845 {"merced", PROCESSOR_ITANIUM},
4846 {"itanium2", PROCESSOR_ITANIUM2},
4847 {"mckinley", PROCESSOR_ITANIUM2},
4849 int const pta_size = ARRAY_SIZE (processor_alias_table);
4850 int i;
4852 for (i = 0; i < pta_size; i++)
4853 if (!strcmp (arg, processor_alias_table[i].name))
4855 ia64_tune = processor_alias_table[i].processor;
4856 break;
4858 if (i == pta_size)
4859 error ("bad value %<%s%> for -mtune= switch", arg);
4860 return true;
4863 default:
4864 return true;
4868 /* Implement OVERRIDE_OPTIONS. */
4870 void
4871 ia64_override_options (void)
4873 if (TARGET_AUTO_PIC)
4874 target_flags |= MASK_CONST_GP;
4876 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
4878 warning (0, "not yet implemented: latency-optimized inline square root");
4879 TARGET_INLINE_SQRT = INL_MAX_THR;
4882 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4883 flag_schedule_insns_after_reload = 0;
4885 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4887 init_machine_status = ia64_init_machine_status;
4890 static struct machine_function *
4891 ia64_init_machine_status (void)
4893 return ggc_alloc_cleared (sizeof (struct machine_function));
4896 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4897 static enum attr_type ia64_safe_type (rtx);
4899 static enum attr_itanium_class
4900 ia64_safe_itanium_class (rtx insn)
4902 if (recog_memoized (insn) >= 0)
4903 return get_attr_itanium_class (insn);
4904 else
4905 return ITANIUM_CLASS_UNKNOWN;
4908 static enum attr_type
4909 ia64_safe_type (rtx insn)
4911 if (recog_memoized (insn) >= 0)
4912 return get_attr_type (insn);
4913 else
4914 return TYPE_UNKNOWN;
4917 /* The following collection of routines emit instruction group stop bits as
4918 necessary to avoid dependencies. */
4920 /* Need to track some additional registers as far as serialization is
4921 concerned so we can properly handle br.call and br.ret. We could
4922 make these registers visible to gcc, but since these registers are
4923 never explicitly used in gcc generated code, it seems wasteful to
4924 do so (plus it would make the call and return patterns needlessly
4925 complex). */
4926 #define REG_RP (BR_REG (0))
4927 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4928 /* This is used for volatile asms which may require a stop bit immediately
4929 before and after them. */
4930 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4931 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4932 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4934 /* For each register, we keep track of how it has been written in the
4935 current instruction group.
4937 If a register is written unconditionally (no qualifying predicate),
4938 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4940 If a register is written if its qualifying predicate P is true, we
4941 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4942 may be written again by the complement of P (P^1) and when this happens,
4943 WRITE_COUNT gets set to 2.
4945 The result of this is that whenever an insn attempts to write a register
4946 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4948 If a predicate register is written by a floating-point insn, we set
4949 WRITTEN_BY_FP to true.
4951 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4952 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4954 struct reg_write_state
4956 unsigned int write_count : 2;
4957 unsigned int first_pred : 16;
4958 unsigned int written_by_fp : 1;
4959 unsigned int written_by_and : 1;
4960 unsigned int written_by_or : 1;
4963 /* Cumulative info for the current instruction group. */
4964 struct reg_write_state rws_sum[NUM_REGS];
4965 /* Info for the current instruction. This gets copied to rws_sum after a
4966 stop bit is emitted. */
4967 struct reg_write_state rws_insn[NUM_REGS];
4969 /* Indicates whether this is the first instruction after a stop bit,
4970 in which case we don't need another stop bit. Without this,
4971 ia64_variable_issue will die when scheduling an alloc. */
4972 static int first_instruction;
4974 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4975 RTL for one instruction. */
4976 struct reg_flags
4978 unsigned int is_write : 1; /* Is register being written? */
4979 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4980 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4981 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4982 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4983 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4986 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4987 static int rws_access_regno (int, struct reg_flags, int);
4988 static int rws_access_reg (rtx, struct reg_flags, int);
4989 static void update_set_flags (rtx, struct reg_flags *);
4990 static int set_src_needs_barrier (rtx, struct reg_flags, int);
4991 static int rtx_needs_barrier (rtx, struct reg_flags, int);
4992 static void init_insn_group_barriers (void);
4993 static int group_barrier_needed (rtx);
4994 static int safe_group_barrier_needed (rtx);
4996 /* Update *RWS for REGNO, which is being written by the current instruction,
4997 with predicate PRED, and associated register flags in FLAGS. */
4999 static void
5000 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
5002 if (pred)
5003 rws[regno].write_count++;
5004 else
5005 rws[regno].write_count = 2;
5006 rws[regno].written_by_fp |= flags.is_fp;
5007 /* ??? Not tracking and/or across differing predicates. */
5008 rws[regno].written_by_and = flags.is_and;
5009 rws[regno].written_by_or = flags.is_or;
5010 rws[regno].first_pred = pred;
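/* Editor's note: a sketch, not part of ia64.c, replaying the
   write_count transitions described above on a pair of plain integers.
   The (p ^ 1) test models the complementary predicate pairing the real
   code assumes; the helper is hypothetical and ignores the
   and.orcm / or.andcm special cases.  */
static int
sketch_record_write (unsigned *count, unsigned *first_pred, unsigned pred)
{
  int barrier = 0;
  if (*count == 2)
    barrier = 1;                       /* already written unconditionally */
  else if (*count == 1 && (*first_pred ^ 1) != pred)
    barrier = 1;                       /* not the complementary predicate */
  if (pred == 0)
    *count = 2;                        /* unconditional write */
  else
    {
      if (*count == 0)
        *first_pred = pred;
      if (*count < 2)
        *count += 1;                   /* predicated write */
    }
  return barrier;
}
/* Writing a register under p6 and then under p7 (6 ^ 1 == 7) needs no
   stop bit; writing it twice under p6, or writing it again after an
   unconditional write, does.  */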
5013 /* Handle an access to register REGNO of type FLAGS using predicate register
5014 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
5015 a dependency with an earlier instruction in the same group. */
5017 static int
5018 rws_access_regno (int regno, struct reg_flags flags, int pred)
5020 int need_barrier = 0;
5022 gcc_assert (regno < NUM_REGS);
5024 if (! PR_REGNO_P (regno))
5025 flags.is_and = flags.is_or = 0;
5027 if (flags.is_write)
5029 int write_count;
5031 /* One insn writes same reg multiple times? */
5032 gcc_assert (!rws_insn[regno].write_count);
5034 /* Update info for current instruction. */
5035 rws_update (rws_insn, regno, flags, pred);
5036 write_count = rws_sum[regno].write_count;
5038 switch (write_count)
5040 case 0:
5041 /* The register has not been written yet. */
5042 rws_update (rws_sum, regno, flags, pred);
5043 break;
5045 case 1:
5046 /* The register has been written via a predicate. If this is
5047 not a complementary predicate, then we need a barrier. */
5048 /* ??? This assumes that P and P+1 are always complementary
5049 predicates for P even. */
5050 if (flags.is_and && rws_sum[regno].written_by_and)
5052 else if (flags.is_or && rws_sum[regno].written_by_or)
5054 else if ((rws_sum[regno].first_pred ^ 1) != pred)
5055 need_barrier = 1;
5056 rws_update (rws_sum, regno, flags, pred);
5057 break;
5059 case 2:
5060 /* The register has been unconditionally written already. We
5061 need a barrier. */
5062 if (flags.is_and && rws_sum[regno].written_by_and)
5064 else if (flags.is_or && rws_sum[regno].written_by_or)
5066 else
5067 need_barrier = 1;
5068 rws_sum[regno].written_by_and = flags.is_and;
5069 rws_sum[regno].written_by_or = flags.is_or;
5070 break;
5072 default:
5073 gcc_unreachable ();
5076 else
5078 if (flags.is_branch)
5080 /* Branches have several RAW exceptions that allow us to avoid
5081 barriers. */
5083 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5084 /* RAW dependencies on branch regs are permissible as long
5085 as the writer is a non-branch instruction. Since we
5086 never generate code that uses a branch register written
5087 by a branch instruction, handling this case is
5088 easy. */
5089 return 0;
5091 if (REGNO_REG_CLASS (regno) == PR_REGS
5092 && ! rws_sum[regno].written_by_fp)
5093 /* The predicates of a branch are available within the
5094 same insn group as long as the predicate was written by
5095 something other than a floating-point instruction. */
5096 return 0;
5099 if (flags.is_and && rws_sum[regno].written_by_and)
5100 return 0;
5101 if (flags.is_or && rws_sum[regno].written_by_or)
5102 return 0;
5104 switch (rws_sum[regno].write_count)
5106 case 0:
5107 /* The register has not been written yet. */
5108 break;
5110 case 1:
5111 /* The register has been written via a predicate. If this is
5112 not a complementary predicate, then we need a barrier. */
5113 /* ??? This assumes that P and P+1 are always complementary
5114 predicates for P even. */
5115 if ((rws_sum[regno].first_pred ^ 1) != pred)
5116 need_barrier = 1;
5117 break;
5119 case 2:
5120 /* The register has been unconditionally written already. We
5121 need a barrier. */
5122 need_barrier = 1;
5123 break;
5125 default:
5126 gcc_unreachable ();
5130 return need_barrier;
5133 static int
5134 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5136 int regno = REGNO (reg);
5137 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5139 if (n == 1)
5140 return rws_access_regno (regno, flags, pred);
5141 else
5143 int need_barrier = 0;
5144 while (--n >= 0)
5145 need_barrier |= rws_access_regno (regno + n, flags, pred);
5146 return need_barrier;
5150 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
5151 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
5153 static void
5154 update_set_flags (rtx x, struct reg_flags *pflags)
5156 rtx src = SET_SRC (x);
5158 switch (GET_CODE (src))
5160 case CALL:
5161 return;
5163 case IF_THEN_ELSE:
5164 if (SET_DEST (x) == pc_rtx)
5165 /* X is a conditional branch. */
5166 return;
5167 else
5169 /* X is a conditional move. */
5170 rtx cond = XEXP (src, 0);
5171 cond = XEXP (cond, 0);
5173 /* We always split conditional moves into COND_EXEC patterns, so the
5174 only pattern that can reach here is doloop_end_internal. We don't
5175 need to do anything special for this pattern. */
5176 gcc_assert (GET_CODE (cond) == REG && REGNO (cond) == AR_LC_REGNUM);
5177 return;
5180 default:
5181 if (COMPARISON_P (src)
5182 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5183 /* Set pflags->is_fp to 1 so that we know we're dealing
5184 with a floating point comparison when processing the
5185 destination of the SET. */
5186 pflags->is_fp = 1;
5188 /* Discover if this is a parallel comparison. We only handle
5189 and.orcm and or.andcm at present, since we must retain a
5190 strict inverse on the predicate pair. */
5191 else if (GET_CODE (src) == AND)
5192 pflags->is_and = 1;
5193 else if (GET_CODE (src) == IOR)
5194 pflags->is_or = 1;
5196 break;
5200 /* Subroutine of rtx_needs_barrier; this function determines whether the
5201 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5202 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5203 for this insn. */
5205 static int
5206 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5208 int need_barrier = 0;
5209 rtx dst;
5210 rtx src = SET_SRC (x);
5212 if (GET_CODE (src) == CALL)
5213 /* We don't need to worry about the result registers that
5214 get written by a subroutine call. */
5215 return rtx_needs_barrier (src, flags, pred);
5216 else if (SET_DEST (x) == pc_rtx)
5218 /* X is a conditional branch. */
5219 /* ??? This seems redundant, as the caller sets this bit for
5220 all JUMP_INSNs. */
5221 flags.is_branch = 1;
5222 return rtx_needs_barrier (src, flags, pred);
5225 need_barrier = rtx_needs_barrier (src, flags, pred);
5227 dst = SET_DEST (x);
5228 if (GET_CODE (dst) == ZERO_EXTRACT)
5230 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5231 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5232 dst = XEXP (dst, 0);
5234 return need_barrier;
5237 /* Handle an access to rtx X of type FLAGS using predicate register
5238 PRED. Return 1 if this access creates a dependency with an earlier
5239 instruction in the same group. */
5241 static int
5242 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5244 int i, j;
5245 int is_complemented = 0;
5246 int need_barrier = 0;
5247 const char *format_ptr;
5248 struct reg_flags new_flags;
5249 rtx cond;
5251 if (! x)
5252 return 0;
5254 new_flags = flags;
5256 switch (GET_CODE (x))
5258 case SET:
5259 update_set_flags (x, &new_flags);
5260 need_barrier = set_src_needs_barrier (x, new_flags, pred);
5261 if (GET_CODE (SET_SRC (x)) != CALL)
5263 new_flags.is_write = 1;
5264 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5266 break;
5268 case CALL:
5269 new_flags.is_write = 0;
5270 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5272 /* Avoid multiple register writes, in case this is a pattern with
5273 multiple CALL rtx. This avoids a failure in rws_access_reg. */
5274 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5276 new_flags.is_write = 1;
5277 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5278 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5279 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5281 break;
5283 case COND_EXEC:
5284 /* X is a predicated instruction. */
5286 cond = COND_EXEC_TEST (x);
5287 gcc_assert (!pred);
5288 need_barrier = rtx_needs_barrier (cond, flags, 0);
5290 if (GET_CODE (cond) == EQ)
5291 is_complemented = 1;
5292 cond = XEXP (cond, 0);
5293 gcc_assert (GET_CODE (cond) == REG
5294 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5295 pred = REGNO (cond);
5296 if (is_complemented)
5297 ++pred;
5299 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5300 return need_barrier;
5302 case CLOBBER:
5303 case USE:
5304 /* Clobber & use are for earlier compiler-phases only. */
5305 break;
5307 case ASM_OPERANDS:
5308 case ASM_INPUT:
5309 /* We always emit stop bits for traditional asms. We emit stop bits
5310 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5311 if (GET_CODE (x) != ASM_OPERANDS
5312 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5314 /* Avoid writing the register multiple times if we have multiple
5315 asm outputs. This avoids a failure in rws_access_reg. */
5316 if (! rws_insn[REG_VOLATILE].write_count)
5318 new_flags.is_write = 1;
5319 rws_access_regno (REG_VOLATILE, new_flags, pred);
5321 return 1;
5324 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5325 We cannot just fall through here since then we would be confused
5326 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5327 a traditional asm, unlike its normal usage. */
5329 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5330 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5331 need_barrier = 1;
5332 break;
5334 case PARALLEL:
5335 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5337 rtx pat = XVECEXP (x, 0, i);
5338 switch (GET_CODE (pat))
5340 case SET:
5341 update_set_flags (pat, &new_flags);
5342 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5343 break;
5345 case USE:
5346 case CALL:
5347 case ASM_OPERANDS:
5348 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5349 break;
5351 case CLOBBER:
5352 case RETURN:
5353 break;
5355 default:
5356 gcc_unreachable ();
5359 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5361 rtx pat = XVECEXP (x, 0, i);
5362 if (GET_CODE (pat) == SET)
5364 if (GET_CODE (SET_SRC (pat)) != CALL)
5366 new_flags.is_write = 1;
5367 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5368 pred);
5371 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5372 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5374 break;
5376 case SUBREG:
5377 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5378 break;
5379 case REG:
5380 if (REGNO (x) == AR_UNAT_REGNUM)
5382 for (i = 0; i < 64; ++i)
5383 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5385 else
5386 need_barrier = rws_access_reg (x, flags, pred);
5387 break;
5389 case MEM:
5390 /* Find the regs used in memory address computation. */
5391 new_flags.is_write = 0;
5392 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5393 break;
5395 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
5396 case SYMBOL_REF: case LABEL_REF: case CONST:
5397 break;
5399 /* Operators with side-effects. */
5400 case POST_INC: case POST_DEC:
5401 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5403 new_flags.is_write = 0;
5404 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5405 new_flags.is_write = 1;
5406 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5407 break;
5409 case POST_MODIFY:
5410 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5412 new_flags.is_write = 0;
5413 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5414 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5415 new_flags.is_write = 1;
5416 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5417 break;
5419 /* Handle common unary and binary ops for efficiency. */
5420 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5421 case MOD: case UDIV: case UMOD: case AND: case IOR:
5422 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5423 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5424 case NE: case EQ: case GE: case GT: case LE:
5425 case LT: case GEU: case GTU: case LEU: case LTU:
5426 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5427 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5428 break;
5430 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5431 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5432 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5433 case SQRT: case FFS: case POPCOUNT:
5434 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5435 break;
5437 case VEC_SELECT:
5438 /* VEC_SELECT's second argument is a PARALLEL with integers that
5439 describe the elements selected. On ia64, those integers are
5440 always constants. Avoid walking the PARALLEL so that we don't
5441 get confused with "normal" parallels and then die. */
5442 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5443 break;
5445 case UNSPEC:
5446 switch (XINT (x, 1))
5448 case UNSPEC_LTOFF_DTPMOD:
5449 case UNSPEC_LTOFF_DTPREL:
5450 case UNSPEC_DTPREL:
5451 case UNSPEC_LTOFF_TPREL:
5452 case UNSPEC_TPREL:
5453 case UNSPEC_PRED_REL_MUTEX:
5454 case UNSPEC_PIC_CALL:
5455 case UNSPEC_MF:
5456 case UNSPEC_FETCHADD_ACQ:
5457 case UNSPEC_BSP_VALUE:
5458 case UNSPEC_FLUSHRS:
5459 case UNSPEC_BUNDLE_SELECTOR:
5460 break;
5462 case UNSPEC_GR_SPILL:
5463 case UNSPEC_GR_RESTORE:
5465 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5466 HOST_WIDE_INT bit = (offset >> 3) & 63;
5468 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5469 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5470 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5471 new_flags, pred);
5472 break;
5475 case UNSPEC_FR_SPILL:
5476 case UNSPEC_FR_RESTORE:
5477 case UNSPEC_GETF_EXP:
5478 case UNSPEC_SETF_EXP:
5479 case UNSPEC_ADDP4:
5480 case UNSPEC_FR_SQRT_RECIP_APPROX:
5481 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5482 break;
5484 case UNSPEC_FR_RECIP_APPROX:
5485 case UNSPEC_SHRP:
5486 case UNSPEC_COPYSIGN:
5487 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5488 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5489 break;
5491 case UNSPEC_CMPXCHG_ACQ:
5492 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5493 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5494 break;
5496 default:
5497 gcc_unreachable ();
5499 break;
5501 case UNSPEC_VOLATILE:
5502 switch (XINT (x, 1))
5504 case UNSPECV_ALLOC:
5505 /* Alloc must always be the first instruction of a group.
5506 We force this by always returning true. */
5507 /* ??? We might get better scheduling if we explicitly check for
5508 input/local/output register dependencies, and modify the
5509 scheduler so that alloc is always reordered to the start of
5510 the current group. We could then eliminate all of the
5511 first_instruction code. */
5512 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5514 new_flags.is_write = 1;
5515 rws_access_regno (REG_AR_CFM, new_flags, pred);
5516 return 1;
5518 case UNSPECV_SET_BSP:
5519 need_barrier = 1;
5520 break;
5522 case UNSPECV_BLOCKAGE:
5523 case UNSPECV_INSN_GROUP_BARRIER:
5524 case UNSPECV_BREAK:
5525 case UNSPECV_PSAC_ALL:
5526 case UNSPECV_PSAC_NORMAL:
5527 return 0;
5529 default:
5530 gcc_unreachable ();
5532 break;
5534 case RETURN:
5535 new_flags.is_write = 0;
5536 need_barrier = rws_access_regno (REG_RP, flags, pred);
5537 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5539 new_flags.is_write = 1;
5540 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5541 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5542 break;
5544 default:
5545 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5546 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5547 switch (format_ptr[i])
5549 case '0': /* unused field */
5550 case 'i': /* integer */
5551 case 'n': /* note */
5552 case 'w': /* wide integer */
5553 case 's': /* pointer to string */
5554 case 'S': /* optional pointer to string */
5555 break;
5557 case 'e':
5558 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5559 need_barrier = 1;
5560 break;
5562 case 'E':
5563 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5564 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5565 need_barrier = 1;
5566 break;
5568 default:
5569 gcc_unreachable ();
5571 break;
5573 return need_barrier;
5576 /* Clear out the state for group_barrier_needed at the start of a
5577 sequence of insns. */
5579 static void
5580 init_insn_group_barriers (void)
5582 memset (rws_sum, 0, sizeof (rws_sum));
5583 first_instruction = 1;
5586 /* Given the current state, determine whether a group barrier (a stop bit) is
5587 necessary before INSN. Return nonzero if so. This modifies the state to
5588 include the effects of INSN as a side-effect. */
5590 static int
5591 group_barrier_needed (rtx insn)
5593 rtx pat;
5594 int need_barrier = 0;
5595 struct reg_flags flags;
5597 memset (&flags, 0, sizeof (flags));
5598 switch (GET_CODE (insn))
5600 case NOTE:
5601 break;
5603 case BARRIER:
5604 /* A barrier doesn't imply an instruction group boundary. */
5605 break;
5607 case CODE_LABEL:
5608 memset (rws_insn, 0, sizeof (rws_insn));
5609 return 1;
5611 case CALL_INSN:
5612 flags.is_branch = 1;
5613 flags.is_sibcall = SIBLING_CALL_P (insn);
5614 memset (rws_insn, 0, sizeof (rws_insn));
5616 /* Don't bundle a call following another call. */
5617 if ((pat = prev_active_insn (insn))
5618 && GET_CODE (pat) == CALL_INSN)
5620 need_barrier = 1;
5621 break;
5624 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5625 break;
5627 case JUMP_INSN:
5628 flags.is_branch = 1;
5630 /* Don't bundle a jump following a call. */
5631 if ((pat = prev_active_insn (insn))
5632 && GET_CODE (pat) == CALL_INSN)
5634 need_barrier = 1;
5635 break;
5637 /* FALLTHRU */
5639 case INSN:
5640 if (GET_CODE (PATTERN (insn)) == USE
5641 || GET_CODE (PATTERN (insn)) == CLOBBER)
5642 /* Don't care about USE and CLOBBER "insns"---those are used to
5643 indicate to the optimizer that it shouldn't get rid of
5644 certain operations. */
5645 break;
5647 pat = PATTERN (insn);
5649 /* Ug. Hack hacks hacked elsewhere. */
5650 switch (recog_memoized (insn))
5652 /* We play dependency tricks with the epilogue in order
5653 to get proper schedules. Undo this for dv analysis. */
5654 case CODE_FOR_epilogue_deallocate_stack:
5655 case CODE_FOR_prologue_allocate_stack:
5656 pat = XVECEXP (pat, 0, 0);
5657 break;
5659 /* The pattern we use for br.cloop confuses the code above.
5660 The second element of the vector is representative. */
5661 case CODE_FOR_doloop_end_internal:
5662 pat = XVECEXP (pat, 0, 1);
5663 break;
5665 /* Doesn't generate code. */
5666 case CODE_FOR_pred_rel_mutex:
5667 case CODE_FOR_prologue_use:
5668 return 0;
5670 default:
5671 break;
5674 memset (rws_insn, 0, sizeof (rws_insn));
5675 need_barrier = rtx_needs_barrier (pat, flags, 0);
5677 /* Check to see if the previous instruction was a volatile
5678 asm. */
5679 if (! need_barrier)
5680 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5681 break;
5683 default:
5684 gcc_unreachable ();
5687 if (first_instruction && INSN_P (insn)
5688 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5689 && GET_CODE (PATTERN (insn)) != USE
5690 && GET_CODE (PATTERN (insn)) != CLOBBER)
5692 need_barrier = 0;
5693 first_instruction = 0;
5696 return need_barrier;
5699 /* Like group_barrier_needed, but do not clobber the current state. */
5701 static int
5702 safe_group_barrier_needed (rtx insn)
5704 struct reg_write_state rws_saved[NUM_REGS];
5705 int saved_first_instruction;
5706 int t;
5708 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5709 saved_first_instruction = first_instruction;
5711 t = group_barrier_needed (insn);
5713 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5714 first_instruction = saved_first_instruction;
5716 return t;
5719 /* Scan the current function and insert stop bits as necessary to
5720 eliminate dependencies. This function assumes that a final
5721 instruction scheduling pass has been run which has already
5722 inserted most of the necessary stop bits. This function only
5723 inserts new ones at basic block boundaries, since these are
5724 invisible to the scheduler. */
5726 static void
5727 emit_insn_group_barriers (FILE *dump)
5729 rtx insn;
5730 rtx last_label = 0;
5731 int insns_since_last_label = 0;
5733 init_insn_group_barriers ();
5735 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5737 if (GET_CODE (insn) == CODE_LABEL)
5739 if (insns_since_last_label)
5740 last_label = insn;
5741 insns_since_last_label = 0;
5743 else if (GET_CODE (insn) == NOTE
5744 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5746 if (insns_since_last_label)
5747 last_label = insn;
5748 insns_since_last_label = 0;
5750 else if (GET_CODE (insn) == INSN
5751 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5752 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5754 init_insn_group_barriers ();
5755 last_label = 0;
5757 else if (INSN_P (insn))
5759 insns_since_last_label = 1;
5761 if (group_barrier_needed (insn))
5763 if (last_label)
5765 if (dump)
5766 fprintf (dump, "Emitting stop before label %d\n",
5767 INSN_UID (last_label));
5768 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5769 insn = last_label;
5771 init_insn_group_barriers ();
5772 last_label = 0;
5779 /* Like emit_insn_group_barriers, but used when no final scheduling pass was run.
5780 This function has to emit all necessary group barriers. */
5782 static void
5783 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5785 rtx insn;
5787 init_insn_group_barriers ();
5789 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5791 if (GET_CODE (insn) == BARRIER)
5793 rtx last = prev_active_insn (insn);
5795 if (! last)
5796 continue;
5797 if (GET_CODE (last) == JUMP_INSN
5798 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5799 last = prev_active_insn (last);
5800 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5801 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5803 init_insn_group_barriers ();
5805 else if (INSN_P (insn))
5807 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5808 init_insn_group_barriers ();
5809 else if (group_barrier_needed (insn))
5811 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5812 init_insn_group_barriers ();
5813 group_barrier_needed (insn);
5821 /* Instruction scheduling support. */
5823 #define NR_BUNDLES 10
5825 /* A list of names of all available bundles. */
5827 static const char *bundle_name [NR_BUNDLES] =
5829 ".mii",
5830 ".mmi",
5831 ".mfi",
5832 ".mmf",
5833 #if NR_BUNDLES == 10
5834 ".bbb",
5835 ".mbb",
5836 #endif
5837 ".mib",
5838 ".mmb",
5839 ".mfb",
5840 ".mlx"
5843 /* Nonzero if we should insert stop bits into the schedule. */
5845 int ia64_final_schedule = 0;
5847 /* Codes of the corresponding queried units: */
5849 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5850 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5852 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5853 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5855 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5857 /* The following variable value is an insn group barrier. */
5859 static rtx dfa_stop_insn;
5861 /* The following variable value is the last issued insn. */
5863 static rtx last_scheduled_insn;
5865 /* The following variable value is size of the DFA state. */
5867 static size_t dfa_state_size;
5869 /* The following variable value is a pointer to a DFA state used as
5870 a temporary variable. */
5872 static state_t temp_dfa_state = NULL;
5874 /* The following variable value is DFA state after issuing the last
5875 insn. */
5877 static state_t prev_cycle_state = NULL;
5879 /* The following array element values are TRUE if the corresponding
5880 insn requires stop bits to be added before it. */
5882 static char *stops_p;
5884 /* The following variable is used to set up the array mentioned above. */
5886 static int stop_before_p = 0;
5888 /* The following variable value is length of the arrays `clocks' and
5889 `add_cycles'. */
5891 static int clocks_length;
5893 /* The following array element values are cycles on which the
5894 corresponding insn will be issued. The array is used only for
5895 Itanium1. */
5897 static int *clocks;
5899 /* The following array element values are the numbers of cycles that should be
5900 added to improve insn scheduling of MM-insns for Itanium 1. */
5902 static int *add_cycles;
5904 static rtx ia64_single_set (rtx);
5905 static void ia64_emit_insn_before (rtx, rtx);
5907 /* Map a bundle number to its pseudo-op. */
5909 const char *
5910 get_bundle_name (int b)
5912 return bundle_name[b];
5916 /* Return the maximum number of instructions a cpu can issue. */
5918 static int
5919 ia64_issue_rate (void)
5921 return 6;
5924 /* Helper function - like single_set, but look inside COND_EXEC. */
5926 static rtx
5927 ia64_single_set (rtx insn)
5929 rtx x = PATTERN (insn), ret;
5930 if (GET_CODE (x) == COND_EXEC)
5931 x = COND_EXEC_CODE (x);
5932 if (GET_CODE (x) == SET)
5933 return x;
5935 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
5936 Although they are not classical single sets, the second set is there just
5937 to protect the insn from being moved past FP-relative stack accesses. */
5938 switch (recog_memoized (insn))
5940 case CODE_FOR_prologue_allocate_stack:
5941 case CODE_FOR_epilogue_deallocate_stack:
5942 ret = XVECEXP (x, 0, 0);
5943 break;
5945 default:
5946 ret = single_set_2 (insn, x);
5947 break;
5950 return ret;
5953 /* Adjust the cost of a scheduling dependency. Return the new cost of
5954 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5956 static int
5957 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5959 enum attr_itanium_class dep_class;
5960 enum attr_itanium_class insn_class;
5962 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5963 return cost;
5965 insn_class = ia64_safe_itanium_class (insn);
5966 dep_class = ia64_safe_itanium_class (dep_insn);
5967 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5968 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5969 return 0;
5971 return cost;
5974 /* Like emit_insn_before, but skip cycle_display notes.
5975 ??? When cycle display notes are implemented, update this. */
5977 static void
5978 ia64_emit_insn_before (rtx insn, rtx before)
5980 emit_insn_before (insn, before);
5983 /* The following function marks insns that produce addresses for load
5984 and store insns. Such insns will be placed into M slots because this
5985 decreases latency for Itanium 1 (see function
5986 `ia64_produce_address_p' and the DFA descriptions). */
5988 static void
5989 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
5991 rtx insn, link, next, next_tail;
5993 /* Before reload, which_alternative is not set, which means that
5994 ia64_safe_itanium_class will produce wrong results for (at least)
5995 move instructions. */
5996 if (!reload_completed)
5997 return;
5999 next_tail = NEXT_INSN (tail);
6000 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6001 if (INSN_P (insn))
6002 insn->call = 0;
6003 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6004 if (INSN_P (insn)
6005 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6007 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6009 if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
6010 continue;
6011 next = XEXP (link, 0);
6012 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6013 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6014 && ia64_st_address_bypass_p (insn, next))
6015 break;
6016 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6017 || ia64_safe_itanium_class (next)
6018 == ITANIUM_CLASS_FLD)
6019 && ia64_ld_address_bypass_p (insn, next))
6020 break;
6022 insn->call = link != 0;
6026 /* We're beginning a new block. Initialize data structures as necessary. */
6028 static void
6029 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6030 int sched_verbose ATTRIBUTE_UNUSED,
6031 int max_ready ATTRIBUTE_UNUSED)
6033 #ifdef ENABLE_CHECKING
6034 rtx insn;
6036 if (reload_completed)
6037 for (insn = NEXT_INSN (current_sched_info->prev_head);
6038 insn != current_sched_info->next_tail;
6039 insn = NEXT_INSN (insn))
6040 gcc_assert (!SCHED_GROUP_P (insn));
6041 #endif
6042 last_scheduled_insn = NULL_RTX;
6043 init_insn_group_barriers ();
6046 /* We are about to begin issuing insns for this clock cycle.
6047 Override the default sort algorithm to better slot instructions. */
6049 static int
6050 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6051 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6052 int reorder_type)
6054 int n_asms;
6055 int n_ready = *pn_ready;
6056 rtx *e_ready = ready + n_ready;
6057 rtx *insnp;
6059 if (sched_verbose)
6060 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6062 if (reorder_type == 0)
6064 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6065 n_asms = 0;
6066 for (insnp = ready; insnp < e_ready; insnp++)
6067 if (insnp < e_ready)
6069 rtx insn = *insnp;
6070 enum attr_type t = ia64_safe_type (insn);
6071 if (t == TYPE_UNKNOWN)
6073 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6074 || asm_noperands (PATTERN (insn)) >= 0)
6076 rtx lowest = ready[n_asms];
6077 ready[n_asms] = insn;
6078 *insnp = lowest;
6079 n_asms++;
6081 else
6083 rtx highest = ready[n_ready - 1];
6084 ready[n_ready - 1] = insn;
6085 *insnp = highest;
6086 return 1;
6091 if (n_asms < n_ready)
6093 /* Some normal insns to process. Skip the asms. */
6094 ready += n_asms;
6095 n_ready -= n_asms;
6097 else if (n_ready > 0)
6098 return 1;
6101 if (ia64_final_schedule)
6103 int deleted = 0;
6104 int nr_need_stop = 0;
6106 for (insnp = ready; insnp < e_ready; insnp++)
6107 if (safe_group_barrier_needed (*insnp))
6108 nr_need_stop++;
6110 if (reorder_type == 1 && n_ready == nr_need_stop)
6111 return 0;
6112 if (reorder_type == 0)
6113 return 1;
6114 insnp = e_ready;
6115 /* Move down everything that needs a stop bit, preserving
6116 relative order. */
6117 while (insnp-- > ready + deleted)
6118 while (insnp >= ready + deleted)
6120 rtx insn = *insnp;
6121 if (! safe_group_barrier_needed (insn))
6122 break;
6123 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6124 *ready = insn;
6125 deleted++;
6127 n_ready -= deleted;
6128 ready += deleted;
6131 return 1;
6134 /* We are about to begin issuing insns for this clock cycle. Override
6135 the default sort algorithm to better slot instructions. */
6137 static int
6138 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6139 int clock_var)
6141 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6142 pn_ready, clock_var, 0);
6145 /* Like ia64_sched_reorder, but called after issuing each insn.
6146 Override the default sort algorithm to better slot instructions. */
6148 static int
6149 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6150 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6151 int *pn_ready, int clock_var)
6153 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6154 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6155 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6156 clock_var, 1);
6159 /* We are about to issue INSN. Return the number of insns left on the
6160 ready queue that can be issued this cycle. */
6162 static int
6163 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6164 int sched_verbose ATTRIBUTE_UNUSED,
6165 rtx insn ATTRIBUTE_UNUSED,
6166 int can_issue_more ATTRIBUTE_UNUSED)
6168 last_scheduled_insn = insn;
6169 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6170 if (reload_completed)
6172 int needed = group_barrier_needed (insn);
6174 gcc_assert (!needed);
6175 if (GET_CODE (insn) == CALL_INSN)
6176 init_insn_group_barriers ();
6177 stops_p [INSN_UID (insn)] = stop_before_p;
6178 stop_before_p = 0;
6180 return 1;
6183 /* We are choosing insn from the ready queue. Return nonzero if INSN
6184 can be chosen. */
6186 static int
6187 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6189 gcc_assert (insn && INSN_P (insn));
6190 return (!reload_completed
6191 || !safe_group_barrier_needed (insn));
6194 /* The following variable value is a pseudo-insn used by the DFA insn
6195 scheduler to change the DFA state when the simulated clock is
6196 increased. */
6198 static rtx dfa_pre_cycle_insn;
6200 /* We are about to begin issuing INSN. Return nonzero if we cannot
6201 issue it on the given cycle CLOCK, and return zero if we should not sort
6202 the ready queue on the next clock start. */
6204 static int
6205 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6206 int clock, int *sort_p)
6208 int setup_clocks_p = FALSE;
6210 gcc_assert (insn && INSN_P (insn));
6211 if ((reload_completed && safe_group_barrier_needed (insn))
6212 || (last_scheduled_insn
6213 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6214 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6215 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6217 init_insn_group_barriers ();
6218 if (verbose && dump)
6219 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6220 last_clock == clock ? " + cycle advance" : "");
6221 stop_before_p = 1;
6222 if (last_clock == clock)
6224 state_transition (curr_state, dfa_stop_insn);
6225 if (TARGET_EARLY_STOP_BITS)
6226 *sort_p = (last_scheduled_insn == NULL_RTX
6227 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6228 else
6229 *sort_p = 0;
6230 return 1;
6232 else if (reload_completed)
6233 setup_clocks_p = TRUE;
6234 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6235 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6236 state_reset (curr_state);
6237 else
6239 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6240 state_transition (curr_state, dfa_stop_insn);
6241 state_transition (curr_state, dfa_pre_cycle_insn);
6242 state_transition (curr_state, NULL);
6245 else if (reload_completed)
6246 setup_clocks_p = TRUE;
6247 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6248 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6249 && asm_noperands (PATTERN (insn)) < 0)
6251 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6253 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6255 rtx link;
6256 int d = -1;
6258 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6259 if (REG_NOTE_KIND (link) == 0)
6261 enum attr_itanium_class dep_class;
6262 rtx dep_insn = XEXP (link, 0);
6264 dep_class = ia64_safe_itanium_class (dep_insn);
6265 if ((dep_class == ITANIUM_CLASS_MMMUL
6266 || dep_class == ITANIUM_CLASS_MMSHF)
6267 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6268 && (d < 0
6269 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6270 d = last_clock - clocks [INSN_UID (dep_insn)];
6272 if (d >= 0)
6273 add_cycles [INSN_UID (insn)] = 3 - d;
6276 return 0;
6281 /* The following page contains abstract data `bundle states' which are
6282 used for bundling insns (inserting nops and template generation). */
6284 /* The following describes state of insn bundling. */
6286 struct bundle_state
6288 /* Unique bundle state number to identify them in the debugging
6289 output */
6290 int unique_num;
6291 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6292 /* number of nops before and after the insn */
6293 short before_nops_num, after_nops_num;
6294 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
6295 insn) */
6296 int cost; /* cost of the state in cycles */
6297 int accumulated_insns_num; /* number of all previous insns including
6298 nops. L is considered as 2 insns */
6299 int branch_deviation; /* deviation of previous branches from 3rd slots */
6300 struct bundle_state *next; /* next state with the same insn_num */
6301 struct bundle_state *originator; /* originator (previous insn state) */
6302 /* All bundle states are in the following chain. */
6303 struct bundle_state *allocated_states_chain;
6304 /* The DFA State after issuing the insn and the nops. */
6305 state_t dfa_state;
6308 /* The following maps an insn number to the corresponding bundle state. */
6310 static struct bundle_state **index_to_bundle_states;
6312 /* The unique number of next bundle state. */
6314 static int bundle_states_num;
6316 /* All allocated bundle states are in the following chain. */
6318 static struct bundle_state *allocated_bundle_states_chain;
6320 /* All allocated but not used bundle states are in the following
6321 chain. */
6323 static struct bundle_state *free_bundle_state_chain;
6326 /* The following function returns a free bundle state. */
6328 static struct bundle_state *
6329 get_free_bundle_state (void)
6331 struct bundle_state *result;
6333 if (free_bundle_state_chain != NULL)
6335 result = free_bundle_state_chain;
6336 free_bundle_state_chain = result->next;
6338 else
6340 result = xmalloc (sizeof (struct bundle_state));
6341 result->dfa_state = xmalloc (dfa_state_size);
6342 result->allocated_states_chain = allocated_bundle_states_chain;
6343 allocated_bundle_states_chain = result;
6345 result->unique_num = bundle_states_num++;
6346 return result;
6350 /* The following function frees given bundle state. */
6352 static void
6353 free_bundle_state (struct bundle_state *state)
6355 state->next = free_bundle_state_chain;
6356 free_bundle_state_chain = state;
6359 /* Start work with abstract data `bundle states'. */
6361 static void
6362 initiate_bundle_states (void)
6364 bundle_states_num = 0;
6365 free_bundle_state_chain = NULL;
6366 allocated_bundle_states_chain = NULL;
6369 /* Finish work with abstract data `bundle states'. */
6371 static void
6372 finish_bundle_states (void)
6374 struct bundle_state *curr_state, *next_state;
6376 for (curr_state = allocated_bundle_states_chain;
6377 curr_state != NULL;
6378 curr_state = next_state)
6380 next_state = curr_state->allocated_states_chain;
6381 free (curr_state->dfa_state);
6382 free (curr_state);
6386 /* Hash table of the bundle states. The key is dfa_state and insn_num
6387 of the bundle states. */
6389 static htab_t bundle_state_table;
6391 /* The function returns hash of BUNDLE_STATE. */
6393 static unsigned
6394 bundle_state_hash (const void *bundle_state)
6396 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6397 unsigned result, i;
6399 for (result = i = 0; i < dfa_state_size; i++)
6400 result += (((unsigned char *) state->dfa_state) [i]
6401 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6402 return result + state->insn_num;
6405 /* The function returns nonzero if the bundle state keys are equal. */
6407 static int
6408 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6410 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6411 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6413 return (state1->insn_num == state2->insn_num
6414 && memcmp (state1->dfa_state, state2->dfa_state,
6415 dfa_state_size) == 0);
6418 /* The function inserts the BUNDLE_STATE into the hash table. The
6419 function returns nonzero if the bundle has been inserted into the
6420 table. The table contains the best bundle state with given key. */
6422 static int
6423 insert_bundle_state (struct bundle_state *bundle_state)
6425 void **entry_ptr;
6427 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6428 if (*entry_ptr == NULL)
6430 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6431 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6432 *entry_ptr = (void *) bundle_state;
6433 return TRUE;
6435 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6436 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6437 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6438 > bundle_state->accumulated_insns_num
6439 || (((struct bundle_state *)
6440 *entry_ptr)->accumulated_insns_num
6441 == bundle_state->accumulated_insns_num
6442 && ((struct bundle_state *)
6443 *entry_ptr)->branch_deviation
6444 > bundle_state->branch_deviation))))
6447 struct bundle_state temp;
6449 temp = *(struct bundle_state *) *entry_ptr;
6450 *(struct bundle_state *) *entry_ptr = *bundle_state;
6451 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6452 *bundle_state = temp;
6454 return FALSE;
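/* Illustrative sketch, not part of the original file: the replacement test in
   insert_bundle_state above is equivalent to the following lexicographic
   comparison -- lower cost first, then fewer accumulated insns (i.e. fewer
   nops), then smaller branch deviation. The helper name is hypothetical and
   the block is disabled; it only restates the nested condition above. */
#if 0
static int
bundle_state_better_p (const struct bundle_state *a,
		       const struct bundle_state *b)
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  return a->branch_deviation < b->branch_deviation;
}
#endif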
6457 /* Start work with the hash table. */
6459 static void
6460 initiate_bundle_state_table (void)
6462 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6463 (htab_del) 0);
6466 /* Finish work with the hash table. */
6468 static void
6469 finish_bundle_state_table (void)
6471 htab_delete (bundle_state_table);
6476 /* The following variable is an insn `nop' used to check bundle states
6477 with different numbers of inserted nops. */
6479 static rtx ia64_nop;
6481 /* The following function tries to issue NOPS_NUM nops for the current
6482 state without advancing the processor cycle. If it fails, the
6483 function returns FALSE and frees the current state. */
6485 static int
6486 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6488 int i;
6490 for (i = 0; i < nops_num; i++)
6491 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6493 free_bundle_state (curr_state);
6494 return FALSE;
6496 return TRUE;
6499 /* The following function tries to issue INSN for the current
6500 state without advancing the processor cycle. If it fails, the
6501 function returns FALSE and frees the current state. */
6503 static int
6504 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6506 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6508 free_bundle_state (curr_state);
6509 return FALSE;
6511 return TRUE;
6514 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6515 starting with ORIGINATOR without advancing the processor cycle. If
6516 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6517 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6518 If successful, the function creates a new bundle state and
6519 inserts it into the hash table and into `index_to_bundle_states'. */
6521 static void
6522 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6523 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6525 struct bundle_state *curr_state;
6527 curr_state = get_free_bundle_state ();
6528 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6529 curr_state->insn = insn;
6530 curr_state->insn_num = originator->insn_num + 1;
6531 curr_state->cost = originator->cost;
6532 curr_state->originator = originator;
6533 curr_state->before_nops_num = before_nops_num;
6534 curr_state->after_nops_num = 0;
6535 curr_state->accumulated_insns_num
6536 = originator->accumulated_insns_num + before_nops_num;
6537 curr_state->branch_deviation = originator->branch_deviation;
6538 gcc_assert (insn);
6539 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6541 gcc_assert (GET_MODE (insn) != TImode);
6542 if (!try_issue_nops (curr_state, before_nops_num))
6543 return;
6544 if (!try_issue_insn (curr_state, insn))
6545 return;
6546 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6547 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6548 && curr_state->accumulated_insns_num % 3 != 0)
6550 free_bundle_state (curr_state);
6551 return;
6554 else if (GET_MODE (insn) != TImode)
6556 if (!try_issue_nops (curr_state, before_nops_num))
6557 return;
6558 if (!try_issue_insn (curr_state, insn))
6559 return;
6560 curr_state->accumulated_insns_num++;
6561 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
6562 && asm_noperands (PATTERN (insn)) < 0);
6564 if (ia64_safe_type (insn) == TYPE_L)
6565 curr_state->accumulated_insns_num++;
6567 else
6569 /* If this is an insn that must be first in a group, then don't allow
6570 nops to be emitted before it. Currently, alloc is the only such
6571 supported instruction. */
6572 /* ??? The bundling automatons should handle this for us, but they do
6573 not yet have support for the first_insn attribute. */
6574 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
6576 free_bundle_state (curr_state);
6577 return;
6580 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6581 state_transition (curr_state->dfa_state, NULL);
6582 curr_state->cost++;
6583 if (!try_issue_nops (curr_state, before_nops_num))
6584 return;
6585 if (!try_issue_insn (curr_state, insn))
6586 return;
6587 curr_state->accumulated_insns_num++;
6588 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6589 || asm_noperands (PATTERN (insn)) >= 0)
6591 /* Finish bundle containing asm insn. */
6592 curr_state->after_nops_num
6593 = 3 - curr_state->accumulated_insns_num % 3;
6594 curr_state->accumulated_insns_num
6595 += 3 - curr_state->accumulated_insns_num % 3;
6597 else if (ia64_safe_type (insn) == TYPE_L)
6598 curr_state->accumulated_insns_num++;
6600 if (ia64_safe_type (insn) == TYPE_B)
6601 curr_state->branch_deviation
6602 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6603 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6605 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6607 state_t dfa_state;
6608 struct bundle_state *curr_state1;
6609 struct bundle_state *allocated_states_chain;
6611 curr_state1 = get_free_bundle_state ();
6612 dfa_state = curr_state1->dfa_state;
6613 allocated_states_chain = curr_state1->allocated_states_chain;
6614 *curr_state1 = *curr_state;
6615 curr_state1->dfa_state = dfa_state;
6616 curr_state1->allocated_states_chain = allocated_states_chain;
6617 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6618 dfa_state_size);
6619 curr_state = curr_state1;
6621 if (!try_issue_nops (curr_state,
6622 3 - curr_state->accumulated_insns_num % 3))
6623 return;
6624 curr_state->after_nops_num
6625 = 3 - curr_state->accumulated_insns_num % 3;
6626 curr_state->accumulated_insns_num
6627 += 3 - curr_state->accumulated_insns_num % 3;
6629 if (!insert_bundle_state (curr_state))
6630 free_bundle_state (curr_state);
6631 return;
6634 /* The following function returns the position in the two-bundle window
6635 for the given STATE. */
6637 static int
6638 get_max_pos (state_t state)
6640 if (cpu_unit_reservation_p (state, pos_6))
6641 return 6;
6642 else if (cpu_unit_reservation_p (state, pos_5))
6643 return 5;
6644 else if (cpu_unit_reservation_p (state, pos_4))
6645 return 4;
6646 else if (cpu_unit_reservation_p (state, pos_3))
6647 return 3;
6648 else if (cpu_unit_reservation_p (state, pos_2))
6649 return 2;
6650 else if (cpu_unit_reservation_p (state, pos_1))
6651 return 1;
6652 else
6653 return 0;
6656 /* The function returns the code of a possible template for the given position
6657 and state. The function should be called only with the two values of
6658 position equal to 3 or 6. We avoid generating F NOPs by putting
6659 templates containing F insns at the end of the template search,
6660 because of an undocumented anomaly in McKinley-derived cores which can
6661 cause stalls if an F-unit insn (including a NOP) is issued within a
6662 six-cycle window after reading certain application registers (such
6663 as ar.bsp). Furthermore, power considerations also argue against
6664 the use of F-unit instructions unless they're really needed. */
6666 static int
6667 get_template (state_t state, int pos)
6669 switch (pos)
6671 case 3:
6672 if (cpu_unit_reservation_p (state, _0mmi_))
6673 return 1;
6674 else if (cpu_unit_reservation_p (state, _0mii_))
6675 return 0;
6676 else if (cpu_unit_reservation_p (state, _0mmb_))
6677 return 7;
6678 else if (cpu_unit_reservation_p (state, _0mib_))
6679 return 6;
6680 else if (cpu_unit_reservation_p (state, _0mbb_))
6681 return 5;
6682 else if (cpu_unit_reservation_p (state, _0bbb_))
6683 return 4;
6684 else if (cpu_unit_reservation_p (state, _0mmf_))
6685 return 3;
6686 else if (cpu_unit_reservation_p (state, _0mfi_))
6687 return 2;
6688 else if (cpu_unit_reservation_p (state, _0mfb_))
6689 return 8;
6690 else if (cpu_unit_reservation_p (state, _0mlx_))
6691 return 9;
6692 else
6693 gcc_unreachable ();
6694 case 6:
6695 if (cpu_unit_reservation_p (state, _1mmi_))
6696 return 1;
6697 else if (cpu_unit_reservation_p (state, _1mii_))
6698 return 0;
6699 else if (cpu_unit_reservation_p (state, _1mmb_))
6700 return 7;
6701 else if (cpu_unit_reservation_p (state, _1mib_))
6702 return 6;
6703 else if (cpu_unit_reservation_p (state, _1mbb_))
6704 return 5;
6705 else if (cpu_unit_reservation_p (state, _1bbb_))
6706 return 4;
6707 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6708 return 3;
6709 else if (cpu_unit_reservation_p (state, _1mfi_))
6710 return 2;
6711 else if (cpu_unit_reservation_p (state, _1mfb_))
6712 return 8;
6713 else if (cpu_unit_reservation_p (state, _1mlx_))
6714 return 9;
6715 else
6716 gcc_unreachable ();
6717 default:
6718 gcc_unreachable ();
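/* Illustrative sketch, not part of the original file: the codes returned by
   get_template index the bundle_name[] array above, so they can be turned
   into assembler pseudo-ops via get_bundle_name. For example, code 2
   corresponds to ".mfi" and code 9 to ".mlx", which is why the MM-insn
   fix-up code further below treats template 9 as MLX and rewrites it to
   2 (MFI). The helper name below is hypothetical and the block is disabled. */
#if 0
static void
print_chosen_template (FILE *dump, state_t state, int pos)
{
  int code = get_template (state, pos);	/* 0 .. NR_BUNDLES - 1 */
  fprintf (dump, "// chosen bundle template: %s\n", get_bundle_name (code));
}
#endif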
6722 /* The following function returns the first insn important for insn bundling
6723 at or after INSN and before TAIL. */
6725 static rtx
6726 get_next_important_insn (rtx insn, rtx tail)
6728 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6729 if (INSN_P (insn)
6730 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6731 && GET_CODE (PATTERN (insn)) != USE
6732 && GET_CODE (PATTERN (insn)) != CLOBBER)
6733 return insn;
6734 return NULL_RTX;
6737 /* The following function does insn bundling. Bundling means
6738 inserting templates and nop insns to fit insn groups into permitted
6739 templates. Instruction scheduling uses an NDFA (non-deterministic
6740 finite automaton) encoding information about the templates and the
6741 inserted nops. The nondeterminism of the automaton permits following
6742 all possible insn sequences very quickly.
6744 Unfortunately it is not possible to get information about inserting
6745 nop insns and used templates from the automaton states. The
6746 automaton only says that we can issue an insn, possibly inserting
6747 some nops before it and using some template. Therefore insn
6748 bundling in this function is implemented by using a DFA
6749 (deterministic finite automaton). We follow all possible insn
6750 sequences by inserting 0-2 nops (that is what the NDFA describes for
6751 insn scheduling) before/after each insn being bundled. We know the
6752 start of a simulated processor cycle from insn scheduling (an insn
6753 starting a new cycle has TImode).
6755 A simple implementation of insn bundling would create an enormous
6756 number of possible insn sequences satisfying the information about new
6757 cycle ticks taken from insn scheduling. To make the algorithm
6758 practical we use dynamic programming. Each decision (about
6759 inserting nops and implicitly about previous decisions) is described
6760 by the structure bundle_state (see above). If we generate the same
6761 bundle state (the key is the automaton state after issuing the insns and
6762 nops for it), we reuse the already generated one. As a consequence we
6763 reject some decisions which cannot improve the solution and
6764 reduce the memory needed by the algorithm.
6766 When we reach the end of the EBB (extended basic block), we choose the
6767 best sequence and then, moving back through the EBB, insert templates for
6768 the best alternative. The templates are obtained by querying the
6769 automaton state for each insn in the chosen bundle states.
6771 So the algorithm makes two (forward and backward) passes through the
6772 EBB. There is an additional forward pass through the EBB for the Itanium 1
6773 processor. This pass inserts more nops to make the dependency between
6774 a producer insn and an MMMUL/MMSHF insn at least 4 cycles long. */
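/* Illustrative sketch, not part of the original file: the forward pass of
   the dynamic programming described above, reduced to its core. For every
   state already reached for the previous insn we try to issue the next insn
   with 0, 1 or 2 leading nops; issue_nops_and_insn either rejects the
   attempt or records the resulting state, keeping only the best state per
   (DFA state, insn number) key. The function name is hypothetical, the
   bundle-end flags are simplified to 0, the 2-nop attempt is guarded by
   insn type in the real code, and index_to_bundle_states[0] is assumed to
   hold the initial state, as set up by bundling () below. */
#if 0
static void
bundle_forward_pass_sketch (rtx first_insn, rtx tail)
{
  struct bundle_state *curr_state;
  rtx insn, next_insn;
  int insn_num = 0;

  for (insn = get_next_important_insn (first_insn, tail);
       insn != NULL_RTX;
       insn = next_insn)
    {
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
      insn_num++;
      index_to_bundle_states[insn_num] = NULL;
      for (curr_state = index_to_bundle_states[insn_num - 1];
	   curr_state != NULL;
	   curr_state = curr_state->next)
	{
	  /* Branch on every permitted number of leading nops.  */
	  issue_nops_and_insn (curr_state, 2, insn, 0, 0);
	  issue_nops_and_insn (curr_state, 1, insn, 0, 0);
	  issue_nops_and_insn (curr_state, 0, insn, 0, 0);
	}
    }
}
#endif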
6776 static void
6777 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6779 struct bundle_state *curr_state, *next_state, *best_state;
6780 rtx insn, next_insn;
6781 int insn_num;
6782 int i, bundle_end_p, only_bundle_end_p, asm_p;
6783 int pos = 0, max_pos, template0, template1;
6784 rtx b;
6785 rtx nop;
6786 enum attr_type type;
6788 insn_num = 0;
6789 /* Count insns in the EBB. */
6790 for (insn = NEXT_INSN (prev_head_insn);
6791 insn && insn != tail;
6792 insn = NEXT_INSN (insn))
6793 if (INSN_P (insn))
6794 insn_num++;
6795 if (insn_num == 0)
6796 return;
6797 bundling_p = 1;
6798 dfa_clean_insn_cache ();
6799 initiate_bundle_state_table ();
6800 index_to_bundle_states = xmalloc ((insn_num + 2)
6801 * sizeof (struct bundle_state *));
6802 /* First (forward) pass -- generation of bundle states. */
6803 curr_state = get_free_bundle_state ();
6804 curr_state->insn = NULL;
6805 curr_state->before_nops_num = 0;
6806 curr_state->after_nops_num = 0;
6807 curr_state->insn_num = 0;
6808 curr_state->cost = 0;
6809 curr_state->accumulated_insns_num = 0;
6810 curr_state->branch_deviation = 0;
6811 curr_state->next = NULL;
6812 curr_state->originator = NULL;
6813 state_reset (curr_state->dfa_state);
6814 index_to_bundle_states [0] = curr_state;
6815 insn_num = 0;
6816 /* Shift cycle mark if it is put on insn which could be ignored. */
6817 for (insn = NEXT_INSN (prev_head_insn);
6818 insn != tail;
6819 insn = NEXT_INSN (insn))
6820 if (INSN_P (insn)
6821 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6822 || GET_CODE (PATTERN (insn)) == USE
6823 || GET_CODE (PATTERN (insn)) == CLOBBER)
6824 && GET_MODE (insn) == TImode)
6826 PUT_MODE (insn, VOIDmode);
6827 for (next_insn = NEXT_INSN (insn);
6828 next_insn != tail;
6829 next_insn = NEXT_INSN (next_insn))
6830 if (INSN_P (next_insn)
6831 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6832 && GET_CODE (PATTERN (next_insn)) != USE
6833 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6835 PUT_MODE (next_insn, TImode);
6836 break;
6839 /* Forward pass: generation of bundle states. */
6840 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6841 insn != NULL_RTX;
6842 insn = next_insn)
6844 gcc_assert (INSN_P (insn)
6845 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6846 && GET_CODE (PATTERN (insn)) != USE
6847 && GET_CODE (PATTERN (insn)) != CLOBBER);
6848 type = ia64_safe_type (insn);
6849 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6850 insn_num++;
6851 index_to_bundle_states [insn_num] = NULL;
6852 for (curr_state = index_to_bundle_states [insn_num - 1];
6853 curr_state != NULL;
6854 curr_state = next_state)
6856 pos = curr_state->accumulated_insns_num % 3;
6857 next_state = curr_state->next;
6858 /* We must fill up the current bundle in order to start a
6859 subsequent asm insn in a new bundle. An asm insn is always
6860 placed in a separate bundle. */
6861 only_bundle_end_p
6862 = (next_insn != NULL_RTX
6863 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6864 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6865 /* We may fill up the current bundle if it is the cycle end
6866 without a group barrier. */
6867 bundle_end_p
6868 = (only_bundle_end_p || next_insn == NULL_RTX
6869 || (GET_MODE (next_insn) == TImode
6870 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6871 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6872 || type == TYPE_S
6873 /* We need to insert 2 nops for cases like M_MII. To
6874 guarantee issuing all insns on the same cycle for
6875 Itanium 1, we need to issue 2 nops after the first M
6876 insn (MnnMII where n is a nop insn). */
6877 || ((type == TYPE_M || type == TYPE_A)
6878 && ia64_tune == PROCESSOR_ITANIUM
6879 && !bundle_end_p && pos == 1))
6880 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6881 only_bundle_end_p);
6882 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6883 only_bundle_end_p);
6884 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6885 only_bundle_end_p);
6887 gcc_assert (index_to_bundle_states [insn_num]);
6888 for (curr_state = index_to_bundle_states [insn_num];
6889 curr_state != NULL;
6890 curr_state = curr_state->next)
6891 if (verbose >= 2 && dump)
6893 /* This structure is taken from the generated code of the
6894 pipeline hazard recognizer (see file insn-attrtab.c).
6895 Please don't forget to change the structure if a new
6896 automaton is added to the .md file. */
6897 struct DFA_chip
6899 unsigned short one_automaton_state;
6900 unsigned short oneb_automaton_state;
6901 unsigned short two_automaton_state;
6902 unsigned short twob_automaton_state;
6905 fprintf
6906 (dump,
6907 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6908 curr_state->unique_num,
6909 (curr_state->originator == NULL
6910 ? -1 : curr_state->originator->unique_num),
6911 curr_state->cost,
6912 curr_state->before_nops_num, curr_state->after_nops_num,
6913 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6914 (ia64_tune == PROCESSOR_ITANIUM
6915 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6916 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6917 INSN_UID (insn));
6921 /* We should find a solution because the 2nd insn scheduling has
6922 found one. */
6923 gcc_assert (index_to_bundle_states [insn_num]);
6924 /* Find a state corresponding to the best insn sequence. */
6925 best_state = NULL;
6926 for (curr_state = index_to_bundle_states [insn_num];
6927 curr_state != NULL;
6928 curr_state = curr_state->next)
6929 /* We are only looking at states whose last bundle is fully
6930 filled. First we prefer insn sequences with minimal cost,
6931 then with a minimal number of inserted nops, and finally with branch insns
6932 placed in the 3rd slots. */
6933 if (curr_state->accumulated_insns_num % 3 == 0
6934 && (best_state == NULL || best_state->cost > curr_state->cost
6935 || (best_state->cost == curr_state->cost
6936 && (curr_state->accumulated_insns_num
6937 < best_state->accumulated_insns_num
6938 || (curr_state->accumulated_insns_num
6939 == best_state->accumulated_insns_num
6940 && curr_state->branch_deviation
6941 < best_state->branch_deviation)))))
6942 best_state = curr_state;
6943 /* Second (backward) pass: adding nops and templates. */
6944 insn_num = best_state->before_nops_num;
6945 template0 = template1 = -1;
6946 for (curr_state = best_state;
6947 curr_state->originator != NULL;
6948 curr_state = curr_state->originator)
6950 insn = curr_state->insn;
6951 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6952 || asm_noperands (PATTERN (insn)) >= 0);
6953 insn_num++;
6954 if (verbose >= 2 && dump)
6956 struct DFA_chip
6958 unsigned short one_automaton_state;
6959 unsigned short oneb_automaton_state;
6960 unsigned short two_automaton_state;
6961 unsigned short twob_automaton_state;
6964 fprintf
6965 (dump,
6966 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6967 curr_state->unique_num,
6968 (curr_state->originator == NULL
6969 ? -1 : curr_state->originator->unique_num),
6970 curr_state->cost,
6971 curr_state->before_nops_num, curr_state->after_nops_num,
6972 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6973 (ia64_tune == PROCESSOR_ITANIUM
6974 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6975 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6976 INSN_UID (insn));
6978 /* Find the position in the current bundle window. The window can
6979 contain at most two bundles. A two-bundle window means that
6980 the processor will make two bundle rotations. */
6981 max_pos = get_max_pos (curr_state->dfa_state);
6982 if (max_pos == 6
6983 /* The following (negative template number) means that the
6984 processor did one bundle rotation. */
6985 || (max_pos == 3 && template0 < 0))
6987 /* We are at the end of the window -- find template(s) for
6988 its bundle(s). */
6989 pos = max_pos;
6990 if (max_pos == 3)
6991 template0 = get_template (curr_state->dfa_state, 3);
6992 else
6994 template1 = get_template (curr_state->dfa_state, 3);
6995 template0 = get_template (curr_state->dfa_state, 6);
6998 if (max_pos > 3 && template1 < 0)
6999 /* This may happen when we have a stop inside a bundle. */
7001 gcc_assert (pos <= 3);
7002 template1 = get_template (curr_state->dfa_state, 3);
7003 pos += 3;
7005 if (!asm_p)
7006 /* Emit nops after the current insn. */
7007 for (i = 0; i < curr_state->after_nops_num; i++)
7009 nop = gen_nop ();
7010 emit_insn_after (nop, insn);
7011 pos--;
7012 gcc_assert (pos >= 0);
7013 if (pos % 3 == 0)
7015 /* We are at the start of a bundle: emit the template
7016 (it should be defined). */
7017 gcc_assert (template0 >= 0);
7018 b = gen_bundle_selector (GEN_INT (template0));
7019 ia64_emit_insn_before (b, nop);
7020 /* If we have a two-bundle window, we make one bundle
7021 rotation. Otherwise template0 will be undefined
7022 (a negative value). */
7023 template0 = template1;
7024 template1 = -1;
7027 /* Move the position backward in the window. A group barrier has
7028 no slot. An asm insn takes a whole bundle. */
7029 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7030 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7031 && asm_noperands (PATTERN (insn)) < 0)
7032 pos--;
7033 /* Long insn takes 2 slots. */
7034 if (ia64_safe_type (insn) == TYPE_L)
7035 pos--;
7036 gcc_assert (pos >= 0);
7037 if (pos % 3 == 0
7038 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7039 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7040 && asm_noperands (PATTERN (insn)) < 0)
7042 /* The current insn is at the bundle start: emit the
7043 template. */
7044 gcc_assert (template0 >= 0);
7045 b = gen_bundle_selector (GEN_INT (template0));
7046 ia64_emit_insn_before (b, insn);
7047 b = PREV_INSN (insn);
7048 insn = b;
7049 /* See comment above in analogous place for emitting nops
7050 after the insn. */
7051 template0 = template1;
7052 template1 = -1;
7054 /* Emit nops before the current insn. */
7055 for (i = 0; i < curr_state->before_nops_num; i++)
7057 nop = gen_nop ();
7058 ia64_emit_insn_before (nop, insn);
7059 nop = PREV_INSN (insn);
7060 insn = nop;
7061 pos--;
7062 gcc_assert (pos >= 0);
7063 if (pos % 3 == 0)
7065 /* See comment above in analogous place for emitting nops
7066 after the insn. */
7067 gcc_assert (template0 >= 0);
7068 b = gen_bundle_selector (GEN_INT (template0));
7069 ia64_emit_insn_before (b, insn);
7070 b = PREV_INSN (insn);
7071 insn = b;
7072 template0 = template1;
7073 template1 = -1;
7077 if (ia64_tune == PROCESSOR_ITANIUM)
7078 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
7079 Itanium 1 has a strange design: if the distance between an insn
7080 and a dependent MM-insn is less than 4 cycles, we get an additional
7081 6-cycle stall. So we make the distance equal to 4 cycles if it
7082 is less. */
7083 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7084 insn != NULL_RTX;
7085 insn = next_insn)
7087 gcc_assert (INSN_P (insn)
7088 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7089 && GET_CODE (PATTERN (insn)) != USE
7090 && GET_CODE (PATTERN (insn)) != CLOBBER);
7091 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7092 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
7093 /* We found a MM-insn which needs additional cycles. */
7095 rtx last;
7096 int i, j, n;
7097 int pred_stop_p;
7099 /* Now we are searching for the template of the bundle in
7100 which the MM-insn is placed and for the position of the
7101 insn in the bundle (0, 1, 2). We also check whether
7102 there is a stop before the insn. */
7103 last = prev_active_insn (insn);
7104 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7105 if (pred_stop_p)
7106 last = prev_active_insn (last);
7107 n = 0;
7108 for (;; last = prev_active_insn (last))
7109 if (recog_memoized (last) == CODE_FOR_bundle_selector)
7111 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7112 if (template0 == 9)
7113 /* The insn is in an MLX bundle. Change the template
7114 to MFI because we will add nops before the
7115 insn. This simplifies the subsequent code a lot. */
7116 PATTERN (last)
7117 = gen_bundle_selector (const2_rtx); /* -> MFI */
7118 break;
7120 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
7121 && (ia64_safe_itanium_class (last)
7122 != ITANIUM_CLASS_IGNORE))
7123 n++;
7124 /* Some correctness checks: the stop is not at the
7125 bundle start, there are no more than 3 insns in the bundle,
7126 and the MM-insn is not at the start of a bundle with
7127 template MLX. */
7128 gcc_assert ((!pred_stop_p || n)
7129 && n <= 2
7130 && (template0 != 9 || !n));
7131 /* Put nops after the insn in the bundle. */
7132 for (j = 3 - n; j > 0; j --)
7133 ia64_emit_insn_before (gen_nop (), insn);
7134 /* This takes into account that we will add N more nops
7135 before the insn later -- please see the code below. */
7136 add_cycles [INSN_UID (insn)]--;
7137 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7138 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7139 insn);
7140 if (pred_stop_p)
7141 add_cycles [INSN_UID (insn)]--;
7142 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7144 /* Insert "MII;" template. */
7145 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
7146 insn);
7147 ia64_emit_insn_before (gen_nop (), insn);
7148 ia64_emit_insn_before (gen_nop (), insn);
7149 if (i > 1)
7151 /* To decrease code size, we use "MI;I;"
7152 template. */
7153 ia64_emit_insn_before
7154 (gen_insn_group_barrier (GEN_INT (3)), insn);
7155 i--;
7157 ia64_emit_insn_before (gen_nop (), insn);
7158 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7159 insn);
7161 /* Put the MM-insn in the same slot of a bundle with the
7162 same template as the original one. */
7163 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7164 insn);
7165 /* To put the insn in the same slot, add the necessary number
7166 of nops. */
7167 for (j = n; j > 0; j --)
7168 ia64_emit_insn_before (gen_nop (), insn);
7169 /* Put the stop if the original bundle had it. */
7170 if (pred_stop_p)
7171 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7172 insn);
7175 free (index_to_bundle_states);
7176 finish_bundle_state_table ();
7177 bundling_p = 0;
7178 dfa_clean_insn_cache ();
7181 /* The following function is called at the end of scheduling BB or
7182 EBB. After reload, it inserts stop bits and does insn bundling. */
7184 static void
7185 ia64_sched_finish (FILE *dump, int sched_verbose)
7187 if (sched_verbose)
7188 fprintf (dump, "// Finishing schedule.\n");
7189 if (!reload_completed)
7190 return;
7191 if (reload_completed)
7193 final_emit_insn_group_barriers (dump);
7194 bundling (dump, sched_verbose, current_sched_info->prev_head,
7195 current_sched_info->next_tail);
7196 if (sched_verbose && dump)
7197 fprintf (dump, "// finishing %d-%d\n",
7198 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7199 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7201 return;
7205 /* The following function inserts stop bits in scheduled BB or EBB. */
7207 static void
7208 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7210 rtx insn;
7211 int need_barrier_p = 0;
7212 rtx prev_insn = NULL_RTX;
7214 init_insn_group_barriers ();
7216 for (insn = NEXT_INSN (current_sched_info->prev_head);
7217 insn != current_sched_info->next_tail;
7218 insn = NEXT_INSN (insn))
7220 if (GET_CODE (insn) == BARRIER)
7222 rtx last = prev_active_insn (insn);
7224 if (! last)
7225 continue;
7226 if (GET_CODE (last) == JUMP_INSN
7227 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7228 last = prev_active_insn (last);
7229 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7230 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7232 init_insn_group_barriers ();
7233 need_barrier_p = 0;
7234 prev_insn = NULL_RTX;
7236 else if (INSN_P (insn))
7238 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7240 init_insn_group_barriers ();
7241 need_barrier_p = 0;
7242 prev_insn = NULL_RTX;
7244 else if (need_barrier_p || group_barrier_needed (insn))
7246 if (TARGET_EARLY_STOP_BITS)
7248 rtx last;
7250 for (last = insn;
7251 last != current_sched_info->prev_head;
7252 last = PREV_INSN (last))
7253 if (INSN_P (last) && GET_MODE (last) == TImode
7254 && stops_p [INSN_UID (last)])
7255 break;
7256 if (last == current_sched_info->prev_head)
7257 last = insn;
7258 last = prev_active_insn (last);
7259 if (last
7260 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7261 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7262 last);
7263 init_insn_group_barriers ();
7264 for (last = NEXT_INSN (last);
7265 last != insn;
7266 last = NEXT_INSN (last))
7267 if (INSN_P (last))
7268 group_barrier_needed (last);
7270 else
7272 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7273 insn);
7274 init_insn_group_barriers ();
7276 group_barrier_needed (insn);
7277 prev_insn = NULL_RTX;
7279 else if (recog_memoized (insn) >= 0)
7280 prev_insn = insn;
7281 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7282 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7283 || asm_noperands (PATTERN (insn)) >= 0);
7290 /* If the following function returns TRUE, we will use the DFA
7291 insn scheduler. */
7293 static int
7294 ia64_first_cycle_multipass_dfa_lookahead (void)
7296 return (reload_completed ? 6 : 4);
7299 /* The following function initiates variable `dfa_pre_cycle_insn'. */
7301 static void
7302 ia64_init_dfa_pre_cycle_insn (void)
7304 if (temp_dfa_state == NULL)
7306 dfa_state_size = state_size ();
7307 temp_dfa_state = xmalloc (dfa_state_size);
7308 prev_cycle_state = xmalloc (dfa_state_size);
7310 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7311 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7312 recog_memoized (dfa_pre_cycle_insn);
7313 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7314 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7315 recog_memoized (dfa_stop_insn);
7318 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7319 used by the DFA insn scheduler. */
7321 static rtx
7322 ia64_dfa_pre_cycle_insn (void)
7324 return dfa_pre_cycle_insn;
7327 /* The following function returns TRUE if PRODUCER (of type ilog or
7328 ld) produces an address for CONSUMER (of type st or stf). */
7331 ia64_st_address_bypass_p (rtx producer, rtx consumer)
7333 rtx dest, reg, mem;
7335 gcc_assert (producer && consumer);
7336 dest = ia64_single_set (producer);
7337 gcc_assert (dest);
7338 reg = SET_DEST (dest);
7339 gcc_assert (reg);
7340 if (GET_CODE (reg) == SUBREG)
7341 reg = SUBREG_REG (reg);
7342 gcc_assert (GET_CODE (reg) == REG);
7344 dest = ia64_single_set (consumer);
7345 gcc_assert (dest);
7346 mem = SET_DEST (dest);
7347 gcc_assert (mem && GET_CODE (mem) == MEM);
7348 return reg_mentioned_p (reg, mem);
7351 /* The following function returns TRUE if PRODUCER (of type ilog or
7352 ld) produces an address for CONSUMER (of type ld or fld). */
7355 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7357 rtx dest, src, reg, mem;
7359 gcc_assert (producer && consumer);
7360 dest = ia64_single_set (producer);
7361 gcc_assert (dest);
7362 reg = SET_DEST (dest);
7363 gcc_assert (reg);
7364 if (GET_CODE (reg) == SUBREG)
7365 reg = SUBREG_REG (reg);
7366 gcc_assert (GET_CODE (reg) == REG);
7368 src = ia64_single_set (consumer);
7369 gcc_assert (src);
7370 mem = SET_SRC (src);
7371 gcc_assert (mem);
7372 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7373 mem = XVECEXP (mem, 0, 0);
7374 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7375 mem = XEXP (mem, 0);
7377 /* Note that LO_SUM is used for GOT loads. */
7378 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
7380 return reg_mentioned_p (reg, mem);
7383 /* The following function returns TRUE if INSN produces an address for a
7384 load/store insn. We will place such insns into an M slot because this
7385 decreases their latency. */
7388 ia64_produce_address_p (rtx insn)
7390 return insn->call;
7394 /* Emit pseudo-ops for the assembler to describe predicate relations.
7395 At present this assumes that we only consider predicate pairs to
7396 be mutex, and that the assembler can deduce proper values from
7397 straight-line code. */
7399 static void
7400 emit_predicate_relation_info (void)
7402 basic_block bb;
7404 FOR_EACH_BB_REVERSE (bb)
7406 int r;
7407 rtx head = BB_HEAD (bb);
7409 /* We only need such notes at code labels. */
7410 if (GET_CODE (head) != CODE_LABEL)
7411 continue;
7412 if (GET_CODE (NEXT_INSN (head)) == NOTE
7413 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7414 head = NEXT_INSN (head);
7416 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7417 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7419 rtx p = gen_rtx_REG (BImode, r);
7420 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7421 if (head == BB_END (bb))
7422 BB_END (bb) = n;
7423 head = n;
7427 /* Look for conditional calls that do not return, and protect predicate
7428 relations around them. Otherwise the assembler will assume the call
7429 returns, and complain about uses of call-clobbered predicates after
7430 the call. */
7431 FOR_EACH_BB_REVERSE (bb)
7433 rtx insn = BB_HEAD (bb);
7435 while (1)
7437 if (GET_CODE (insn) == CALL_INSN
7438 && GET_CODE (PATTERN (insn)) == COND_EXEC
7439 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7441 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7442 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7443 if (BB_HEAD (bb) == insn)
7444 BB_HEAD (bb) = b;
7445 if (BB_END (bb) == insn)
7446 BB_END (bb) = a;
7449 if (insn == BB_END (bb))
7450 break;
7451 insn = NEXT_INSN (insn);
7456 /* Perform machine dependent operations on the rtl chain INSNS. */
7458 static void
7459 ia64_reorg (void)
7461 /* We are freeing block_for_insn in the toplev to keep compatibility
7462 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7463 compute_bb_for_insn ();
7465 /* If optimizing, we'll have split before scheduling. */
7466 if (optimize == 0)
7467 split_all_insns (0);
7469 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7470 non-optimizing bootstrap. */
7471 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7473 if (ia64_flag_schedule_insns2)
7475 timevar_push (TV_SCHED2);
7476 ia64_final_schedule = 1;
7478 initiate_bundle_states ();
7479 ia64_nop = make_insn_raw (gen_nop ());
7480 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7481 recog_memoized (ia64_nop);
7482 clocks_length = get_max_uid () + 1;
7483 stops_p = xcalloc (1, clocks_length);
7484 if (ia64_tune == PROCESSOR_ITANIUM)
7486 clocks = xcalloc (clocks_length, sizeof (int));
7487 add_cycles = xcalloc (clocks_length, sizeof (int));
7489 if (ia64_tune == PROCESSOR_ITANIUM2)
7491 pos_1 = get_cpu_unit_code ("2_1");
7492 pos_2 = get_cpu_unit_code ("2_2");
7493 pos_3 = get_cpu_unit_code ("2_3");
7494 pos_4 = get_cpu_unit_code ("2_4");
7495 pos_5 = get_cpu_unit_code ("2_5");
7496 pos_6 = get_cpu_unit_code ("2_6");
7497 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7498 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7499 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7500 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7501 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7502 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7503 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7504 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7505 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7506 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7507 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7508 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7509 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7510 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7511 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7512 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7513 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7514 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7515 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7516 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7518 else
7520 pos_1 = get_cpu_unit_code ("1_1");
7521 pos_2 = get_cpu_unit_code ("1_2");
7522 pos_3 = get_cpu_unit_code ("1_3");
7523 pos_4 = get_cpu_unit_code ("1_4");
7524 pos_5 = get_cpu_unit_code ("1_5");
7525 pos_6 = get_cpu_unit_code ("1_6");
7526 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7527 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7528 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7529 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7530 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7531 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7532 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7533 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7534 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7535 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7536 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7537 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7538 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7539 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7540 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7541 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7542 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7543 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7544 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7545 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7547 schedule_ebbs (dump_file);
7548 finish_bundle_states ();
7549 if (ia64_tune == PROCESSOR_ITANIUM)
7551 free (add_cycles);
7552 free (clocks);
7554 free (stops_p);
7555 emit_insn_group_barriers (dump_file);
7557 ia64_final_schedule = 0;
7558 timevar_pop (TV_SCHED2);
7560 else
7561 emit_all_insn_group_barriers (dump_file);
7563 /* A call must not be the last instruction in a function, so that the
7564 return address remains within the function and unwinding works
7565 properly. Note that IA-64 differs from dwarf2 on this point. */
7566 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7568 rtx insn;
7569 int saw_stop = 0;
7571 insn = get_last_insn ();
7572 if (! INSN_P (insn))
7573 insn = prev_active_insn (insn);
7574 /* Skip over insns that expand to nothing. */
7575 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7577 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7578 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7579 saw_stop = 1;
7580 insn = prev_active_insn (insn);
7582 if (GET_CODE (insn) == CALL_INSN)
7584 if (! saw_stop)
7585 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7586 emit_insn (gen_break_f ());
7587 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7591 emit_predicate_relation_info ();
7593 if (ia64_flag_var_tracking)
7595 timevar_push (TV_VAR_TRACKING);
7596 variable_tracking_main ();
7597 timevar_pop (TV_VAR_TRACKING);
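#if 0
/* Editor's illustration (not part of GCC): why a call must not be the
   last insn when unwind tables are in use.  br.call records the
   address of the following bundle as the return address; if nothing
   followed the trailing call in a function like the one below, that
   address would lie just past the function and the unwinder could not
   map it back to this frame.  The code above therefore appends a stop
   bit and a padding insn (gen_break_f) after such a call.  */
extern void fatal_error (const char *msg) __attribute__ ((noreturn));

void
give_up (void)
{
  fatal_error ("giving up");    /* would otherwise be the last insn */
}
#endif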
7601 /* Return true if REGNO is used by the epilogue. */
7604 ia64_epilogue_uses (int regno)
7606 switch (regno)
7608 case R_GR (1):
7609 /* With a call to a function in another module, we will write a new
7610 value to "gp". After returning from such a call, we need to make
7611 sure the function restores the original gp-value, even if the
7612 function itself does not use the gp anymore. */
7613 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7615 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7616 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7617 /* For functions defined with the syscall_linkage attribute, all
7618 input registers are marked as live at all function exits. This
7619 prevents the register allocator from using the input registers,
7620 which in turn makes it possible to restart a system call after
7621 an interrupt without having to save/restore the input registers.
7622 This also prevents kernel data from leaking to application code. */
7623 return lookup_attribute ("syscall_linkage",
7624 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7626 case R_BR (0):
7627 /* Conditional return patterns can't represent the use of `b0' as
7628 the return address, so we force the value live this way. */
7629 return 1;
7631 case AR_PFS_REGNUM:
7632 /* Likewise for ar.pfs, which is used by br.ret. */
7633 return 1;
7635 default:
7636 return 0;
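#if 0
/* Editor's illustration (not part of GCC): a hypothetical function
   using the syscall_linkage attribute tested above.  Because all
   input registers are considered live at every exit of such a
   function, the register allocator never reuses in0-in7, so a system
   call can be restarted after an interrupt without saving them, and
   no kernel data leaks through them to application code.  */
long do_syscall_example (long nr, long arg) __attribute__ ((syscall_linkage));

long
do_syscall_example (long nr, long arg)
{
  return nr + arg;      /* placeholder body for illustration */
}
#endif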
7640 /* Return true if REGNO is used by the frame unwinder. */
7643 ia64_eh_uses (int regno)
7645 if (! reload_completed)
7646 return 0;
7648 if (current_frame_info.reg_save_b0
7649 && regno == current_frame_info.reg_save_b0)
7650 return 1;
7651 if (current_frame_info.reg_save_pr
7652 && regno == current_frame_info.reg_save_pr)
7653 return 1;
7654 if (current_frame_info.reg_save_ar_pfs
7655 && regno == current_frame_info.reg_save_ar_pfs)
7656 return 1;
7657 if (current_frame_info.reg_save_ar_unat
7658 && regno == current_frame_info.reg_save_ar_unat)
7659 return 1;
7660 if (current_frame_info.reg_save_ar_lc
7661 && regno == current_frame_info.reg_save_ar_lc)
7662 return 1;
7664 return 0;
7667 /* Return true if this goes in small data/bss. */
7669 /* ??? We could also support our own long data here, generating movl/add/ld8
7670 instead of addl,ld8/ld8. This makes the code bigger, but should make it
7671 faster because there is one less load. This would also cover incomplete
7672 types, which can't go in sdata/sbss. */
7674 static bool
7675 ia64_in_small_data_p (tree exp)
7677 if (TARGET_NO_SDATA)
7678 return false;
7680 /* We want to merge strings, so we never consider them small data. */
7681 if (TREE_CODE (exp) == STRING_CST)
7682 return false;
7684 /* Functions are never small data. */
7685 if (TREE_CODE (exp) == FUNCTION_DECL)
7686 return false;
7688 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7690 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7692 if (strcmp (section, ".sdata") == 0
7693 || strncmp (section, ".sdata.", 7) == 0
7694 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
7695 || strcmp (section, ".sbss") == 0
7696 || strncmp (section, ".sbss.", 6) == 0
7697 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
7698 return true;
7700 else
7702 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7704 /* If this is an incomplete type with size 0, then we can't put it
7705 in sdata because it might be too big when completed. */
7706 if (size > 0 && size <= ia64_section_threshold)
7707 return true;
7710 return false;
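#if 0
/* Editor's illustration (not part of GCC): how the predicate above
   classifies a few hypothetical objects.  The size cut-off is
   ia64_section_threshold; the explicit section names are the ones
   matched literally above.  */
static int counter;                     /* small data if its size is within the threshold */
static char big_buffer[65536];          /* larger than the threshold: ordinary bss        */
int tuned __attribute__ ((section (".sdata.tuned"))) = 0;  /* explicit .sdata.* name      */
static const char *greeting = "hello";  /* the string constant itself is never small data,
                                           so that it can still be merged                 */
#endif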
7713 /* Output assembly directives for prologue regions. */
7715 /* The current basic block number. */
7717 static bool last_block;
7719 /* True if we need a copy_state command at the start of the next block. */
7721 static bool need_copy_state;
7723 /* The function emits unwind directives for the start of an epilogue. */
7725 static void
7726 process_epilogue (void)
7728 /* If this isn't the last block of the function, then we need to label the
7729 current state, and copy it back in at the start of the next block. */
7731 if (!last_block)
7733 fprintf (asm_out_file, "\t.label_state %d\n",
7734 ++cfun->machine->state_num);
7735 need_copy_state = true;
7738 fprintf (asm_out_file, "\t.restore sp\n");
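/* Editor's note: a hedged sketch of the unwind directives involved.
   For an epilogue that is not in the last basic block, the function
   above emits a .label_state before the .restore, and
   process_for_unwind_directive later emits the matching .copy_state
   at the start of the following block:

        .label_state 1          // remember the unwind state here
        .restore sp             // the epilogue restores sp
          ...                   // return path
        .body
        .copy_state 1           // following block resumes the saved state

   The state number is cfun->machine->state_num, incremented above.  */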
7741 /* This function processes a SET pattern looking for specific patterns
7742 which result in emitting an assembly directive required for unwinding. */
7744 static int
7745 process_set (FILE *asm_out_file, rtx pat)
7747 rtx src = SET_SRC (pat);
7748 rtx dest = SET_DEST (pat);
7749 int src_regno, dest_regno;
7751 /* Look for the ALLOC insn. */
7752 if (GET_CODE (src) == UNSPEC_VOLATILE
7753 && XINT (src, 1) == UNSPECV_ALLOC
7754 && GET_CODE (dest) == REG)
7756 dest_regno = REGNO (dest);
7758 /* If this is the final destination for ar.pfs, then this must
7759 be the alloc in the prologue. */
7760 if (dest_regno == current_frame_info.reg_save_ar_pfs)
7761 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7762 ia64_dbx_register_number (dest_regno));
7763 else
7765 /* This must be an alloc before a sibcall. We must drop the
7766 old frame info; the easiest way to do that is to ensure
7767 we had a ".restore sp" directive followed by a new
7768 prologue. If the procedure doesn't have a
7769 memory-stack frame, we'll issue a dummy ".restore
7770 sp" now. */
7771 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
7772 /* If we haven't called process_epilogue () yet, do it now.  */
7773 process_epilogue ();
7774 fprintf (asm_out_file, "\t.prologue\n");
7776 return 1;
7779 /* Look for SP = .... */
7780 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7782 if (GET_CODE (src) == PLUS)
7784 rtx op0 = XEXP (src, 0);
7785 rtx op1 = XEXP (src, 1);
7787 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
7789 if (INTVAL (op1) < 0)
7790 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7791 -INTVAL (op1));
7792 else
7793 process_epilogue ();
7795 else
7797 gcc_assert (GET_CODE (src) == REG
7798 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
7799 process_epilogue ();
7802 return 1;
7805 /* Register move we need to look at. */
7806 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7808 src_regno = REGNO (src);
7809 dest_regno = REGNO (dest);
7811 switch (src_regno)
7813 case BR_REG (0):
7814 /* Saving return address pointer. */
7815 gcc_assert (dest_regno == current_frame_info.reg_save_b0);
7816 fprintf (asm_out_file, "\t.save rp, r%d\n",
7817 ia64_dbx_register_number (dest_regno));
7818 return 1;
7820 case PR_REG (0):
7821 gcc_assert (dest_regno == current_frame_info.reg_save_pr);
7822 fprintf (asm_out_file, "\t.save pr, r%d\n",
7823 ia64_dbx_register_number (dest_regno));
7824 return 1;
7826 case AR_UNAT_REGNUM:
7827 gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat);
7828 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7829 ia64_dbx_register_number (dest_regno));
7830 return 1;
7832 case AR_LC_REGNUM:
7833 gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc);
7834 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7835 ia64_dbx_register_number (dest_regno));
7836 return 1;
7838 case STACK_POINTER_REGNUM:
7839 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
7840 && frame_pointer_needed);
7841 fprintf (asm_out_file, "\t.vframe r%d\n",
7842 ia64_dbx_register_number (dest_regno));
7843 return 1;
7845 default:
7846 /* Everything else should indicate being stored to memory. */
7847 gcc_unreachable ();
7851 /* Memory store we need to look at. */
7852 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7854 long off;
7855 rtx base;
7856 const char *saveop;
7858 if (GET_CODE (XEXP (dest, 0)) == REG)
7860 base = XEXP (dest, 0);
7861 off = 0;
7863 else
7865 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
7866 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
7867 base = XEXP (XEXP (dest, 0), 0);
7868 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7871 if (base == hard_frame_pointer_rtx)
7873 saveop = ".savepsp";
7874 off = - off;
7876 else
7878 gcc_assert (base == stack_pointer_rtx);
7879 saveop = ".savesp";
7882 src_regno = REGNO (src);
7883 switch (src_regno)
7885 case BR_REG (0):
7886 gcc_assert (!current_frame_info.reg_save_b0);
7887 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7888 return 1;
7890 case PR_REG (0):
7891 gcc_assert (!current_frame_info.reg_save_pr);
7892 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7893 return 1;
7895 case AR_LC_REGNUM:
7896 gcc_assert (!current_frame_info.reg_save_ar_lc);
7897 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7898 return 1;
7900 case AR_PFS_REGNUM:
7901 gcc_assert (!current_frame_info.reg_save_ar_pfs);
7902 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7903 return 1;
7905 case AR_UNAT_REGNUM:
7906 gcc_assert (!current_frame_info.reg_save_ar_unat);
7907 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7908 return 1;
7910 case GR_REG (4):
7911 case GR_REG (5):
7912 case GR_REG (6):
7913 case GR_REG (7):
7914 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7915 1 << (src_regno - GR_REG (4)));
7916 return 1;
7918 case BR_REG (1):
7919 case BR_REG (2):
7920 case BR_REG (3):
7921 case BR_REG (4):
7922 case BR_REG (5):
7923 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7924 1 << (src_regno - BR_REG (1)));
7925 return 1;
7927 case FR_REG (2):
7928 case FR_REG (3):
7929 case FR_REG (4):
7930 case FR_REG (5):
7931 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7932 1 << (src_regno - FR_REG (2)));
7933 return 1;
7935 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7936 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7937 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7938 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7939 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7940 1 << (src_regno - FR_REG (12)));
7941 return 1;
7943 default:
7944 return 0;
7948 return 0;
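/* Editor's note: a hedged, hypothetical example of the directives the
   SET handlers above can produce for one prologue (register numbers
   and offsets invented for illustration; the surrounding .prologue/
   .body markers are emitted elsewhere):

        .fframe 32              // sp = sp - 32
        .save ar.pfs, r34       // alloc result kept in a GR
        .save rp, r33           // b0 kept in a GR
        .savesp ar.unat, 16     // ar.unat stored at sp + 16
        .save.f 0x1             // f2 stored (bit 0 of the fr mask)
        .save.gf 0x0, 0x10      // f16 stored (bit 4 of the fr mask)

   Note the FR_REG (12) base used for .save.gf above: presumably bits
   0-3 of that mask are reserved for f2-f5 (as in .save.f), so f16
   must land in bit 4, i.e. src_regno - 12.  */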
7952 /* This function looks at a single insn and emits any directives
7953 required to unwind this insn. */
7954 void
7955 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7957 if (flag_unwind_tables
7958 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7960 rtx pat;
7962 if (GET_CODE (insn) == NOTE
7963 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7965 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7967 /* Restore unwind state from immediately before the epilogue. */
7968 if (need_copy_state)
7970 fprintf (asm_out_file, "\t.body\n");
7971 fprintf (asm_out_file, "\t.copy_state %d\n",
7972 cfun->machine->state_num);
7973 need_copy_state = false;
7977 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7978 return;
7980 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7981 if (pat)
7982 pat = XEXP (pat, 0);
7983 else
7984 pat = PATTERN (insn);
7986 switch (GET_CODE (pat))
7988 case SET:
7989 process_set (asm_out_file, pat);
7990 break;
7992 case PARALLEL:
7994 int par_index;
7995 int limit = XVECLEN (pat, 0);
7996 for (par_index = 0; par_index < limit; par_index++)
7998 rtx x = XVECEXP (pat, 0, par_index);
7999 if (GET_CODE (x) == SET)
8000 process_set (asm_out_file, x);
8002 break;
8005 default:
8006 gcc_unreachable ();
8012 enum ia64_builtins
8014 IA64_BUILTIN_BSP,
8015 IA64_BUILTIN_FLUSHRS
8018 void
8019 ia64_init_builtins (void)
8021 tree fpreg_type;
8022 tree float80_type;
8024 /* The __fpreg type. */
8025 fpreg_type = make_node (REAL_TYPE);
8026 /* ??? The back end should know to load/save __fpreg variables using
8027 the ldf.fill and stf.spill instructions. */
8028 TYPE_PRECISION (fpreg_type) = 80;
8029 layout_type (fpreg_type);
8030 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
8032 /* The __float80 type. */
8033 float80_type = make_node (REAL_TYPE);
8034 TYPE_PRECISION (float80_type) = 80;
8035 layout_type (float80_type);
8036 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
8038 /* The __float128 type. */
8039 if (!TARGET_HPUX)
8041 tree float128_type = make_node (REAL_TYPE);
8042 TYPE_PRECISION (float128_type) = 128;
8043 layout_type (float128_type);
8044 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8046 else
8047 /* Under HPUX, this is a synonym for "long double". */
8048 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
8049 "__float128");
8051 #define def_builtin(name, type, code) \
8052 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
8053 NULL, NULL_TREE)
8055 def_builtin ("__builtin_ia64_bsp",
8056 build_function_type (ptr_type_node, void_list_node),
8057 IA64_BUILTIN_BSP);
8059 def_builtin ("__builtin_ia64_flushrs",
8060 build_function_type (void_type_node, void_list_node),
8061 IA64_BUILTIN_FLUSHRS);
8063 #undef def_builtin
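#if 0
/* Editor's illustration (not part of GCC): user code exercising the
   types and builtins registered above; it only compiles when
   targeting IA-64.  */
__float80 extended = 1.0;       /* the 80-bit type registered above */

void
flush_register_stack_example (void)
{
  void *bsp = __builtin_ia64_bsp ();    /* current backing store pointer */
  __builtin_ia64_flushrs ();            /* flush dirty stacked registers */
  (void) bsp;
}
#endif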
8067 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8068 enum machine_mode mode ATTRIBUTE_UNUSED,
8069 int ignore ATTRIBUTE_UNUSED)
8071 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8072 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8074 switch (fcode)
8076 case IA64_BUILTIN_BSP:
8077 if (! target || ! register_operand (target, DImode))
8078 target = gen_reg_rtx (DImode);
8079 emit_insn (gen_bsp_value (target));
8080 #ifdef POINTERS_EXTEND_UNSIGNED
8081 target = convert_memory_address (ptr_mode, target);
8082 #endif
8083 return target;
8085 case IA64_BUILTIN_FLUSHRS:
8086 emit_insn (gen_flushrs ());
8087 return const0_rtx;
8089 default:
8090 break;
8093 return NULL_RTX;
8096 /* For HP-UX IA64, aggregate parameters are passed in the most
8097 significant bits of the stack slot. */
8099 enum direction
8100 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8102 /* Exception to normal case for structures/unions/etc. */
8104 if (type && AGGREGATE_TYPE_P (type)
8105 && int_size_in_bytes (type) < UNITS_PER_WORD)
8106 return upward;
8108 /* Fall back to the default. */
8109 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
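#if 0
/* Editor's illustration (not part of GCC): the HP-UX rule implemented
   above.  UNITS_PER_WORD is 8 on IA-64, so a 4-byte aggregate such as
   the one below is padded "upward" and ends up in the most
   significant bits of its 8-byte argument slot, while scalars and
   larger aggregates fall back to the default rule.  */
struct rgba { unsigned char r, g, b, a; };      /* 4 bytes < UNITS_PER_WORD */

extern void draw_pixel (struct rgba color);
#endif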
8112 /* Linked list of all external functions that are to be emitted by GCC.
8113 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8114 order to avoid putting out names that are never really used. */
8116 struct extern_func_list GTY(())
8118 struct extern_func_list *next;
8119 tree decl;
8122 static GTY(()) struct extern_func_list *extern_func_head;
8124 static void
8125 ia64_hpux_add_extern_decl (tree decl)
8127 struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8129 p->decl = decl;
8130 p->next = extern_func_head;
8131 extern_func_head = p;
8134 /* Print out the list of used global functions. */
8136 static void
8137 ia64_hpux_file_end (void)
8139 struct extern_func_list *p;
8141 for (p = extern_func_head; p; p = p->next)
8143 tree decl = p->decl;
8144 tree id = DECL_ASSEMBLER_NAME (decl);
8146 gcc_assert (id);
8148 if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8150 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8152 TREE_ASM_WRITTEN (decl) = 1;
8153 (*targetm.asm_out.globalize_label) (asm_out_file, name);
8154 fputs (TYPE_ASM_OP, asm_out_file);
8155 assemble_name (asm_out_file, name);
8156 fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8160 extern_func_head = 0;
8163 /* Set SImode div/mod functions, since init_integral_libfuncs only
8164 initializes modes of word_mode and larger. Rename the TFmode libfuncs
8165 using the HPUX conventions. __divtf3 is used for XFmode; we need to
8166 keep it for backward compatibility. */
8168 static void
8169 ia64_init_libfuncs (void)
8171 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
8172 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
8173 set_optab_libfunc (smod_optab, SImode, "__modsi3");
8174 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
8176 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8177 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8178 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8179 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8180 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8182 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8183 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8184 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8185 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8186 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8187 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8189 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8190 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8191 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8192 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8194 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8195 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
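#if 0
/* Editor's illustration (not part of GCC): with the libfuncs above in
   effect, TFmode (128-bit) arithmetic and conversions expand to the
   HP-UX-style _U_Qf* routines rather than the default names.  On
   HP-UX "long double" is TFmode, so for example:  */
long double
quad_div (long double a, long double b)
{
  return a / b;                 /* calls _U_Qfdiv                */
}

double
quad_narrow (long double a)
{
  return (double) a;            /* calls _U_Qfcnvff_quad_to_dbl  */
}
#endif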
8198 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8200 static void
8201 ia64_hpux_init_libfuncs (void)
8203 ia64_init_libfuncs ();
8205 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8206 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8207 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8209 /* ia64_expand_compare uses this. */
8210 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8212 /* These should never be used. */
8213 set_optab_libfunc (eq_optab, TFmode, 0);
8214 set_optab_libfunc (ne_optab, TFmode, 0);
8215 set_optab_libfunc (gt_optab, TFmode, 0);
8216 set_optab_libfunc (ge_optab, TFmode, 0);
8217 set_optab_libfunc (lt_optab, TFmode, 0);
8218 set_optab_libfunc (le_optab, TFmode, 0);
8221 /* Rename the division and modulus functions in VMS. */
8223 static void
8224 ia64_vms_init_libfuncs (void)
8226 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8227 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8228 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8229 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8230 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8231 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8232 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8233 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8236 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8237 the HPUX conventions. */
8239 static void
8240 ia64_sysv4_init_libfuncs (void)
8242 ia64_init_libfuncs ();
8244 /* These functions are not part of the HPUX TFmode interface. We
8245 use them instead of _U_Qfcmp, which doesn't work the way we
8246 expect. */
8247 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8248 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8249 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8250 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8251 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8252 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8254 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8255 glibc doesn't have them. */
8258 /* Switch to the section to which we should output X. The only thing
8259 special we do here is to honor small data. */
8261 static void
8262 ia64_select_rtx_section (enum machine_mode mode, rtx x,
8263 unsigned HOST_WIDE_INT align)
8265 if (GET_MODE_SIZE (mode) > 0
8266 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8267 sdata_section ();
8268 else
8269 default_elf_select_rtx_section (mode, x, align);
8272 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8273 Pretend flag_pic is always set. */
8275 static void
8276 ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8278 default_elf_select_section_1 (exp, reloc, align, true);
8281 static void
8282 ia64_rwreloc_unique_section (tree decl, int reloc)
8284 default_unique_section_1 (decl, reloc, true);
8287 static void
8288 ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8289 unsigned HOST_WIDE_INT align)
8291 int save_pic = flag_pic;
8292 flag_pic = 1;
8293 ia64_select_rtx_section (mode, x, align);
8294 flag_pic = save_pic;
8297 #ifndef TARGET_RWRELOC
8298 #define TARGET_RWRELOC flag_pic
8299 #endif
8301 static unsigned int
8302 ia64_section_type_flags (tree decl, const char *name, int reloc)
8304 unsigned int flags = 0;
8306 if (strcmp (name, ".sdata") == 0
8307 || strncmp (name, ".sdata.", 7) == 0
8308 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
8309 || strncmp (name, ".sdata2.", 8) == 0
8310 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
8311 || strcmp (name, ".sbss") == 0
8312 || strncmp (name, ".sbss.", 6) == 0
8313 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
8314 flags = SECTION_SMALL;
8316 flags |= default_section_type_flags_1 (decl, name, reloc, TARGET_RWRELOC);
8317 return flags;
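#if 0
/* Editor's illustration (not part of GCC): placing an object in one
   of the explicitly named sections matched above; such a section gets
   the SECTION_SMALL flag and is handled like the other small-data
   sections.  */
int fast_counter __attribute__ ((section (".sdata.fast_counter"))) = 0;
#endif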
8320 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8321 structure type and that the address of that type should be passed
8322 in out0, rather than in r8. */
8324 static bool
8325 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8327 tree ret_type = TREE_TYPE (fntype);
8329 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8330 as the structure return address parameter, if the return value
8331 type has a non-trivial copy constructor or destructor. It is not
8332 clear if this same convention should be used for other
8333 programming languages. Until G++ 3.4, we incorrectly used r8 for
8334 these return values. */
8335 return (abi_version_at_least (2)
8336 && ret_type
8337 && TYPE_MODE (ret_type) == BLKmode
8338 && TREE_ADDRESSABLE (ret_type)
8339 && strcmp (lang_hooks.name, "GNU C++") == 0);
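/* Editor's note: a hedged C++ sketch of the distinction made above.
   Given

       struct plain  { long a[8]; };                 // trivially copyable
       struct handle { ~handle (); long fd; };       // non-trivial destructor

   a by-value return of "plain" passes the return-slot address in r8
   (see ia64_struct_value_rtx below), while a by-value return of
   "handle" must, per the Itanium C++ ABI, receive that address in
   out0, which in turn shifts "this"/the first argument to the next
   slot (see the this_parmno computation in ia64_output_mi_thunk
   below).  */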
8342 /* Output the assembler code for a thunk function. THUNK_DECL is the
8343 declaration for the thunk function itself, FUNCTION is the decl for
8344 the target function. DELTA is an immediate constant offset to be
8345 added to THIS. If VCALL_OFFSET is nonzero, the word at
8346 *(*this + vcall_offset) should be added to THIS. */
8348 static void
8349 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8350 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8351 tree function)
8353 rtx this, insn, funexp;
8354 unsigned int this_parmno;
8355 unsigned int this_regno;
8357 reload_completed = 1;
8358 epilogue_completed = 1;
8359 no_new_pseudos = 1;
8360 reset_block_changes ();
8362 /* Set things up as ia64_expand_prologue might. */
8363 last_scratch_gr_reg = 15;
8365 memset (&current_frame_info, 0, sizeof (current_frame_info));
8366 current_frame_info.spill_cfa_off = -16;
8367 current_frame_info.n_input_regs = 1;
8368 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8370 /* Mark the end of the (empty) prologue. */
8371 emit_note (NOTE_INSN_PROLOGUE_END);
8373 /* Figure out whether "this" will be the first parameter (the
8374 typical case) or the second parameter (as happens when the
8375 virtual function returns certain class objects). */
8376 this_parmno
8377 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8378 ? 1 : 0);
8379 this_regno = IN_REG (this_parmno);
8380 if (!TARGET_REG_NAMES)
8381 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8383 this = gen_rtx_REG (Pmode, this_regno);
8384 if (TARGET_ILP32)
8386 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8387 REG_POINTER (tmp) = 1;
8388 if (delta && CONST_OK_FOR_I (delta))
8390 emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8391 delta = 0;
8393 else
8394 emit_insn (gen_ptr_extend (this, tmp));
8397 /* Apply the constant offset, if required. */
8398 if (delta)
8400 rtx delta_rtx = GEN_INT (delta);
8402 if (!CONST_OK_FOR_I (delta))
8404 rtx tmp = gen_rtx_REG (Pmode, 2);
8405 emit_move_insn (tmp, delta_rtx);
8406 delta_rtx = tmp;
8408 emit_insn (gen_adddi3 (this, this, delta_rtx));
8411 /* Apply the offset from the vtable, if required. */
8412 if (vcall_offset)
8414 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8415 rtx tmp = gen_rtx_REG (Pmode, 2);
8417 if (TARGET_ILP32)
8419 rtx t = gen_rtx_REG (ptr_mode, 2);
8420 REG_POINTER (t) = 1;
8421 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8422 if (CONST_OK_FOR_I (vcall_offset))
8424 emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8425 vcall_offset_rtx));
8426 vcall_offset = 0;
8428 else
8429 emit_insn (gen_ptr_extend (tmp, t));
8431 else
8432 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8434 if (vcall_offset)
8436 if (!CONST_OK_FOR_J (vcall_offset))
8438 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8439 emit_move_insn (tmp2, vcall_offset_rtx);
8440 vcall_offset_rtx = tmp2;
8442 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8445 if (TARGET_ILP32)
8446 emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8447 gen_rtx_MEM (ptr_mode, tmp));
8448 else
8449 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8451 emit_insn (gen_adddi3 (this, this, tmp));
8454 /* Generate a tail call to the target function. */
8455 if (! TREE_USED (function))
8457 assemble_external (function);
8458 TREE_USED (function) = 1;
8460 funexp = XEXP (DECL_RTL (function), 0);
8461 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8462 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8463 insn = get_last_insn ();
8464 SIBLING_CALL_P (insn) = 1;
8466 /* Code generation for calls relies on splitting. */
8467 reload_completed = 1;
8468 epilogue_completed = 1;
8469 try_split (PATTERN (insn), insn, 0);
8471 emit_barrier ();
8473 /* Run just enough of rest_of_compilation to get the insns emitted.
8474 There's not really enough bulk here to make other passes such as
8475 instruction scheduling worthwhile. Note that use_thunk calls
8476 assemble_start_function and assemble_end_function. */
8478 insn_locators_initialize ();
8479 emit_all_insn_group_barriers (NULL);
8480 insn = get_insns ();
8481 shorten_branches (insn);
8482 final_start_function (insn, file, 1);
8483 final (insn, file, 1);
8484 final_end_function ();
8486 reload_completed = 0;
8487 epilogue_completed = 0;
8488 no_new_pseudos = 0;
8491 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
8493 static rtx
8494 ia64_struct_value_rtx (tree fntype,
8495 int incoming ATTRIBUTE_UNUSED)
8497 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
8498 return NULL_RTX;
8499 return gen_rtx_REG (Pmode, GR_REG (8));
8502 static bool
8503 ia64_scalar_mode_supported_p (enum machine_mode mode)
8505 switch (mode)
8507 case QImode:
8508 case HImode:
8509 case SImode:
8510 case DImode:
8511 case TImode:
8512 return true;
8514 case SFmode:
8515 case DFmode:
8516 case XFmode:
8517 return true;
8519 case TFmode:
8520 return TARGET_HPUX;
8522 default:
8523 return false;
8527 static bool
8528 ia64_vector_mode_supported_p (enum machine_mode mode)
8530 switch (mode)
8532 case V8QImode:
8533 case V4HImode:
8534 case V2SImode:
8535 return true;
8537 case V2SFmode:
8538 return true;
8540 default:
8541 return false;
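#if 0
/* Editor's illustration (not part of GCC): generic vector types that
   map onto the 8-byte vector modes accepted above.  */
typedef unsigned char v8qi __attribute__ ((vector_size (8)));   /* V8QImode */
typedef short         v4hi __attribute__ ((vector_size (8)));   /* V4HImode */
typedef int           v2si __attribute__ ((vector_size (8)));   /* V2SImode */
typedef float         v2sf __attribute__ ((vector_size (8)));   /* V2SFmode */

static v2si
add_pairs (v2si a, v2si b)
{
  return a + b;         /* handled by the vector add patterns in ia64.md */
}
#endif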
8545 #include "gt-ia64.h"