1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "real.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "except.h"
42 #include "function.h"
43 #include "ggc.h"
44 #include "basic-block.h"
45 #include "toplev.h"
46 #include "sched-int.h"
47 #include "timevar.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "tm_p.h"
51 #include "hashtab.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
56 /* This is used for communication between ASM_OUTPUT_LABEL and
57 ASM_OUTPUT_LABELREF. */
58 int ia64_asm_output_label = 0;
60 /* Define the information needed to generate branch and scc insns. This is
61 stored from the compare operation. */
62 struct rtx_def * ia64_compare_op0;
63 struct rtx_def * ia64_compare_op1;
65 /* Register names for ia64_expand_prologue. */
66 static const char * const ia64_reg_numbers[96] =
67 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
68 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
69 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
70 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
71 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
72 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
73 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
74 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
75 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
76 "r104","r105","r106","r107","r108","r109","r110","r111",
77 "r112","r113","r114","r115","r116","r117","r118","r119",
78 "r120","r121","r122","r123","r124","r125","r126","r127"};
80 /* ??? These strings could be shared with REGISTER_NAMES. */
81 static const char * const ia64_input_reg_names[8] =
82 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
84 /* ??? These strings could be shared with REGISTER_NAMES. */
85 static const char * const ia64_local_reg_names[80] =
86 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
87 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
88 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
89 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
90 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
91 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
92 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
93 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
94 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
95 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
97 /* ??? These strings could be shared with REGISTER_NAMES. */
98 static const char * const ia64_output_reg_names[8] =
99 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
101 /* Determines whether we use adds, addl, or movl to generate our
102 TLS immediate offsets. */
103 int ia64_tls_size = 22;
 105 /* Which cpu we are scheduling for. */
106 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
108 /* Determines whether we run our final scheduling pass or not. We always
109 avoid the normal second scheduling pass. */
110 static int ia64_flag_schedule_insns2;
112 /* Determines whether we run variable tracking in machine dependent
113 reorganization. */
114 static int ia64_flag_var_tracking;
116 /* Variables which are this size or smaller are put in the sdata/sbss
117 sections. */
119 unsigned int ia64_section_threshold;
121 /* The following variable is used by the DFA insn scheduler. The value is
122 TRUE if we do insn bundling instead of insn scheduling. */
123 int bundling_p = 0;
125 /* Structure to be filled in by ia64_compute_frame_size with register
126 save masks and offsets for the current function. */
128 struct ia64_frame_info
130 HOST_WIDE_INT total_size; /* size of the stack frame, not including
131 the caller's scratch area. */
132 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
133 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
134 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
135 HARD_REG_SET mask; /* mask of saved registers. */
136 unsigned int gr_used_mask; /* mask of registers in use as gr spill
137 registers or long-term scratches. */
138 int n_spilled; /* number of spilled registers. */
139 int reg_fp; /* register for fp. */
140 int reg_save_b0; /* save register for b0. */
141 int reg_save_pr; /* save register for prs. */
142 int reg_save_ar_pfs; /* save register for ar.pfs. */
143 int reg_save_ar_unat; /* save register for ar.unat. */
144 int reg_save_ar_lc; /* save register for ar.lc. */
145 int reg_save_gp; /* save register for gp. */
146 int n_input_regs; /* number of input registers used. */
147 int n_local_regs; /* number of local registers used. */
148 int n_output_regs; /* number of output registers used. */
149 int n_rotate_regs; /* number of rotating registers used. */
151 char need_regstk; /* true if a .regstk directive needed. */
152 char initialized; /* true if the data is finalized. */
155 /* Current frame information calculated by ia64_compute_frame_size. */
156 static struct ia64_frame_info current_frame_info;
158 static int ia64_first_cycle_multipass_dfa_lookahead (void);
159 static void ia64_dependencies_evaluation_hook (rtx, rtx);
160 static void ia64_init_dfa_pre_cycle_insn (void);
161 static rtx ia64_dfa_pre_cycle_insn (void);
162 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
163 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
164 static rtx gen_tls_get_addr (void);
165 static rtx gen_thread_pointer (void);
166 static int find_gr_spill (int);
167 static int next_scratch_gr_reg (void);
168 static void mark_reg_gr_used_mask (rtx, void *);
169 static void ia64_compute_frame_size (HOST_WIDE_INT);
170 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
171 static void finish_spill_pointers (void);
172 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
173 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
174 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
175 static rtx gen_movdi_x (rtx, rtx, rtx);
176 static rtx gen_fr_spill_x (rtx, rtx, rtx);
177 static rtx gen_fr_restore_x (rtx, rtx, rtx);
179 static enum machine_mode hfa_element_mode (tree, bool);
180 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
181 tree, int *, int);
182 static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
183 tree, bool);
184 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
185 tree, bool);
186 static bool ia64_function_ok_for_sibcall (tree, tree);
187 static bool ia64_return_in_memory (tree, tree);
188 static bool ia64_rtx_costs (rtx, int, int, int *);
189 static void fix_range (const char *);
190 static bool ia64_handle_option (size_t, const char *, int);
191 static struct machine_function * ia64_init_machine_status (void);
192 static void emit_insn_group_barriers (FILE *);
193 static void emit_all_insn_group_barriers (FILE *);
194 static void final_emit_insn_group_barriers (FILE *);
195 static void emit_predicate_relation_info (void);
196 static void ia64_reorg (void);
197 static bool ia64_in_small_data_p (tree);
198 static void process_epilogue (void);
199 static int process_set (FILE *, rtx);
201 static bool ia64_assemble_integer (rtx, unsigned int, int);
202 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
203 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void ia64_output_function_end_prologue (FILE *);
206 static int ia64_issue_rate (void);
207 static int ia64_adjust_cost (rtx, rtx, rtx, int);
208 static void ia64_sched_init (FILE *, int, int);
209 static void ia64_sched_finish (FILE *, int);
210 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
211 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
212 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
213 static int ia64_variable_issue (FILE *, int, rtx, int);
215 static struct bundle_state *get_free_bundle_state (void);
216 static void free_bundle_state (struct bundle_state *);
217 static void initiate_bundle_states (void);
218 static void finish_bundle_states (void);
219 static unsigned bundle_state_hash (const void *);
220 static int bundle_state_eq_p (const void *, const void *);
221 static int insert_bundle_state (struct bundle_state *);
222 static void initiate_bundle_state_table (void);
223 static void finish_bundle_state_table (void);
224 static int try_issue_nops (struct bundle_state *, int);
225 static int try_issue_insn (struct bundle_state *, rtx);
226 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
227 static int get_max_pos (state_t);
228 static int get_template (state_t, int);
230 static rtx get_next_important_insn (rtx, rtx);
231 static void bundling (FILE *, int, rtx, rtx);
233 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
234 HOST_WIDE_INT, tree);
235 static void ia64_file_start (void);
237 static void ia64_select_rtx_section (enum machine_mode, rtx,
238 unsigned HOST_WIDE_INT);
239 static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
240 ATTRIBUTE_UNUSED;
241 static void ia64_rwreloc_unique_section (tree, int)
242 ATTRIBUTE_UNUSED;
243 static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
244 unsigned HOST_WIDE_INT)
245 ATTRIBUTE_UNUSED;
246 static unsigned int ia64_section_type_flags (tree, const char *, int);
247 static void ia64_hpux_add_extern_decl (tree decl)
248 ATTRIBUTE_UNUSED;
249 static void ia64_hpux_file_end (void)
250 ATTRIBUTE_UNUSED;
251 static void ia64_init_libfuncs (void)
252 ATTRIBUTE_UNUSED;
253 static void ia64_hpux_init_libfuncs (void)
254 ATTRIBUTE_UNUSED;
255 static void ia64_sysv4_init_libfuncs (void)
256 ATTRIBUTE_UNUSED;
257 static void ia64_vms_init_libfuncs (void)
258 ATTRIBUTE_UNUSED;
260 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
261 static void ia64_encode_section_info (tree, rtx, int);
262 static rtx ia64_struct_value_rtx (tree, int);
263 static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
264 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
265 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
266 static bool ia64_cannot_force_const_mem (rtx);
268 /* Table of valid machine attributes. */
269 static const struct attribute_spec ia64_attribute_table[] =
271 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
272 { "syscall_linkage", 0, 0, false, true, true, NULL },
273 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
274 { NULL, 0, 0, false, false, false, NULL }
277 /* Initialize the GCC target structure. */
278 #undef TARGET_ATTRIBUTE_TABLE
279 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
281 #undef TARGET_INIT_BUILTINS
282 #define TARGET_INIT_BUILTINS ia64_init_builtins
284 #undef TARGET_EXPAND_BUILTIN
285 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
287 #undef TARGET_ASM_BYTE_OP
288 #define TARGET_ASM_BYTE_OP "\tdata1\t"
289 #undef TARGET_ASM_ALIGNED_HI_OP
290 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
291 #undef TARGET_ASM_ALIGNED_SI_OP
292 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
293 #undef TARGET_ASM_ALIGNED_DI_OP
294 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
295 #undef TARGET_ASM_UNALIGNED_HI_OP
296 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
297 #undef TARGET_ASM_UNALIGNED_SI_OP
298 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
299 #undef TARGET_ASM_UNALIGNED_DI_OP
300 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
301 #undef TARGET_ASM_INTEGER
302 #define TARGET_ASM_INTEGER ia64_assemble_integer
304 #undef TARGET_ASM_FUNCTION_PROLOGUE
305 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
306 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
307 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
308 #undef TARGET_ASM_FUNCTION_EPILOGUE
309 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
311 #undef TARGET_IN_SMALL_DATA_P
312 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
314 #undef TARGET_SCHED_ADJUST_COST
315 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
316 #undef TARGET_SCHED_ISSUE_RATE
317 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
318 #undef TARGET_SCHED_VARIABLE_ISSUE
319 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
320 #undef TARGET_SCHED_INIT
321 #define TARGET_SCHED_INIT ia64_sched_init
322 #undef TARGET_SCHED_FINISH
323 #define TARGET_SCHED_FINISH ia64_sched_finish
324 #undef TARGET_SCHED_REORDER
325 #define TARGET_SCHED_REORDER ia64_sched_reorder
326 #undef TARGET_SCHED_REORDER2
327 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
329 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
330 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
332 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
333 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
335 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
336 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
337 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
338 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
340 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
341 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
342 ia64_first_cycle_multipass_dfa_lookahead_guard
344 #undef TARGET_SCHED_DFA_NEW_CYCLE
345 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
347 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
348 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
349 #undef TARGET_PASS_BY_REFERENCE
350 #define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
351 #undef TARGET_ARG_PARTIAL_BYTES
352 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
354 #undef TARGET_ASM_OUTPUT_MI_THUNK
355 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
356 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
357 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
359 #undef TARGET_ASM_FILE_START
360 #define TARGET_ASM_FILE_START ia64_file_start
362 #undef TARGET_RTX_COSTS
363 #define TARGET_RTX_COSTS ia64_rtx_costs
364 #undef TARGET_ADDRESS_COST
365 #define TARGET_ADDRESS_COST hook_int_rtx_0
367 #undef TARGET_MACHINE_DEPENDENT_REORG
368 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
370 #undef TARGET_ENCODE_SECTION_INFO
371 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
373 #undef TARGET_SECTION_TYPE_FLAGS
374 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
376 /* ??? ABI doesn't allow us to define this. */
377 #if 0
378 #undef TARGET_PROMOTE_FUNCTION_ARGS
379 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
380 #endif
382 /* ??? ABI doesn't allow us to define this. */
383 #if 0
384 #undef TARGET_PROMOTE_FUNCTION_RETURN
385 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
386 #endif
388 /* ??? Investigate. */
389 #if 0
390 #undef TARGET_PROMOTE_PROTOTYPES
391 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
392 #endif
394 #undef TARGET_STRUCT_VALUE_RTX
395 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
396 #undef TARGET_RETURN_IN_MEMORY
397 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
398 #undef TARGET_SETUP_INCOMING_VARARGS
399 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
400 #undef TARGET_STRICT_ARGUMENT_NAMING
401 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
402 #undef TARGET_MUST_PASS_IN_STACK
403 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
405 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
406 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
408 #undef TARGET_UNWIND_EMIT
409 #define TARGET_UNWIND_EMIT process_for_unwind_directive
411 #undef TARGET_SCALAR_MODE_SUPPORTED_P
412 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
413 #undef TARGET_VECTOR_MODE_SUPPORTED_P
414 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
416 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
417 in an order different from the specified program order. */
418 #undef TARGET_RELAXED_ORDERING
419 #define TARGET_RELAXED_ORDERING true
421 #undef TARGET_DEFAULT_TARGET_FLAGS
422 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
423 #undef TARGET_HANDLE_OPTION
424 #define TARGET_HANDLE_OPTION ia64_handle_option
426 #undef TARGET_CANNOT_FORCE_CONST_MEM
427 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
429 struct gcc_target targetm = TARGET_INITIALIZER;
431 typedef enum
433 ADDR_AREA_NORMAL, /* normal address area */
434 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
436 ia64_addr_area;
438 static GTY(()) tree small_ident1;
439 static GTY(()) tree small_ident2;
441 static void
442 init_idents (void)
444 if (small_ident1 == 0)
446 small_ident1 = get_identifier ("small");
447 small_ident2 = get_identifier ("__small__");
451 /* Retrieve the address area that has been chosen for the given decl. */
453 static ia64_addr_area
454 ia64_get_addr_area (tree decl)
456 tree model_attr;
458 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
459 if (model_attr)
461 tree id;
463 init_idents ();
464 id = TREE_VALUE (TREE_VALUE (model_attr));
465 if (id == small_ident1 || id == small_ident2)
466 return ADDR_AREA_SMALL;
468 return ADDR_AREA_NORMAL;
471 static tree
472 ia64_handle_model_attribute (tree *node, tree name, tree args,
473 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
475 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
476 ia64_addr_area area;
477 tree arg, decl = *node;
479 init_idents ();
480 arg = TREE_VALUE (args);
481 if (arg == small_ident1 || arg == small_ident2)
483 addr_area = ADDR_AREA_SMALL;
485 else
487 warning (OPT_Wattributes, "invalid argument of %qs attribute",
488 IDENTIFIER_POINTER (name));
489 *no_add_attrs = true;
492 switch (TREE_CODE (decl))
494 case VAR_DECL:
495 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
496 == FUNCTION_DECL)
497 && !TREE_STATIC (decl))
499 error ("%Jan address area attribute cannot be specified for "
500 "local variables", decl, decl);
501 *no_add_attrs = true;
503 area = ia64_get_addr_area (decl);
504 if (area != ADDR_AREA_NORMAL && addr_area != area)
506 error ("%Jaddress area of '%s' conflicts with previous "
507 "declaration", decl, decl);
508 *no_add_attrs = true;
510 break;
512 case FUNCTION_DECL:
513 error ("%Jaddress area attribute cannot be specified for functions",
514 decl, decl);
515 *no_add_attrs = true;
516 break;
518 default:
519 warning (OPT_Wattributes, "%qs attribute ignored",
520 IDENTIFIER_POINTER (name));
521 *no_add_attrs = true;
522 break;
525 return NULL_TREE;
528 static void
529 ia64_encode_addr_area (tree decl, rtx symbol)
531 int flags;
533 flags = SYMBOL_REF_FLAGS (symbol);
534 switch (ia64_get_addr_area (decl))
536 case ADDR_AREA_NORMAL: break;
537 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
538 default: gcc_unreachable ();
540 SYMBOL_REF_FLAGS (symbol) = flags;
543 static void
544 ia64_encode_section_info (tree decl, rtx rtl, int first)
546 default_encode_section_info (decl, rtl, first);
548 /* Careful not to prod global register variables. */
549 if (TREE_CODE (decl) == VAR_DECL
550 && GET_CODE (DECL_RTL (decl)) == MEM
551 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
552 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
553 ia64_encode_addr_area (decl, XEXP (rtl, 0));
556 /* Implement CONST_OK_FOR_LETTER_P. */
558 bool
559 ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
561 switch (c)
563 case 'I':
564 return CONST_OK_FOR_I (value);
565 case 'J':
566 return CONST_OK_FOR_J (value);
567 case 'K':
568 return CONST_OK_FOR_K (value);
569 case 'L':
570 return CONST_OK_FOR_L (value);
571 case 'M':
572 return CONST_OK_FOR_M (value);
573 case 'N':
574 return CONST_OK_FOR_N (value);
575 case 'O':
576 return CONST_OK_FOR_O (value);
577 case 'P':
578 return CONST_OK_FOR_P (value);
579 default:
580 return false;
584 /* Implement CONST_DOUBLE_OK_FOR_LETTER_P. */
586 bool
587 ia64_const_double_ok_for_letter_p (rtx value, char c)
589 switch (c)
591 case 'G':
592 return CONST_DOUBLE_OK_FOR_G (value);
593 default:
594 return false;
598 /* Implement EXTRA_CONSTRAINT. */
600 bool
601 ia64_extra_constraint (rtx value, char c)
603 switch (c)
605 case 'Q':
606 /* Non-volatile memory for FP_REG loads/stores. */
 607 return memory_operand (value, VOIDmode) && !MEM_VOLATILE_P (value);
609 case 'R':
610 /* 1..4 for shladd arguments. */
611 return (GET_CODE (value) == CONST_INT
612 && INTVAL (value) >= 1 && INTVAL (value) <= 4);
614 case 'S':
615 /* Non-post-inc memory for asms and other unsavory creatures. */
616 return (GET_CODE (value) == MEM
617 && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
618 && (reload_in_progress || memory_operand (value, VOIDmode)));
620 case 'T':
621 /* Symbol ref to small-address-area. */
622 return small_addr_symbolic_operand (value, VOIDmode);
624 case 'U':
625 /* Vector zero. */
626 return value == CONST0_RTX (GET_MODE (value));
628 case 'W':
629 /* An integer vector, such that conversion to an integer yields a
630 value appropriate for an integer 'J' constraint. */
631 if (GET_CODE (value) == CONST_VECTOR
632 && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
634 value = simplify_subreg (DImode, value, GET_MODE (value), 0);
635 return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
637 return false;
639 case 'Y':
640 /* A V2SF vector containing elements that satisfy 'G'. */
641 return
642 (GET_CODE (value) == CONST_VECTOR
643 && GET_MODE (value) == V2SFmode
644 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
645 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
647 default:
648 return false;
652 /* Return 1 if the operands of a move are ok. */
655 ia64_move_ok (rtx dst, rtx src)
657 /* If we're under init_recog_no_volatile, we'll not be able to use
658 memory_operand. So check the code directly and don't worry about
659 the validity of the underlying address, which should have been
660 checked elsewhere anyway. */
661 if (GET_CODE (dst) != MEM)
662 return 1;
663 if (GET_CODE (src) == MEM)
664 return 0;
665 if (register_operand (src, VOIDmode))
666 return 1;
 668 /* Otherwise, this must be a constant, and it must be either 0, 0.0, or 1.0. */
669 if (INTEGRAL_MODE_P (GET_MODE (dst)))
670 return src == const0_rtx;
671 else
672 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
676 addp4_optimize_ok (rtx op1, rtx op2)
 678 return (basereg_operand (op1, GET_MODE (op1)) !=
 679 basereg_operand (op2, GET_MODE (op2)));
682 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
683 Return the length of the field, or <= 0 on failure. */
686 ia64_depz_field_mask (rtx rop, rtx rshift)
688 unsigned HOST_WIDE_INT op = INTVAL (rop);
689 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
691 /* Get rid of the zero bits we're shifting in. */
692 op >>= shift;
694 /* We must now have a solid block of 1's at bit 0. */
695 return exact_log2 (op + 1);
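/* A worked example of the check above (values chosen purely for
   illustration): a dep.z of length 6 at position 3 uses the mask 0x1f8
   (bits 3..8 set).  Shifting right by 3 gives 0x3f, and
   exact_log2 (0x3f + 1) returns 6, the field length.  A non-contiguous
   mask such as 0x1e8 shifts to 0x3d, and exact_log2 (0x3e) returns -1,
   signalling failure.  */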
698 /* Return the TLS model to use for ADDR. */
700 static enum tls_model
701 tls_symbolic_operand_type (rtx addr)
703 enum tls_model tls_kind = 0;
705 if (GET_CODE (addr) == CONST)
707 if (GET_CODE (XEXP (addr, 0)) == PLUS
708 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
709 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
711 else if (GET_CODE (addr) == SYMBOL_REF)
712 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
714 return tls_kind;
717 /* Return true if X is a constant that is valid for some immediate
718 field in an instruction. */
720 bool
721 ia64_legitimate_constant_p (rtx x)
723 switch (GET_CODE (x))
725 case CONST_INT:
726 case LABEL_REF:
727 return true;
729 case CONST_DOUBLE:
730 if (GET_MODE (x) == VOIDmode)
731 return true;
732 return CONST_DOUBLE_OK_FOR_G (x);
734 case CONST:
735 case SYMBOL_REF:
736 return tls_symbolic_operand_type (x) == 0;
738 default:
739 return false;
743 /* Don't allow TLS addresses to get spilled to memory. */
745 static bool
746 ia64_cannot_force_const_mem (rtx x)
748 return tls_symbolic_operand_type (x) != 0;
751 /* Expand a symbolic constant load. */
753 bool
754 ia64_expand_load_address (rtx dest, rtx src)
756 gcc_assert (GET_CODE (dest) == REG);
 758 /* ILP32 mode still loads 64 bits of data from the GOT. This avoids
759 having to pointer-extend the value afterward. Other forms of address
760 computation below are also more natural to compute as 64-bit quantities.
761 If we've been given an SImode destination register, change it. */
762 if (GET_MODE (dest) != Pmode)
763 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
765 if (TARGET_NO_PIC)
766 return false;
767 if (small_addr_symbolic_operand (src, VOIDmode))
768 return false;
770 if (TARGET_AUTO_PIC)
771 emit_insn (gen_load_gprel64 (dest, src));
772 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
773 emit_insn (gen_load_fptr (dest, src));
774 else if (sdata_symbolic_operand (src, VOIDmode))
775 emit_insn (gen_load_gprel (dest, src));
776 else
778 HOST_WIDE_INT addend = 0;
779 rtx tmp;
781 /* We did split constant offsets in ia64_expand_move, and we did try
782 to keep them split in move_operand, but we also allowed reload to
783 rematerialize arbitrary constants rather than spill the value to
784 the stack and reload it. So we have to be prepared here to split
785 them apart again. */
786 if (GET_CODE (src) == CONST)
788 HOST_WIDE_INT hi, lo;
790 hi = INTVAL (XEXP (XEXP (src, 0), 1));
791 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
792 hi = hi - lo;
794 if (lo != 0)
796 addend = lo;
797 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
801 tmp = gen_rtx_HIGH (Pmode, src);
802 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
803 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
805 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
806 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
808 if (addend)
810 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
811 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
815 return true;
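/* For illustration, the hi/lo split above sign-extends the low 14 bits of
   the offset: with hi = 0x12345, (0x12345 & 0x3fff) is 0x2345, xor 0x2000
   gives 0x0345, and subtracting 0x2000 yields lo = -0x1cbb.  The adjusted
   hi = 0x12345 - (-0x1cbb) = 0x14000 is then a multiple of 0x4000, and lo
   always lands in [-0x2000, 0x1fff], presumably so that it fits a 14-bit
   add immediate.  */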
818 static GTY(()) rtx gen_tls_tga;
819 static rtx
820 gen_tls_get_addr (void)
822 if (!gen_tls_tga)
823 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
824 return gen_tls_tga;
827 static GTY(()) rtx thread_pointer_rtx;
828 static rtx
829 gen_thread_pointer (void)
831 if (!thread_pointer_rtx)
832 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
833 return thread_pointer_rtx;
836 static rtx
837 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
838 HOST_WIDE_INT addend)
840 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
841 rtx orig_op0 = op0, orig_op1 = op1;
842 HOST_WIDE_INT addend_lo, addend_hi;
844 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
845 addend_hi = addend - addend_lo;
847 switch (tls_kind)
849 case TLS_MODEL_GLOBAL_DYNAMIC:
850 start_sequence ();
852 tga_op1 = gen_reg_rtx (Pmode);
853 emit_insn (gen_load_dtpmod (tga_op1, op1));
855 tga_op2 = gen_reg_rtx (Pmode);
856 emit_insn (gen_load_dtprel (tga_op2, op1));
858 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
859 LCT_CONST, Pmode, 2, tga_op1,
860 Pmode, tga_op2, Pmode);
862 insns = get_insns ();
863 end_sequence ();
865 if (GET_MODE (op0) != Pmode)
866 op0 = tga_ret;
867 emit_libcall_block (insns, op0, tga_ret, op1);
868 break;
870 case TLS_MODEL_LOCAL_DYNAMIC:
 871 /* ??? This isn't the completely proper way to do local-dynamic.
872 If the call to __tls_get_addr is used only by a single symbol,
873 then we should (somehow) move the dtprel to the second arg
874 to avoid the extra add. */
875 start_sequence ();
877 tga_op1 = gen_reg_rtx (Pmode);
878 emit_insn (gen_load_dtpmod (tga_op1, op1));
880 tga_op2 = const0_rtx;
882 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
883 LCT_CONST, Pmode, 2, tga_op1,
884 Pmode, tga_op2, Pmode);
886 insns = get_insns ();
887 end_sequence ();
889 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
890 UNSPEC_LD_BASE);
891 tmp = gen_reg_rtx (Pmode);
892 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
894 if (!register_operand (op0, Pmode))
895 op0 = gen_reg_rtx (Pmode);
896 if (TARGET_TLS64)
898 emit_insn (gen_load_dtprel (op0, op1));
899 emit_insn (gen_adddi3 (op0, tmp, op0));
901 else
902 emit_insn (gen_add_dtprel (op0, op1, tmp));
903 break;
905 case TLS_MODEL_INITIAL_EXEC:
906 op1 = plus_constant (op1, addend_hi);
907 addend = addend_lo;
909 tmp = gen_reg_rtx (Pmode);
910 emit_insn (gen_load_tprel (tmp, op1));
912 if (!register_operand (op0, Pmode))
913 op0 = gen_reg_rtx (Pmode);
914 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
915 break;
917 case TLS_MODEL_LOCAL_EXEC:
918 if (!register_operand (op0, Pmode))
919 op0 = gen_reg_rtx (Pmode);
921 op1 = orig_op1;
922 addend = 0;
923 if (TARGET_TLS64)
925 emit_insn (gen_load_tprel (op0, op1));
926 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
928 else
929 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
930 break;
932 default:
933 gcc_unreachable ();
936 if (addend)
937 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
938 orig_op0, 1, OPTAB_DIRECT);
939 if (orig_op0 == op0)
940 return NULL_RTX;
941 if (GET_MODE (orig_op0) == Pmode)
942 return op0;
943 return gen_lowpart (GET_MODE (orig_op0), op0);
947 ia64_expand_move (rtx op0, rtx op1)
949 enum machine_mode mode = GET_MODE (op0);
951 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
952 op1 = force_reg (mode, op1);
954 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
956 HOST_WIDE_INT addend = 0;
957 enum tls_model tls_kind;
958 rtx sym = op1;
960 if (GET_CODE (op1) == CONST
961 && GET_CODE (XEXP (op1, 0)) == PLUS
962 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
964 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
965 sym = XEXP (XEXP (op1, 0), 0);
968 tls_kind = tls_symbolic_operand_type (sym);
969 if (tls_kind)
970 return ia64_expand_tls_address (tls_kind, op0, sym, addend);
972 if (any_offset_symbol_operand (sym, mode))
973 addend = 0;
974 else if (aligned_offset_symbol_operand (sym, mode))
976 HOST_WIDE_INT addend_lo, addend_hi;
978 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
979 addend_hi = addend - addend_lo;
981 if (addend_lo != 0)
983 op1 = plus_constant (sym, addend_hi);
984 addend = addend_lo;
986 else
987 addend = 0;
989 else
990 op1 = sym;
992 if (reload_completed)
994 /* We really should have taken care of this offset earlier. */
995 gcc_assert (addend == 0);
996 if (ia64_expand_load_address (op0, op1))
997 return NULL_RTX;
1000 if (addend)
1002 rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
1004 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1006 op1 = expand_simple_binop (mode, PLUS, subtarget,
1007 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1008 if (op0 == op1)
1009 return NULL_RTX;
1013 return op1;
1016 /* Split a move from OP1 to OP0 conditional on COND. */
1018 void
1019 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1021 rtx insn, first = get_last_insn ();
1023 emit_move_insn (op0, op1);
1025 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1026 if (INSN_P (insn))
1027 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1028 PATTERN (insn));
1031 /* Split a post-reload TImode or TFmode reference into two DImode
1032 components. This is made extra difficult by the fact that we do
1033 not get any scratch registers to work with, because reload cannot
1034 be prevented from giving us a scratch that overlaps the register
1035 pair involved. So instead, when addressing memory, we tweak the
1036 pointer register up and back down with POST_INCs. Or up and not
1037 back down when we can get away with it.
1039 REVERSED is true when the loads must be done in reversed order
1040 (high word first) for correctness. DEAD is true when the pointer
1041 dies with the second insn we generate and therefore the second
1042 address must not carry a postmodify.
1044 May return an insn which is to be emitted after the moves. */
1046 static rtx
1047 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1049 rtx fixup = 0;
1051 switch (GET_CODE (in))
1053 case REG:
1054 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1055 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1056 break;
1058 case CONST_INT:
1059 case CONST_DOUBLE:
1060 /* Cannot occur reversed. */
1061 gcc_assert (!reversed);
1063 if (GET_MODE (in) != TFmode)
1064 split_double (in, &out[0], &out[1]);
1065 else
1066 /* split_double does not understand how to split a TFmode
1067 quantity into a pair of DImode constants. */
1069 REAL_VALUE_TYPE r;
1070 unsigned HOST_WIDE_INT p[2];
1071 long l[4]; /* TFmode is 128 bits */
1073 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1074 real_to_target (l, &r, TFmode);
1076 if (FLOAT_WORDS_BIG_ENDIAN)
1078 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1079 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1081 else
1083 p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1084 p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1086 out[0] = GEN_INT (p[0]);
1087 out[1] = GEN_INT (p[1]);
1089 break;
1091 case MEM:
1093 rtx base = XEXP (in, 0);
1094 rtx offset;
1096 switch (GET_CODE (base))
1098 case REG:
1099 if (!reversed)
1101 out[0] = adjust_automodify_address
1102 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1103 out[1] = adjust_automodify_address
1104 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1106 else
1108 /* Reversal requires a pre-increment, which can only
1109 be done as a separate insn. */
1110 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1111 out[0] = adjust_automodify_address
1112 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1113 out[1] = adjust_address (in, DImode, 0);
1115 break;
1117 case POST_INC:
1118 gcc_assert (!reversed && !dead);
1120 /* Just do the increment in two steps. */
1121 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1122 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1123 break;
1125 case POST_DEC:
1126 gcc_assert (!reversed && !dead);
1128 /* Add 8, subtract 24. */
1129 base = XEXP (base, 0);
1130 out[0] = adjust_automodify_address
1131 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1132 out[1] = adjust_automodify_address
1133 (in, DImode,
1134 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1136 break;
1138 case POST_MODIFY:
1139 gcc_assert (!reversed && !dead);
1141 /* Extract and adjust the modification. This case is
1142 trickier than the others, because we might have an
1143 index register, or we might have a combined offset that
1144 doesn't fit a signed 9-bit displacement field. We can
1145 assume the incoming expression is already legitimate. */
1146 offset = XEXP (base, 1);
1147 base = XEXP (base, 0);
1149 out[0] = adjust_automodify_address
1150 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1152 if (GET_CODE (XEXP (offset, 1)) == REG)
1154 /* Can't adjust the postmodify to match. Emit the
1155 original, then a separate addition insn. */
1156 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1157 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1159 else
1161 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1162 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1164 /* Again the postmodify cannot be made to match,
1165 but in this case it's more efficient to get rid
1166 of the postmodify entirely and fix up with an
1167 add insn. */
1168 out[1] = adjust_automodify_address (in, DImode, base, 8);
1169 fixup = gen_adddi3
1170 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1172 else
1174 /* Combined offset still fits in the displacement field.
1175 (We cannot overflow it at the high end.) */
1176 out[1] = adjust_automodify_address
1177 (in, DImode, gen_rtx_POST_MODIFY
1178 (Pmode, base, gen_rtx_PLUS
1179 (Pmode, base,
1180 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1184 break;
1186 default:
1187 gcc_unreachable ();
1189 break;
1192 default:
1193 gcc_unreachable ();
1196 return fixup;
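/* A worked example of the POST_DEC case above: the original TImode access
   post-decrements the pointer by 16.  The split emits the low word with a
   POST_INC (+8) and the high word with a POST_MODIFY of -24, so the net
   adjustment is 8 - 24 = -16, matching the original addressing mode.  */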
1199 /* Split a TImode or TFmode move instruction after reload.
1200 This is used by *movtf_internal and *movti_internal. */
1201 void
1202 ia64_split_tmode_move (rtx operands[])
1204 rtx in[2], out[2], insn;
1205 rtx fixup[2];
1206 bool dead = false;
1207 bool reversed = false;
1209 /* It is possible for reload to decide to overwrite a pointer with
1210 the value it points to. In that case we have to do the loads in
1211 the appropriate order so that the pointer is not destroyed too
1212 early. Also we must not generate a postmodify for that second
1213 load, or rws_access_regno will die. */
1214 if (GET_CODE (operands[1]) == MEM
1215 && reg_overlap_mentioned_p (operands[0], operands[1]))
1217 rtx base = XEXP (operands[1], 0);
1218 while (GET_CODE (base) != REG)
1219 base = XEXP (base, 0);
1221 if (REGNO (base) == REGNO (operands[0]))
1222 reversed = true;
1223 dead = true;
1225 /* Another reason to do the moves in reversed order is if the first
1226 element of the target register pair is also the second element of
1227 the source register pair. */
1228 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1229 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1230 reversed = true;
1232 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1233 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1235 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1236 if (GET_CODE (EXP) == MEM \
1237 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1238 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1239 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1240 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1241 XEXP (XEXP (EXP, 0), 0), \
1242 REG_NOTES (INSN))
1244 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1245 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1246 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1248 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1249 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1250 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1252 if (fixup[0])
1253 emit_insn (fixup[0]);
1254 if (fixup[1])
1255 emit_insn (fixup[1]);
1257 #undef MAYBE_ADD_REG_INC_NOTE
1260 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1261 through memory plus an extra GR scratch register. Except that you can
1262 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1263 SECONDARY_RELOAD_CLASS, but not both.
1265 We got into problems in the first place by allowing a construct like
1266 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1267 This solution attempts to prevent this situation from occurring. When
1268 we see something like the above, we spill the inner register to memory. */
1271 spill_xfmode_operand (rtx in, int force)
1273 if (GET_CODE (in) == SUBREG
1274 && GET_MODE (SUBREG_REG (in)) == TImode
1275 && GET_CODE (SUBREG_REG (in)) == REG)
1277 rtx memt = assign_stack_temp (TImode, 16, 0);
1278 emit_move_insn (memt, SUBREG_REG (in));
1279 return adjust_address (memt, XFmode, 0);
1281 else if (force && GET_CODE (in) == REG)
1283 rtx memx = assign_stack_temp (XFmode, 16, 0);
1284 emit_move_insn (memx, in);
1285 return memx;
1287 else
1288 return in;
1291 /* Emit comparison instruction if necessary, returning the expression
1292 that holds the compare result in the proper mode. */
1294 static GTY(()) rtx cmptf_libfunc;
1297 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1299 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1300 rtx cmp;
1302 /* If we have a BImode input, then we already have a compare result, and
1303 do not need to emit another comparison. */
1304 if (GET_MODE (op0) == BImode)
1306 gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
1307 cmp = op0;
1309 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
 1310 magic number as its third argument that indicates what to do.
1311 The return value is an integer to be compared against zero. */
1312 else if (GET_MODE (op0) == TFmode)
1314 enum qfcmp_magic {
1315 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1316 QCMP_UNORD = 2,
1317 QCMP_EQ = 4,
1318 QCMP_LT = 8,
1319 QCMP_GT = 16
1320 } magic;
1321 enum rtx_code ncode;
1322 rtx ret, insns;
1324 gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
1325 switch (code)
1327 /* 1 = equal, 0 = not equal. Equality operators do
1328 not raise FP_INVALID when given an SNaN operand. */
1329 case EQ: magic = QCMP_EQ; ncode = NE; break;
1330 case NE: magic = QCMP_EQ; ncode = EQ; break;
1331 /* isunordered() from C99. */
1332 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1333 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1334 /* Relational operators raise FP_INVALID when given
1335 an SNaN operand. */
1336 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1337 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1338 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1339 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1340 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
 1341 Expanders for buneq etc. would have to be added to ia64.md
1342 for this to be useful. */
1343 default: gcc_unreachable ();
1346 start_sequence ();
1348 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1349 op0, TFmode, op1, TFmode,
1350 GEN_INT (magic), DImode);
1351 cmp = gen_reg_rtx (BImode);
1352 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1353 gen_rtx_fmt_ee (ncode, BImode,
1354 ret, const0_rtx)));
1356 insns = get_insns ();
1357 end_sequence ();
1359 emit_libcall_block (insns, cmp, cmp,
1360 gen_rtx_fmt_ee (code, BImode, op0, op1));
1361 code = NE;
1363 else
1365 cmp = gen_reg_rtx (BImode);
1366 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1367 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1368 code = NE;
1371 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
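/* For illustration of the magic-number encoding above: a LE comparison
   passes magic = QCMP_LT | QCMP_EQ | QCMP_INV = 8 + 4 + 1 = 13 and tests
   the library result with NE, so "a <= b" effectively becomes
   "_U_Qfcmp (a, b, 13) != 0".  */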
1374 /* Generate an integral vector comparison. */
1376 static bool
1377 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1378 rtx dest, rtx op0, rtx op1)
1380 bool negate = false;
1381 rtx x;
1383 switch (code)
1385 case EQ:
1386 case GT:
1387 break;
1389 case NE:
1390 code = EQ;
1391 negate = true;
1392 break;
1394 case LE:
1395 code = GT;
1396 negate = true;
1397 break;
1399 case GE:
1400 negate = true;
1401 /* FALLTHRU */
1403 case LT:
1404 x = op0;
1405 op0 = op1;
1406 op1 = x;
1407 code = GT;
1408 break;
1410 case GTU:
1411 case GEU:
1412 case LTU:
1413 case LEU:
1415 rtx w0h, w0l, w1h, w1l, ch, cl;
1416 enum machine_mode wmode;
1417 rtx (*unpack_l) (rtx, rtx, rtx);
1418 rtx (*unpack_h) (rtx, rtx, rtx);
1419 rtx (*pack) (rtx, rtx, rtx);
1421 /* We don't have native unsigned comparisons, but we can generate
1422 them better than generic code can. */
1424 gcc_assert (mode != V2SImode);
1425 switch (mode)
1427 case V8QImode:
1428 wmode = V4HImode;
1429 pack = gen_pack2_sss;
1430 unpack_l = gen_unpack1_l;
1431 unpack_h = gen_unpack1_h;
1432 break;
1434 case V4HImode:
1435 wmode = V2SImode;
1436 pack = gen_pack4_sss;
1437 unpack_l = gen_unpack2_l;
1438 unpack_h = gen_unpack2_h;
1439 break;
1441 default:
1442 gcc_unreachable ();
1445 /* Unpack into wider vectors, zero extending the elements. */
1447 w0l = gen_reg_rtx (wmode);
1448 w0h = gen_reg_rtx (wmode);
1449 w1l = gen_reg_rtx (wmode);
1450 w1h = gen_reg_rtx (wmode);
1451 emit_insn (unpack_l (gen_lowpart (mode, w0l), op0, CONST0_RTX (mode)));
1452 emit_insn (unpack_h (gen_lowpart (mode, w0h), op0, CONST0_RTX (mode)));
1453 emit_insn (unpack_l (gen_lowpart (mode, w1l), op1, CONST0_RTX (mode)));
1454 emit_insn (unpack_h (gen_lowpart (mode, w1h), op1, CONST0_RTX (mode)));
1456 /* Compare in the wider mode. */
1458 cl = gen_reg_rtx (wmode);
1459 ch = gen_reg_rtx (wmode);
1460 code = signed_condition (code);
1461 ia64_expand_vecint_compare (code, wmode, cl, w0l, w1l);
1462 negate = ia64_expand_vecint_compare (code, wmode, ch, w0h, w1h);
1464 /* Repack into a single narrower vector. */
1466 emit_insn (pack (dest, cl, ch));
1468 return negate;
1470 default:
1471 gcc_unreachable ();
1474 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1475 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1477 return negate;
1480 static void
1481 ia64_expand_vcondu_v2si (enum rtx_code code, rtx operands[])
1483 rtx dl, dh, bl, bh, op1l, op1h, op2l, op2h, op4l, op4h, op5l, op5h, x;
1485 /* In this case, we extract the two SImode quantities and generate
1486 normal comparisons for each of them. */
1488 op1l = gen_lowpart (SImode, operands[1]);
1489 op2l = gen_lowpart (SImode, operands[2]);
1490 op4l = gen_lowpart (SImode, operands[4]);
1491 op5l = gen_lowpart (SImode, operands[5]);
1493 op1h = gen_reg_rtx (SImode);
1494 op2h = gen_reg_rtx (SImode);
1495 op4h = gen_reg_rtx (SImode);
1496 op5h = gen_reg_rtx (SImode);
1498 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op1h),
1499 gen_lowpart (DImode, operands[1]), GEN_INT (32)));
1500 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op2h),
1501 gen_lowpart (DImode, operands[2]), GEN_INT (32)));
1502 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op4h),
1503 gen_lowpart (DImode, operands[4]), GEN_INT (32)));
1504 emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op5h),
1505 gen_lowpart (DImode, operands[5]), GEN_INT (32)));
1507 bl = gen_reg_rtx (BImode);
1508 x = gen_rtx_fmt_ee (code, BImode, op4l, op5l);
1509 emit_insn (gen_rtx_SET (VOIDmode, bl, x));
1511 bh = gen_reg_rtx (BImode);
1512 x = gen_rtx_fmt_ee (code, BImode, op4h, op5h);
1513 emit_insn (gen_rtx_SET (VOIDmode, bh, x));
1515 /* With the results of the comparisons, emit conditional moves. */
1517 dl = gen_reg_rtx (SImode);
1518 x = gen_rtx_IF_THEN_ELSE (SImode, bl, op1l, op2l);
1519 emit_insn (gen_rtx_SET (VOIDmode, dl, x));
1521 dh = gen_reg_rtx (SImode);
1522 x = gen_rtx_IF_THEN_ELSE (SImode, bh, op1h, op2h);
1523 emit_insn (gen_rtx_SET (VOIDmode, dh, x));
1525 /* Merge the two partial results back into a vector. */
1527 x = gen_rtx_VEC_CONCAT (V2SImode, dl, dh);
1528 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1531 /* Emit an integral vector conditional move. */
1533 void
1534 ia64_expand_vecint_cmov (rtx operands[])
1536 enum machine_mode mode = GET_MODE (operands[0]);
1537 enum rtx_code code = GET_CODE (operands[3]);
1538 bool negate;
1539 rtx cmp, x, ot, of;
1541 /* Since we don't have unsigned V2SImode comparisons, it's more efficient
1542 to special-case them entirely. */
1543 if (mode == V2SImode
1544 && (code == GTU || code == GEU || code == LEU || code == LTU))
1546 ia64_expand_vcondu_v2si (code, operands);
1547 return;
1550 cmp = gen_reg_rtx (mode);
1551 negate = ia64_expand_vecint_compare (code, mode, cmp,
1552 operands[4], operands[5]);
1554 ot = operands[1+negate];
1555 of = operands[2-negate];
1557 if (ot == CONST0_RTX (mode))
1559 if (of == CONST0_RTX (mode))
1561 emit_move_insn (operands[0], ot);
1562 return;
1565 x = gen_rtx_NOT (mode, cmp);
1566 x = gen_rtx_AND (mode, x, of);
1567 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1569 else if (of == CONST0_RTX (mode))
1571 x = gen_rtx_AND (mode, cmp, ot);
1572 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1574 else
1576 rtx t, f;
1578 t = gen_reg_rtx (mode);
1579 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1580 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1582 f = gen_reg_rtx (mode);
1583 x = gen_rtx_NOT (mode, cmp);
1584 x = gen_rtx_AND (mode, x, operands[2-negate]);
1585 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1587 x = gen_rtx_IOR (mode, t, f);
1588 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1592 /* Emit an integral vector min or max operation. Return true if all done. */
1594 bool
1595 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1596 rtx operands[])
1598 rtx xops[5];
1600 /* These four combinations are supported directly. */
1601 if (mode == V8QImode && (code == UMIN || code == UMAX))
1602 return false;
1603 if (mode == V4HImode && (code == SMIN || code == SMAX))
1604 return false;
1606 /* Everything else implemented via vector comparisons. */
1607 xops[0] = operands[0];
1608 xops[4] = xops[1] = operands[1];
1609 xops[5] = xops[2] = operands[2];
1611 switch (code)
1613 case UMIN:
1614 code = LTU;
1615 break;
1616 case UMAX:
1617 code = GTU;
1618 break;
1619 case SMIN:
1620 code = LT;
1621 break;
1622 case SMAX:
1623 code = GT;
1624 break;
1625 default:
1626 gcc_unreachable ();
1628 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1630 ia64_expand_vecint_cmov (xops);
1631 return true;
1634 /* Emit the appropriate sequence for a call. */
1636 void
1637 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1638 int sibcall_p)
1640 rtx insn, b0;
1642 addr = XEXP (addr, 0);
1643 addr = convert_memory_address (DImode, addr);
1644 b0 = gen_rtx_REG (DImode, R_BR (0));
1646 /* ??? Should do this for functions known to bind local too. */
1647 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1649 if (sibcall_p)
1650 insn = gen_sibcall_nogp (addr);
1651 else if (! retval)
1652 insn = gen_call_nogp (addr, b0);
1653 else
1654 insn = gen_call_value_nogp (retval, addr, b0);
1655 insn = emit_call_insn (insn);
1657 else
1659 if (sibcall_p)
1660 insn = gen_sibcall_gp (addr);
1661 else if (! retval)
1662 insn = gen_call_gp (addr, b0);
1663 else
1664 insn = gen_call_value_gp (retval, addr, b0);
1665 insn = emit_call_insn (insn);
1667 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1670 if (sibcall_p)
1671 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1674 void
1675 ia64_reload_gp (void)
1677 rtx tmp;
1679 if (current_frame_info.reg_save_gp)
1680 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1681 else
1683 HOST_WIDE_INT offset;
1685 offset = (current_frame_info.spill_cfa_off
1686 + current_frame_info.spill_size);
1687 if (frame_pointer_needed)
1689 tmp = hard_frame_pointer_rtx;
1690 offset = -offset;
1692 else
1694 tmp = stack_pointer_rtx;
1695 offset = current_frame_info.total_size - offset;
1698 if (CONST_OK_FOR_I (offset))
1699 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1700 tmp, GEN_INT (offset)));
1701 else
1703 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1704 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1705 pic_offset_table_rtx, tmp));
1708 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1711 emit_move_insn (pic_offset_table_rtx, tmp);
1714 void
1715 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1716 rtx scratch_b, int noreturn_p, int sibcall_p)
1718 rtx insn;
1719 bool is_desc = false;
1721 /* If we find we're calling through a register, then we're actually
1722 calling through a descriptor, so load up the values. */
1723 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1725 rtx tmp;
1726 bool addr_dead_p;
1728 /* ??? We are currently constrained to *not* use peep2, because
1729 we can legitimately change the global lifetime of the GP
1730 (in the form of killing where previously live). This is
1731 because a call through a descriptor doesn't use the previous
1732 value of the GP, while a direct call does, and we do not
1733 commit to either form until the split here.
1735 That said, this means that we lack precise life info for
1736 whether ADDR is dead after this call. This is not terribly
1737 important, since we can fix things up essentially for free
1738 with the POST_DEC below, but it's nice to not use it when we
1739 can immediately tell it's not necessary. */
1740 addr_dead_p = ((noreturn_p || sibcall_p
1741 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1742 REGNO (addr)))
1743 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1745 /* Load the code address into scratch_b. */
1746 tmp = gen_rtx_POST_INC (Pmode, addr);
1747 tmp = gen_rtx_MEM (Pmode, tmp);
1748 emit_move_insn (scratch_r, tmp);
1749 emit_move_insn (scratch_b, scratch_r);
1751 /* Load the GP address. If ADDR is not dead here, then we must
1752 revert the change made above via the POST_INCREMENT. */
1753 if (!addr_dead_p)
1754 tmp = gen_rtx_POST_DEC (Pmode, addr);
1755 else
1756 tmp = addr;
1757 tmp = gen_rtx_MEM (Pmode, tmp);
1758 emit_move_insn (pic_offset_table_rtx, tmp);
1760 is_desc = true;
1761 addr = scratch_b;
1764 if (sibcall_p)
1765 insn = gen_sibcall_nogp (addr);
1766 else if (retval)
1767 insn = gen_call_value_nogp (retval, addr, retaddr);
1768 else
1769 insn = gen_call_nogp (addr, retaddr);
1770 emit_call_insn (insn);
1772 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1773 ia64_reload_gp ();
1776 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
1778 This differs from the generic code in that we know about the zero-extending
1779 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
1780 also know that ld.acq+cmpxchg.rel equals a full barrier.
1782 The loop we want to generate looks like
1784 cmp_reg = mem;
1785 label:
1786 old_reg = cmp_reg;
1787 new_reg = cmp_reg op val;
1788 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
1789 if (cmp_reg != old_reg)
1790 goto label;
1792 Note that we only do the plain load from memory once. Subsequent
1793 iterations use the value loaded by the compare-and-swap pattern. */
1795 void
1796 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
1797 rtx old_dst, rtx new_dst)
1799 enum machine_mode mode = GET_MODE (mem);
1800 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
1801 enum insn_code icode;
1803 /* Special case for using fetchadd. */
1804 if ((mode == SImode || mode == DImode) && fetchadd_operand (val, mode))
1806 if (!old_dst)
1807 old_dst = gen_reg_rtx (mode);
1809 emit_insn (gen_memory_barrier ());
1811 if (mode == SImode)
1812 icode = CODE_FOR_fetchadd_acq_si;
1813 else
1814 icode = CODE_FOR_fetchadd_acq_di;
1815 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
1817 if (new_dst)
1819 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
1820 true, OPTAB_WIDEN);
1821 if (new_reg != new_dst)
1822 emit_move_insn (new_dst, new_reg);
1824 return;
1827 /* Because of the volatile mem read, we get an ld.acq, which is the
1828 front half of the full barrier. The end half is the cmpxchg.rel. */
1829 gcc_assert (MEM_VOLATILE_P (mem));
1831 old_reg = gen_reg_rtx (DImode);
1832 cmp_reg = gen_reg_rtx (DImode);
1833 label = gen_label_rtx ();
1835 if (mode != DImode)
1837 val = simplify_gen_subreg (DImode, val, mode, 0);
1838 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
1840 else
1841 emit_move_insn (cmp_reg, mem);
1843 emit_label (label);
1845 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
1846 emit_move_insn (old_reg, cmp_reg);
1847 emit_move_insn (ar_ccv, cmp_reg);
1849 if (old_dst)
1850 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
1852 new_reg = cmp_reg;
1853 if (code == NOT)
1855 new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
1856 code = AND;
1858 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
1859 true, OPTAB_DIRECT);
1861 if (mode != DImode)
1862 new_reg = gen_lowpart (mode, new_reg);
1863 if (new_dst)
1864 emit_move_insn (new_dst, new_reg);
1866 switch (mode)
1868 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
1869 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
1870 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
1871 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
1872 default:
1873 gcc_unreachable ();
1876 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
1878 emit_cmp_and_jump_insns (cmp_reg, old_reg, EQ, NULL, DImode, true, label);
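/* Illustrative note on the fetchadd fast path above: for something like an
   SImode "mem += 8", fetchadd_operand accepts the constant and the
   expansion emits only a memory barrier plus a single fetchadd
   instruction, skipping the compare-and-swap loop entirely (a sketch of
   the intent, assuming 8 is among the increments fetchadd supports).  */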
1881 /* Begin the assembly file. */
1883 static void
1884 ia64_file_start (void)
1886 /* Variable tracking should be run after all optimizations which change order
1887 of insns. It also needs a valid CFG. This can't be done in
1888 ia64_override_options, because flag_var_tracking is finalized after
1889 that. */
1890 ia64_flag_var_tracking = flag_var_tracking;
1891 flag_var_tracking = 0;
1893 default_file_start ();
1894 emit_safe_across_calls ();
1897 void
1898 emit_safe_across_calls (void)
1900 unsigned int rs, re;
1901 int out_state;
1903 rs = 1;
1904 out_state = 0;
1905 while (1)
1907 while (rs < 64 && call_used_regs[PR_REG (rs)])
1908 rs++;
1909 if (rs >= 64)
1910 break;
1911 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1912 continue;
1913 if (out_state == 0)
1915 fputs ("\t.pred.safe_across_calls ", asm_out_file);
1916 out_state = 1;
1918 else
1919 fputc (',', asm_out_file);
1920 if (re == rs + 1)
1921 fprintf (asm_out_file, "p%u", rs);
1922 else
1923 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
1924 rs = re + 1;
1926 if (out_state)
1927 fputc ('\n', asm_out_file);
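/* A typical directive produced by the loop above, assuming the standard
   set of call-saved predicate registers, looks like

       .pred.safe_across_calls p1-p5,p16-p63

   i.e. a comma-separated mix of single predicates and predicate ranges.  */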
1930 /* Helper function for ia64_compute_frame_size: find an appropriate general
1931 register to spill some special register to.  Registers already allocated
1932 by this routine are recorded in current_frame_info.gr_used_mask (GR0 to GR31).
1933 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1935 static int
1936 find_gr_spill (int try_locals)
1938 int regno;
1940 /* If this is a leaf function, first try an otherwise unused
1941 call-clobbered register. */
1942 if (current_function_is_leaf)
1944 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1945 if (! regs_ever_live[regno]
1946 && call_used_regs[regno]
1947 && ! fixed_regs[regno]
1948 && ! global_regs[regno]
1949 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1951 current_frame_info.gr_used_mask |= 1 << regno;
1952 return regno;
1956 if (try_locals)
1958 regno = current_frame_info.n_local_regs;
1959 /* If there is a frame pointer, then we can't use loc79, because
1960 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1961 reg_name switching code in ia64_expand_prologue. */
1962 if (regno < (80 - frame_pointer_needed))
1964 current_frame_info.n_local_regs = regno + 1;
1965 return LOC_REG (0) + regno;
1969 /* Failed to find a general register to spill to. Must use stack. */
1970 return 0;
1973 /* In order to make for nice schedules, we try to allocate every temporary
1974 to a different register. We must of course stay away from call-saved,
1975 fixed, and global registers. We must also stay away from registers
1976 allocated in current_frame_info.gr_used_mask, since those include regs
1977 used all through the prologue.
1979 Any register allocated here must be used immediately. The idea is to
1980 aid scheduling, not to solve data flow problems. */
1982 static int last_scratch_gr_reg;
1984 static int
1985 next_scratch_gr_reg (void)
1987 int i, regno;
1989 for (i = 0; i < 32; ++i)
1991 regno = (last_scratch_gr_reg + i + 1) & 31;
1992 if (call_used_regs[regno]
1993 && ! fixed_regs[regno]
1994 && ! global_regs[regno]
1995 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1997 last_scratch_gr_reg = regno;
1998 return regno;
2002 /* There must be _something_ available. */
2003 gcc_unreachable ();
2006 /* Helper function for ia64_compute_frame_size, called through
2007 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2009 static void
2010 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2012 unsigned int regno = REGNO (reg);
2013 if (regno < 32)
2015 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2016 for (i = 0; i < n; ++i)
2017 current_frame_info.gr_used_mask |= 1 << (regno + i);
2021 /* Compute the layout of the current function's stack frame and record it
2022 in current_frame_info.  SIZE is the number of bytes of space
2023 needed for local variables. */
2025 static void
2026 ia64_compute_frame_size (HOST_WIDE_INT size)
2028 HOST_WIDE_INT total_size;
2029 HOST_WIDE_INT spill_size = 0;
2030 HOST_WIDE_INT extra_spill_size = 0;
2031 HOST_WIDE_INT pretend_args_size;
2032 HARD_REG_SET mask;
2033 int n_spilled = 0;
2034 int spilled_gr_p = 0;
2035 int spilled_fr_p = 0;
2036 unsigned int regno;
2037 int i;
2039 if (current_frame_info.initialized)
2040 return;
2042 memset (&current_frame_info, 0, sizeof current_frame_info);
2043 CLEAR_HARD_REG_SET (mask);
2045 /* Don't allocate scratches to the return register. */
2046 diddle_return_value (mark_reg_gr_used_mask, NULL);
2048 /* Don't allocate scratches to the EH scratch registers. */
2049 if (cfun->machine->ia64_eh_epilogue_sp)
2050 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2051 if (cfun->machine->ia64_eh_epilogue_bsp)
2052 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2054 /* Find the size of the register stack frame. We have only 80 local
2055 registers, because we reserve 8 for the inputs and 8 for the
2056 outputs. */
2058 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2059 since we'll be adjusting that down later. */
2060 regno = LOC_REG (78) + ! frame_pointer_needed;
2061 for (; regno >= LOC_REG (0); regno--)
2062 if (regs_ever_live[regno])
2063 break;
2064 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2066 /* For functions marked with the syscall_linkage attribute, we must mark
2067 all eight input registers as in use, so that locals aren't visible to
2068 the caller. */
2070 if (cfun->machine->n_varargs > 0
2071 || lookup_attribute ("syscall_linkage",
2072 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2073 current_frame_info.n_input_regs = 8;
2074 else
2076 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2077 if (regs_ever_live[regno])
2078 break;
2079 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2082 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2083 if (regs_ever_live[regno])
2084 break;
2085 i = regno - OUT_REG (0) + 1;
2087 /* When -p profiling, we need one output register for the mcount argument.
2088 Likewise for -a profiling for the bb_init_func argument. For -ax
2089 profiling, we need two output registers for the two bb_init_trace_func
2090 arguments. */
2091 if (current_function_profile)
2092 i = MAX (i, 1);
2093 current_frame_info.n_output_regs = i;
2095 /* ??? No rotating register support yet. */
2096 current_frame_info.n_rotate_regs = 0;
2098 /* Discover which registers need spilling, and how much room that
2099 will take. Begin with floating point and general registers,
2100 which will always wind up on the stack. */
2102 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2103 if (regs_ever_live[regno] && ! call_used_regs[regno])
2105 SET_HARD_REG_BIT (mask, regno);
2106 spill_size += 16;
2107 n_spilled += 1;
2108 spilled_fr_p = 1;
2111 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2112 if (regs_ever_live[regno] && ! call_used_regs[regno])
2114 SET_HARD_REG_BIT (mask, regno);
2115 spill_size += 8;
2116 n_spilled += 1;
2117 spilled_gr_p = 1;
2120 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2121 if (regs_ever_live[regno] && ! call_used_regs[regno])
2123 SET_HARD_REG_BIT (mask, regno);
2124 spill_size += 8;
2125 n_spilled += 1;
2128 /* Now come all special registers that might get saved in other
2129 general registers. */
2131 if (frame_pointer_needed)
2133 current_frame_info.reg_fp = find_gr_spill (1);
2134 /* If we did not get a register, then we take LOC79. This is guaranteed
2135 to be free, even if regs_ever_live is already set, because this is
2136 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2137 as we don't count loc79 above. */
2138 if (current_frame_info.reg_fp == 0)
2140 current_frame_info.reg_fp = LOC_REG (79);
2141 current_frame_info.n_local_regs++;
2145 if (! current_function_is_leaf)
2147 /* Emit a save of BR0 if we call other functions. Do this even
2148 if this function doesn't return, as EH depends on this to be
2149 able to unwind the stack. */
2150 SET_HARD_REG_BIT (mask, BR_REG (0));
2152 current_frame_info.reg_save_b0 = find_gr_spill (1);
2153 if (current_frame_info.reg_save_b0 == 0)
2155 spill_size += 8;
2156 n_spilled += 1;
2159 /* Similarly for ar.pfs. */
2160 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2161 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2162 if (current_frame_info.reg_save_ar_pfs == 0)
2164 extra_spill_size += 8;
2165 n_spilled += 1;
2168 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2169 registers are clobbered, so we fall back to the stack. */
2170 current_frame_info.reg_save_gp
2171 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2172 if (current_frame_info.reg_save_gp == 0)
2174 SET_HARD_REG_BIT (mask, GR_REG (1));
2175 spill_size += 8;
2176 n_spilled += 1;
2179 else
2181 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2183 SET_HARD_REG_BIT (mask, BR_REG (0));
2184 spill_size += 8;
2185 n_spilled += 1;
2188 if (regs_ever_live[AR_PFS_REGNUM])
2190 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2191 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2192 if (current_frame_info.reg_save_ar_pfs == 0)
2194 extra_spill_size += 8;
2195 n_spilled += 1;
2200 /* Unwind descriptor hackery: things are most efficient if we allocate
2201 consecutive GR save registers for RP, PFS, FP in that order. However,
2202 it is absolutely critical that FP get the only hard register that's
2203 guaranteed to be free, so we allocated it first. If all three did
2204 happen to be allocated hard regs, and are consecutive, rearrange them
2205 into the preferred order now. */
2206 if (current_frame_info.reg_fp != 0
2207 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2208 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2210 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2211 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2212 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2215 /* See if we need to store the predicate register block. */
2216 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2217 if (regs_ever_live[regno] && ! call_used_regs[regno])
2218 break;
2219 if (regno <= PR_REG (63))
2221 SET_HARD_REG_BIT (mask, PR_REG (0));
2222 current_frame_info.reg_save_pr = find_gr_spill (1);
2223 if (current_frame_info.reg_save_pr == 0)
2225 extra_spill_size += 8;
2226 n_spilled += 1;
2229 /* ??? Mark them all as used so that register renaming and such
2230 are free to use them. */
2231 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2232 regs_ever_live[regno] = 1;
2235 /* If we're forced to use st8.spill, we're forced to save and restore
2236 ar.unat as well. The check for existing liveness allows inline asm
2237 to touch ar.unat. */
2238 if (spilled_gr_p || cfun->machine->n_varargs
2239 || regs_ever_live[AR_UNAT_REGNUM])
2241 regs_ever_live[AR_UNAT_REGNUM] = 1;
2242 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2243 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2244 if (current_frame_info.reg_save_ar_unat == 0)
2246 extra_spill_size += 8;
2247 n_spilled += 1;
2251 if (regs_ever_live[AR_LC_REGNUM])
2253 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2254 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2255 if (current_frame_info.reg_save_ar_lc == 0)
2257 extra_spill_size += 8;
2258 n_spilled += 1;
2262 /* If we have an odd number of words of pretend arguments written to
2263 the stack, then the FR save area will be unaligned. We round the
2264 size of this area up to keep things 16 byte aligned. */
2265 if (spilled_fr_p)
2266 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2267 else
2268 pretend_args_size = current_function_pretend_args_size;
2270 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2271 + current_function_outgoing_args_size);
2272 total_size = IA64_STACK_ALIGN (total_size);
2274 /* We always use the 16-byte scratch area provided by the caller, but
2275 if we are a leaf function, there's no one to which we need to provide
2276 a scratch area. */
2277 if (current_function_is_leaf)
2278 total_size = MAX (0, total_size - 16);
2280 current_frame_info.total_size = total_size;
2281 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2282 current_frame_info.spill_size = spill_size;
2283 current_frame_info.extra_spill_size = extra_spill_size;
2284 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2285 current_frame_info.n_spilled = n_spilled;
2286 current_frame_info.initialized = reload_completed;
2289 /* Compute the initial difference between the specified pair of registers. */
2291 HOST_WIDE_INT
2292 ia64_initial_elimination_offset (int from, int to)
2294 HOST_WIDE_INT offset;
2296 ia64_compute_frame_size (get_frame_size ());
2297 switch (from)
2299 case FRAME_POINTER_REGNUM:
2300 switch (to)
2302 case HARD_FRAME_POINTER_REGNUM:
2303 if (current_function_is_leaf)
2304 offset = -current_frame_info.total_size;
2305 else
2306 offset = -(current_frame_info.total_size
2307 - current_function_outgoing_args_size - 16);
2308 break;
2310 case STACK_POINTER_REGNUM:
2311 if (current_function_is_leaf)
2312 offset = 0;
2313 else
2314 offset = 16 + current_function_outgoing_args_size;
2315 break;
2317 default:
2318 gcc_unreachable ();
2320 break;
2322 case ARG_POINTER_REGNUM:
2323 /* Arguments start above the 16 byte save area, unless stdarg,
2324 in which case we store through the 16 byte save area. */
2325 switch (to)
2327 case HARD_FRAME_POINTER_REGNUM:
2328 offset = 16 - current_function_pretend_args_size;
2329 break;
2331 case STACK_POINTER_REGNUM:
2332 offset = (current_frame_info.total_size
2333 + 16 - current_function_pretend_args_size);
2334 break;
2336 default:
2337 gcc_unreachable ();
2339 break;
2341 default:
2342 gcc_unreachable ();
2345 return offset;
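/* Worked example with hypothetical numbers: in a non-leaf function with
   total_size == 64, outgoing_args_size == 16 and no pretend args,
   FRAME_POINTER_REGNUM -> STACK_POINTER_REGNUM eliminates to 16 + 16 = 32
   and ARG_POINTER_REGNUM -> STACK_POINTER_REGNUM to 64 + 16 - 0 = 80.  */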
2348 /* If there are more than a trivial number of register spills, we use
2349 two interleaved iterators so that we can get two memory references
2350 per insn group.
2352 In order to simplify things in the prologue and epilogue expanders,
2353 we use helper functions to fix up the memory references after the
2354 fact with the appropriate offsets to a POST_MODIFY memory mode.
2355 The following data structure tracks the state of the two iterators
2356 while insns are being emitted. */
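/* Illustrative example (register numbers are hypothetical): with two
   iterators active, consecutive spills alternate their base register,
   producing a stream roughly like

       st8.spill [r2] = r4, 16
       st8.spill [r3] = r5, 16
       st8.spill [r2] = r6, 16

   so that two memory references can be issued per insn group.  */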
2358 struct spill_fill_data
2360 rtx init_after; /* point at which to emit initializations */
2361 rtx init_reg[2]; /* initial base register */
2362 rtx iter_reg[2]; /* the iterator registers */
2363 rtx *prev_addr[2]; /* address of last memory use */
2364 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2365 HOST_WIDE_INT prev_off[2]; /* last offset */
2366 int n_iter; /* number of iterators in use */
2367 int next_iter; /* next iterator to use */
2368 unsigned int save_gr_used_mask;
2371 static struct spill_fill_data spill_fill_data;
2373 static void
2374 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2376 int i;
2378 spill_fill_data.init_after = get_last_insn ();
2379 spill_fill_data.init_reg[0] = init_reg;
2380 spill_fill_data.init_reg[1] = init_reg;
2381 spill_fill_data.prev_addr[0] = NULL;
2382 spill_fill_data.prev_addr[1] = NULL;
2383 spill_fill_data.prev_insn[0] = NULL;
2384 spill_fill_data.prev_insn[1] = NULL;
2385 spill_fill_data.prev_off[0] = cfa_off;
2386 spill_fill_data.prev_off[1] = cfa_off;
2387 spill_fill_data.next_iter = 0;
2388 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2390 spill_fill_data.n_iter = 1 + (n_spills > 2);
2391 for (i = 0; i < spill_fill_data.n_iter; ++i)
2393 int regno = next_scratch_gr_reg ();
2394 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2395 current_frame_info.gr_used_mask |= 1 << regno;
2399 static void
2400 finish_spill_pointers (void)
2402 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2405 static rtx
2406 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2408 int iter = spill_fill_data.next_iter;
2409 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2410 rtx disp_rtx = GEN_INT (disp);
2411 rtx mem;
2413 if (spill_fill_data.prev_addr[iter])
2415 if (CONST_OK_FOR_N (disp))
2417 *spill_fill_data.prev_addr[iter]
2418 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2419 gen_rtx_PLUS (DImode,
2420 spill_fill_data.iter_reg[iter],
2421 disp_rtx));
2422 REG_NOTES (spill_fill_data.prev_insn[iter])
2423 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2424 REG_NOTES (spill_fill_data.prev_insn[iter]));
2426 else
2428 /* ??? Could use register post_modify for loads. */
2429 if (! CONST_OK_FOR_I (disp))
2431 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2432 emit_move_insn (tmp, disp_rtx);
2433 disp_rtx = tmp;
2435 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2436 spill_fill_data.iter_reg[iter], disp_rtx));
2439 /* Micro-optimization: if we've created a frame pointer, it's at
2440 CFA 0, which may allow the real iterator to be initialized lower,
2441 slightly increasing parallelism. Also, if there are few saves
2442 it may eliminate the iterator entirely. */
2443 else if (disp == 0
2444 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2445 && frame_pointer_needed)
2447 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2448 set_mem_alias_set (mem, get_varargs_alias_set ());
2449 return mem;
2451 else
2453 rtx seq, insn;
2455 if (disp == 0)
2456 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2457 spill_fill_data.init_reg[iter]);
2458 else
2460 start_sequence ();
2462 if (! CONST_OK_FOR_I (disp))
2464 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2465 emit_move_insn (tmp, disp_rtx);
2466 disp_rtx = tmp;
2469 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2470 spill_fill_data.init_reg[iter],
2471 disp_rtx));
2473 seq = get_insns ();
2474 end_sequence ();
2477 /* Be careful about being the first insn in a sequence. */
2478 if (spill_fill_data.init_after)
2479 insn = emit_insn_after (seq, spill_fill_data.init_after);
2480 else
2482 rtx first = get_insns ();
2483 if (first)
2484 insn = emit_insn_before (seq, first);
2485 else
2486 insn = emit_insn (seq);
2488 spill_fill_data.init_after = insn;
2490 /* If DISP is 0, we may or may not have a further adjustment
2491 afterward. If we do, then the load/store insn may be modified
2492 to be a post-modify. If we don't, then this copy may be
2493 eliminated by copyprop_hardreg_forward, which makes this
2494 insn garbage, which runs afoul of the sanity check in
2495 propagate_one_insn. So mark this insn as legal to delete. */
2496 if (disp == 0)
2497 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2498 REG_NOTES (insn));
2501 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2503 /* ??? Not all of the spills are for varargs, but some of them are.
2504 The rest of the spills belong in an alias set of their own. But
2505 it doesn't actually hurt to include them here. */
2506 set_mem_alias_set (mem, get_varargs_alias_set ());
2508 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2509 spill_fill_data.prev_off[iter] = cfa_off;
2511 if (++iter >= spill_fill_data.n_iter)
2512 iter = 0;
2513 spill_fill_data.next_iter = iter;
2515 return mem;
2518 static void
2519 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2520 rtx frame_reg)
2522 int iter = spill_fill_data.next_iter;
2523 rtx mem, insn;
2525 mem = spill_restore_mem (reg, cfa_off);
2526 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2527 spill_fill_data.prev_insn[iter] = insn;
2529 if (frame_reg)
2531 rtx base;
2532 HOST_WIDE_INT off;
2534 RTX_FRAME_RELATED_P (insn) = 1;
2536 /* Don't even pretend that the unwind code can intuit its way
2537 through a pair of interleaved post_modify iterators. Just
2538 provide the correct answer. */
2540 if (frame_pointer_needed)
2542 base = hard_frame_pointer_rtx;
2543 off = - cfa_off;
2545 else
2547 base = stack_pointer_rtx;
2548 off = current_frame_info.total_size - cfa_off;
2551 REG_NOTES (insn)
2552 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2553 gen_rtx_SET (VOIDmode,
2554 gen_rtx_MEM (GET_MODE (reg),
2555 plus_constant (base, off)),
2556 frame_reg),
2557 REG_NOTES (insn));
2561 static void
2562 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2564 int iter = spill_fill_data.next_iter;
2565 rtx insn;
2567 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2568 GEN_INT (cfa_off)));
2569 spill_fill_data.prev_insn[iter] = insn;
2572 /* Wrapper functions that discard the CONST_INT spill offset.  These
2573 exist so that we can give gr_spill/gr_fill the offset they need and
2574 use a consistent function interface. */
2576 static rtx
2577 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2579 return gen_movdi (dest, src);
2582 static rtx
2583 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2585 return gen_fr_spill (dest, src);
2588 static rtx
2589 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2591 return gen_fr_restore (dest, src);
2594 /* Called after register allocation to add any instructions needed for the
2595 prologue. Using a prologue insn is favored compared to putting all of the
2596 instructions in output_function_prologue(), since it allows the scheduler
2597 to intermix instructions with the saves of the call-saved registers.  In
2598 some cases, it might be necessary to emit a barrier instruction as the last
2599 insn to prevent such scheduling.
2601 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2602 so that the debug info generation code can handle them properly.
2604 The register save area is laid out like so:
2605 cfa+16
2606 [ varargs spill area ]
2607 [ fr register spill area ]
2608 [ br register spill area ]
2609 [ ar register spill area ]
2610 [ pr register spill area ]
2611 [ gr register spill area ] */
2613 /* ??? Get inefficient code when the frame size is larger than can fit in an
2614 adds instruction. */
2616 void
2617 ia64_expand_prologue (void)
2619 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2620 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2621 rtx reg, alt_reg;
2623 ia64_compute_frame_size (get_frame_size ());
2624 last_scratch_gr_reg = 15;
2626 /* If there is no epilogue, then we don't need some prologue insns.
2627 We need to avoid emitting the dead prologue insns, because flow
2628 will complain about them. */
2629 if (optimize)
2631 edge e;
2632 edge_iterator ei;
2634 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2635 if ((e->flags & EDGE_FAKE) == 0
2636 && (e->flags & EDGE_FALLTHRU) != 0)
2637 break;
2638 epilogue_p = (e != NULL);
2640 else
2641 epilogue_p = 1;
2643 /* Set the local, input, and output register names. We need to do this
2644 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2645 half. If we use in/loc/out register names, then we get assembler errors
2646 in crtn.S because there is no alloc insn or regstk directive in there. */
2647 if (! TARGET_REG_NAMES)
2649 int inputs = current_frame_info.n_input_regs;
2650 int locals = current_frame_info.n_local_regs;
2651 int outputs = current_frame_info.n_output_regs;
2653 for (i = 0; i < inputs; i++)
2654 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2655 for (i = 0; i < locals; i++)
2656 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2657 for (i = 0; i < outputs; i++)
2658 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2661 /* Set the frame pointer register name. The regnum is logically loc79,
2662 but of course we'll not have allocated that many locals. Rather than
2663 worrying about renumbering the existing rtxs, we adjust the name. */
2664 /* ??? This code means that we can never use one local register when
2665 there is a frame pointer. loc79 gets wasted in this case, as it is
2666 renamed to a register that will never be used. See also the try_locals
2667 code in find_gr_spill. */
2668 if (current_frame_info.reg_fp)
2670 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2671 reg_names[HARD_FRAME_POINTER_REGNUM]
2672 = reg_names[current_frame_info.reg_fp];
2673 reg_names[current_frame_info.reg_fp] = tmp;
2676 /* We don't need an alloc instruction if we've used no outputs or locals. */
2677 if (current_frame_info.n_local_regs == 0
2678 && current_frame_info.n_output_regs == 0
2679 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2680 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2682 /* If there is no alloc, but there are input registers used, then we
2683 need a .regstk directive. */
2684 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2685 ar_pfs_save_reg = NULL_RTX;
2687 else
2689 current_frame_info.need_regstk = 0;
2691 if (current_frame_info.reg_save_ar_pfs)
2692 regno = current_frame_info.reg_save_ar_pfs;
2693 else
2694 regno = next_scratch_gr_reg ();
2695 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2697 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2698 GEN_INT (current_frame_info.n_input_regs),
2699 GEN_INT (current_frame_info.n_local_regs),
2700 GEN_INT (current_frame_info.n_output_regs),
2701 GEN_INT (current_frame_info.n_rotate_regs)));
2702 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2705 /* Set up frame pointer, stack pointer, and spill iterators. */
2707 n_varargs = cfun->machine->n_varargs;
2708 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2709 stack_pointer_rtx, 0);
2711 if (frame_pointer_needed)
2713 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2714 RTX_FRAME_RELATED_P (insn) = 1;
2717 if (current_frame_info.total_size != 0)
2719 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2720 rtx offset;
2722 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2723 offset = frame_size_rtx;
2724 else
2726 regno = next_scratch_gr_reg ();
2727 offset = gen_rtx_REG (DImode, regno);
2728 emit_move_insn (offset, frame_size_rtx);
2731 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2732 stack_pointer_rtx, offset));
2734 if (! frame_pointer_needed)
2736 RTX_FRAME_RELATED_P (insn) = 1;
2737 if (GET_CODE (offset) != CONST_INT)
2739 REG_NOTES (insn)
2740 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2741 gen_rtx_SET (VOIDmode,
2742 stack_pointer_rtx,
2743 gen_rtx_PLUS (DImode,
2744 stack_pointer_rtx,
2745 frame_size_rtx)),
2746 REG_NOTES (insn));
2750 /* ??? At this point we must generate a magic insn that appears to
2751 modify the stack pointer, the frame pointer, and all spill
2752 iterators. This would allow the most scheduling freedom. For
2753 now, just hard stop. */
2754 emit_insn (gen_blockage ());
2757 /* Must copy out ar.unat before doing any integer spills. */
2758 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2760 if (current_frame_info.reg_save_ar_unat)
2761 ar_unat_save_reg
2762 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2763 else
2765 alt_regno = next_scratch_gr_reg ();
2766 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2767 current_frame_info.gr_used_mask |= 1 << alt_regno;
2770 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2771 insn = emit_move_insn (ar_unat_save_reg, reg);
2772 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2774 /* Even if we're not going to generate an epilogue, we still
2775 need to save the register so that EH works. */
2776 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2777 emit_insn (gen_prologue_use (ar_unat_save_reg));
2779 else
2780 ar_unat_save_reg = NULL_RTX;
2782 /* Spill all varargs registers. Do this before spilling any GR registers,
2783 since we want the UNAT bits for the GR registers to override the UNAT
2784 bits from varargs, which we don't care about. */
2786 cfa_off = -16;
2787 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2789 reg = gen_rtx_REG (DImode, regno);
2790 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2793 /* Locate the bottom of the register save area. */
2794 cfa_off = (current_frame_info.spill_cfa_off
2795 + current_frame_info.spill_size
2796 + current_frame_info.extra_spill_size);
2798 /* Save the predicate register block either in a register or in memory. */
2799 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2801 reg = gen_rtx_REG (DImode, PR_REG (0));
2802 if (current_frame_info.reg_save_pr != 0)
2804 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2805 insn = emit_move_insn (alt_reg, reg);
2807 /* ??? Denote pr spill/fill by a DImode move that modifies all
2808 64 hard registers. */
2809 RTX_FRAME_RELATED_P (insn) = 1;
2810 REG_NOTES (insn)
2811 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2812 gen_rtx_SET (VOIDmode, alt_reg, reg),
2813 REG_NOTES (insn));
2815 /* Even if we're not going to generate an epilogue, we still
2816 need to save the register so that EH works. */
2817 if (! epilogue_p)
2818 emit_insn (gen_prologue_use (alt_reg));
2820 else
2822 alt_regno = next_scratch_gr_reg ();
2823 alt_reg = gen_rtx_REG (DImode, alt_regno);
2824 insn = emit_move_insn (alt_reg, reg);
2825 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2826 cfa_off -= 8;
2830 /* Handle AR regs in numerical order. All of them get special handling. */
2831 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2832 && current_frame_info.reg_save_ar_unat == 0)
2834 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2835 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2836 cfa_off -= 8;
2839 /* The alloc insn already copied ar.pfs into a general register. The
2840 only thing we have to do now is copy that register to a stack slot
2841 if we'd not allocated a local register for the job. */
2842 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2843 && current_frame_info.reg_save_ar_pfs == 0)
2845 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2846 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2847 cfa_off -= 8;
2850 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2852 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2853 if (current_frame_info.reg_save_ar_lc != 0)
2855 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2856 insn = emit_move_insn (alt_reg, reg);
2857 RTX_FRAME_RELATED_P (insn) = 1;
2859 /* Even if we're not going to generate an epilogue, we still
2860 need to save the register so that EH works. */
2861 if (! epilogue_p)
2862 emit_insn (gen_prologue_use (alt_reg));
2864 else
2866 alt_regno = next_scratch_gr_reg ();
2867 alt_reg = gen_rtx_REG (DImode, alt_regno);
2868 emit_move_insn (alt_reg, reg);
2869 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2870 cfa_off -= 8;
2874 if (current_frame_info.reg_save_gp)
2876 insn = emit_move_insn (gen_rtx_REG (DImode,
2877 current_frame_info.reg_save_gp),
2878 pic_offset_table_rtx);
2879 /* We don't know for sure yet if this is actually needed, since
2880 we've not split the PIC call patterns. If all of the calls
2881 are indirect, and not followed by any uses of the gp, then
2882 this save is dead. Allow it to go away. */
2883 REG_NOTES (insn)
2884 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2887 /* We should now be at the base of the gr/br/fr spill area. */
2888 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
2889 + current_frame_info.spill_size));
2891 /* Spill all general registers. */
2892 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2893 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2895 reg = gen_rtx_REG (DImode, regno);
2896 do_spill (gen_gr_spill, reg, cfa_off, reg);
2897 cfa_off -= 8;
2900 /* Handle BR0 specially -- it may be getting stored permanently in
2901 some GR register. */
2902 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2904 reg = gen_rtx_REG (DImode, BR_REG (0));
2905 if (current_frame_info.reg_save_b0 != 0)
2907 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2908 insn = emit_move_insn (alt_reg, reg);
2909 RTX_FRAME_RELATED_P (insn) = 1;
2911 /* Even if we're not going to generate an epilogue, we still
2912 need to save the register so that EH works. */
2913 if (! epilogue_p)
2914 emit_insn (gen_prologue_use (alt_reg));
2916 else
2918 alt_regno = next_scratch_gr_reg ();
2919 alt_reg = gen_rtx_REG (DImode, alt_regno);
2920 emit_move_insn (alt_reg, reg);
2921 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2922 cfa_off -= 8;
2926 /* Spill the rest of the BR registers. */
2927 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2928 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2930 alt_regno = next_scratch_gr_reg ();
2931 alt_reg = gen_rtx_REG (DImode, alt_regno);
2932 reg = gen_rtx_REG (DImode, regno);
2933 emit_move_insn (alt_reg, reg);
2934 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2935 cfa_off -= 8;
2938 /* Align the frame and spill all FR registers. */
2939 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2940 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2942 gcc_assert (!(cfa_off & 15));
2943 reg = gen_rtx_REG (XFmode, regno);
2944 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2945 cfa_off -= 16;
2948 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
2950 finish_spill_pointers ();
2953 /* Called after register allocation to add any instructions needed for the
2954 epilogue. Using an epilogue insn is favored compared to putting all of the
2955 instructions in output_function_epilogue(), since it allows the scheduler
2956 to intermix instructions with the restores of the call-saved registers.  In
2957 some cases, it might be necessary to emit a barrier instruction as the last
2958 insn to prevent such scheduling. */
2960 void
2961 ia64_expand_epilogue (int sibcall_p)
2963 rtx insn, reg, alt_reg, ar_unat_save_reg;
2964 int regno, alt_regno, cfa_off;
2966 ia64_compute_frame_size (get_frame_size ());
2968 /* If there is a frame pointer, then we use it instead of the stack
2969 pointer, so that the stack pointer does not need to be valid when
2970 the epilogue starts. See EXIT_IGNORE_STACK. */
2971 if (frame_pointer_needed)
2972 setup_spill_pointers (current_frame_info.n_spilled,
2973 hard_frame_pointer_rtx, 0);
2974 else
2975 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2976 current_frame_info.total_size);
2978 if (current_frame_info.total_size != 0)
2980 /* ??? At this point we must generate a magic insn that appears to
2981 modify the spill iterators and the frame pointer. This would
2982 allow the most scheduling freedom. For now, just hard stop. */
2983 emit_insn (gen_blockage ());
2986 /* Locate the bottom of the register save area. */
2987 cfa_off = (current_frame_info.spill_cfa_off
2988 + current_frame_info.spill_size
2989 + current_frame_info.extra_spill_size);
2991 /* Restore the predicate registers. */
2992 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2994 if (current_frame_info.reg_save_pr != 0)
2995 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2996 else
2998 alt_regno = next_scratch_gr_reg ();
2999 alt_reg = gen_rtx_REG (DImode, alt_regno);
3000 do_restore (gen_movdi_x, alt_reg, cfa_off);
3001 cfa_off -= 8;
3003 reg = gen_rtx_REG (DImode, PR_REG (0));
3004 emit_move_insn (reg, alt_reg);
3007 /* Restore the application registers. */
3009 /* Load the saved unat from the stack, but do not restore it until
3010 after the GRs have been restored. */
3011 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3013 if (current_frame_info.reg_save_ar_unat != 0)
3014 ar_unat_save_reg
3015 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3016 else
3018 alt_regno = next_scratch_gr_reg ();
3019 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3020 current_frame_info.gr_used_mask |= 1 << alt_regno;
3021 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3022 cfa_off -= 8;
3025 else
3026 ar_unat_save_reg = NULL_RTX;
3028 if (current_frame_info.reg_save_ar_pfs != 0)
3030 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3031 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3032 emit_move_insn (reg, alt_reg);
3034 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3036 alt_regno = next_scratch_gr_reg ();
3037 alt_reg = gen_rtx_REG (DImode, alt_regno);
3038 do_restore (gen_movdi_x, alt_reg, cfa_off);
3039 cfa_off -= 8;
3040 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3041 emit_move_insn (reg, alt_reg);
3044 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3046 if (current_frame_info.reg_save_ar_lc != 0)
3047 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3048 else
3050 alt_regno = next_scratch_gr_reg ();
3051 alt_reg = gen_rtx_REG (DImode, alt_regno);
3052 do_restore (gen_movdi_x, alt_reg, cfa_off);
3053 cfa_off -= 8;
3055 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3056 emit_move_insn (reg, alt_reg);
3059 /* We should now be at the base of the gr/br/fr spill area. */
3060 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3061 + current_frame_info.spill_size));
3063 /* The GP may be stored on the stack in the prologue, but it's
3064 never restored in the epilogue. Skip the stack slot. */
3065 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3066 cfa_off -= 8;
3068 /* Restore all general registers. */
3069 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3070 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3072 reg = gen_rtx_REG (DImode, regno);
3073 do_restore (gen_gr_restore, reg, cfa_off);
3074 cfa_off -= 8;
3077 /* Restore the branch registers. Handle B0 specially, as it may
3078 have gotten stored in some GR register. */
3079 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3081 if (current_frame_info.reg_save_b0 != 0)
3082 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3083 else
3085 alt_regno = next_scratch_gr_reg ();
3086 alt_reg = gen_rtx_REG (DImode, alt_regno);
3087 do_restore (gen_movdi_x, alt_reg, cfa_off);
3088 cfa_off -= 8;
3090 reg = gen_rtx_REG (DImode, BR_REG (0));
3091 emit_move_insn (reg, alt_reg);
3094 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3095 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3097 alt_regno = next_scratch_gr_reg ();
3098 alt_reg = gen_rtx_REG (DImode, alt_regno);
3099 do_restore (gen_movdi_x, alt_reg, cfa_off);
3100 cfa_off -= 8;
3101 reg = gen_rtx_REG (DImode, regno);
3102 emit_move_insn (reg, alt_reg);
3105 /* Restore floating point registers. */
3106 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3107 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3109 gcc_assert (!(cfa_off & 15));
3110 reg = gen_rtx_REG (XFmode, regno);
3111 do_restore (gen_fr_restore_x, reg, cfa_off);
3112 cfa_off -= 16;
3115 /* Restore ar.unat for real. */
3116 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3118 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3119 emit_move_insn (reg, ar_unat_save_reg);
3122 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3124 finish_spill_pointers ();
3126 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3128 /* ??? At this point we must generate a magic insn that appears to
3129 modify the spill iterators, the stack pointer, and the frame
3130 pointer. This would allow the most scheduling freedom. For now,
3131 just hard stop. */
3132 emit_insn (gen_blockage ());
3135 if (cfun->machine->ia64_eh_epilogue_sp)
3136 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3137 else if (frame_pointer_needed)
3139 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3140 RTX_FRAME_RELATED_P (insn) = 1;
3142 else if (current_frame_info.total_size)
3144 rtx offset, frame_size_rtx;
3146 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3147 if (CONST_OK_FOR_I (current_frame_info.total_size))
3148 offset = frame_size_rtx;
3149 else
3151 regno = next_scratch_gr_reg ();
3152 offset = gen_rtx_REG (DImode, regno);
3153 emit_move_insn (offset, frame_size_rtx);
3156 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3157 offset));
3159 RTX_FRAME_RELATED_P (insn) = 1;
3160 if (GET_CODE (offset) != CONST_INT)
3162 REG_NOTES (insn)
3163 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3164 gen_rtx_SET (VOIDmode,
3165 stack_pointer_rtx,
3166 gen_rtx_PLUS (DImode,
3167 stack_pointer_rtx,
3168 frame_size_rtx)),
3169 REG_NOTES (insn));
3173 if (cfun->machine->ia64_eh_epilogue_bsp)
3174 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3176 if (! sibcall_p)
3177 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3178 else
3180 int fp = GR_REG (2);
3181 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
3182 first available call-clobbered register.  If there was a frame pointer
3183 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3184 so we have to make sure we're using the string "r2" when emitting
3185 the register name for the assembler. */
3186 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3187 fp = HARD_FRAME_POINTER_REGNUM;
3189 /* We must emit an alloc to force the input registers to become output
3190 registers. Otherwise, if the callee tries to pass its parameters
3191 through to another call without an intervening alloc, then these
3192 values get lost. */
3193 /* ??? We don't need to preserve all input registers. We only need to
3194 preserve those input registers used as arguments to the sibling call.
3195 It is unclear how to compute that number here. */
3196 if (current_frame_info.n_input_regs != 0)
3198 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3199 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3200 const0_rtx, const0_rtx,
3201 n_inputs, const0_rtx));
3202 RTX_FRAME_RELATED_P (insn) = 1;
3207 /* Return 1 if br.ret can do all the work required to return from a
3208 function. */
3211 ia64_direct_return (void)
3213 if (reload_completed && ! frame_pointer_needed)
3215 ia64_compute_frame_size (get_frame_size ());
3217 return (current_frame_info.total_size == 0
3218 && current_frame_info.n_spilled == 0
3219 && current_frame_info.reg_save_b0 == 0
3220 && current_frame_info.reg_save_pr == 0
3221 && current_frame_info.reg_save_ar_pfs == 0
3222 && current_frame_info.reg_save_ar_unat == 0
3223 && current_frame_info.reg_save_ar_lc == 0);
3225 return 0;
3228 /* Return the magic cookie that we use to hold the return address
3229 during early compilation. */
3232 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3234 if (count != 0)
3235 return NULL;
3236 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3239 /* Split this value after reload, now that we know where the return
3240 address is saved. */
3242 void
3243 ia64_split_return_addr_rtx (rtx dest)
3245 rtx src;
3247 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3249 if (current_frame_info.reg_save_b0 != 0)
3250 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3251 else
3253 HOST_WIDE_INT off;
3254 unsigned int regno;
3256 /* Compute offset from CFA for BR0. */
3257 /* ??? Must be kept in sync with ia64_expand_prologue. */
3258 off = (current_frame_info.spill_cfa_off
3259 + current_frame_info.spill_size);
3260 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3261 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3262 off -= 8;
3264 /* Convert CFA offset to a register based offset. */
3265 if (frame_pointer_needed)
3266 src = hard_frame_pointer_rtx;
3267 else
3269 src = stack_pointer_rtx;
3270 off += current_frame_info.total_size;
3273 /* Load address into scratch register. */
3274 if (CONST_OK_FOR_I (off))
3275 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3276 else
3278 emit_move_insn (dest, GEN_INT (off));
3279 emit_insn (gen_adddi3 (dest, src, dest));
3282 src = gen_rtx_MEM (Pmode, dest);
3285 else
3286 src = gen_rtx_REG (DImode, BR_REG (0));
3288 emit_move_insn (dest, src);
3292 ia64_hard_regno_rename_ok (int from, int to)
3294 /* Don't clobber any of the registers we reserved for the prologue. */
3295 if (to == current_frame_info.reg_fp
3296 || to == current_frame_info.reg_save_b0
3297 || to == current_frame_info.reg_save_pr
3298 || to == current_frame_info.reg_save_ar_pfs
3299 || to == current_frame_info.reg_save_ar_unat
3300 || to == current_frame_info.reg_save_ar_lc)
3301 return 0;
3303 if (from == current_frame_info.reg_fp
3304 || from == current_frame_info.reg_save_b0
3305 || from == current_frame_info.reg_save_pr
3306 || from == current_frame_info.reg_save_ar_pfs
3307 || from == current_frame_info.reg_save_ar_unat
3308 || from == current_frame_info.reg_save_ar_lc)
3309 return 0;
3311 /* Don't use output registers outside the register frame. */
3312 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3313 return 0;
3315 /* Retain even/oddness on predicate register pairs. */
3316 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3317 return (from & 1) == (to & 1);
3319 return 1;
3322 /* Target hook for assembling integer objects. Handle word-sized
3323 aligned objects and detect the cases when @fptr is needed. */
3325 static bool
3326 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3328 if (size == POINTER_SIZE / BITS_PER_UNIT
3329 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3330 && GET_CODE (x) == SYMBOL_REF
3331 && SYMBOL_REF_FUNCTION_P (x))
3333 static const char * const directive[2][2] = {
3334 /* 64-bit pointer */ /* 32-bit pointer */
3335 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3336 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3338 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3339 output_addr_const (asm_out_file, x);
3340 fputs (")\n", asm_out_file);
3341 return true;
3343 return default_assemble_integer (x, size, aligned_p);
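/* For a word-sized reference to a function "foo" in LP64 code, the branch
   above emits roughly

       data8   @fptr(foo)

   so the linker materializes an official function descriptor instead of a
   raw code address (the symbol spelling is illustrative only).  */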
3346 /* Emit the function prologue. */
3348 static void
3349 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3351 int mask, grsave, grsave_prev;
3353 if (current_frame_info.need_regstk)
3354 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3355 current_frame_info.n_input_regs,
3356 current_frame_info.n_local_regs,
3357 current_frame_info.n_output_regs,
3358 current_frame_info.n_rotate_regs);
3360 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3361 return;
3363 /* Emit the .prologue directive. */
3365 mask = 0;
3366 grsave = grsave_prev = 0;
3367 if (current_frame_info.reg_save_b0 != 0)
3369 mask |= 8;
3370 grsave = grsave_prev = current_frame_info.reg_save_b0;
3372 if (current_frame_info.reg_save_ar_pfs != 0
3373 && (grsave_prev == 0
3374 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3376 mask |= 4;
3377 if (grsave_prev == 0)
3378 grsave = current_frame_info.reg_save_ar_pfs;
3379 grsave_prev = current_frame_info.reg_save_ar_pfs;
3381 if (current_frame_info.reg_fp != 0
3382 && (grsave_prev == 0
3383 || current_frame_info.reg_fp == grsave_prev + 1))
3385 mask |= 2;
3386 if (grsave_prev == 0)
3387 grsave = HARD_FRAME_POINTER_REGNUM;
3388 grsave_prev = current_frame_info.reg_fp;
3390 if (current_frame_info.reg_save_pr != 0
3391 && (grsave_prev == 0
3392 || current_frame_info.reg_save_pr == grsave_prev + 1))
3394 mask |= 1;
3395 if (grsave_prev == 0)
3396 grsave = current_frame_info.reg_save_pr;
3399 if (mask && TARGET_GNU_AS)
3400 fprintf (file, "\t.prologue %d, %d\n", mask,
3401 ia64_dbx_register_number (grsave));
3402 else
3403 fputs ("\t.prologue\n", file);
3405 /* Emit a .spill directive, if necessary, to relocate the base of
3406 the register spill area. */
3407 if (current_frame_info.spill_cfa_off != -16)
3408 fprintf (file, "\t.spill %ld\n",
3409 (long) (current_frame_info.spill_cfa_off
3410 + current_frame_info.spill_size));
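/* Example of the unwind prologue directive emitted above, with hypothetical
   numbers: if b0 and ar.pfs were saved in consecutive general registers we
   might produce

       .prologue 12, 33

   where 12 == 8|4 encodes "rp and ar.pfs saved" and 33 is the debugger
   register number of the first save register.  */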
3413 /* Emit the .body directive at the scheduled end of the prologue. */
3415 static void
3416 ia64_output_function_end_prologue (FILE *file)
3418 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3419 return;
3421 fputs ("\t.body\n", file);
3424 /* Emit the function epilogue. */
3426 static void
3427 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3428 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3430 int i;
3432 if (current_frame_info.reg_fp)
3434 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3435 reg_names[HARD_FRAME_POINTER_REGNUM]
3436 = reg_names[current_frame_info.reg_fp];
3437 reg_names[current_frame_info.reg_fp] = tmp;
3439 if (! TARGET_REG_NAMES)
3441 for (i = 0; i < current_frame_info.n_input_regs; i++)
3442 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3443 for (i = 0; i < current_frame_info.n_local_regs; i++)
3444 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3445 for (i = 0; i < current_frame_info.n_output_regs; i++)
3446 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3449 current_frame_info.initialized = 0;
3453 ia64_dbx_register_number (int regno)
3455 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3456 from its home at loc79 to something inside the register frame. We
3457 must perform the same renumbering here for the debug info. */
3458 if (current_frame_info.reg_fp)
3460 if (regno == HARD_FRAME_POINTER_REGNUM)
3461 regno = current_frame_info.reg_fp;
3462 else if (regno == current_frame_info.reg_fp)
3463 regno = HARD_FRAME_POINTER_REGNUM;
3466 if (IN_REGNO_P (regno))
3467 return 32 + regno - IN_REG (0);
3468 else if (LOC_REGNO_P (regno))
3469 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3470 else if (OUT_REGNO_P (regno))
3471 return (32 + current_frame_info.n_input_regs
3472 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3473 else
3474 return regno;
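/* Worked example with a hypothetical frame of 2 input and 3 local
   registers: in1 maps to 32 + 1 = 33, loc2 maps to 32 + 2 + 2 = 36 and
   out0 maps to 32 + 2 + 3 + 0 = 37, per the arithmetic above.  */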
3477 void
3478 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3480 rtx addr_reg, eight = GEN_INT (8);
3482 /* The Intel assembler requires that the global __ia64_trampoline symbol
3483 be declared explicitly.  */
3484 if (!TARGET_GNU_AS)
3486 static bool declared_ia64_trampoline = false;
3488 if (!declared_ia64_trampoline)
3490 declared_ia64_trampoline = true;
3491 (*targetm.asm_out.globalize_label) (asm_out_file,
3492 "__ia64_trampoline");
3496 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3497 addr = convert_memory_address (Pmode, addr);
3498 fnaddr = convert_memory_address (Pmode, fnaddr);
3499 static_chain = convert_memory_address (Pmode, static_chain);
3501 /* Load up our iterator. */
3502 addr_reg = gen_reg_rtx (Pmode);
3503 emit_move_insn (addr_reg, addr);
3505 /* The first two words are the fake descriptor:
3506 __ia64_trampoline, ADDR+16. */
3507 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3508 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3509 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3511 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3512 copy_to_reg (plus_constant (addr, 16)));
3513 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3515 /* The third word is the target descriptor. */
3516 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3517 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3519 /* The fourth word is the static chain. */
3520 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
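/* Resulting layout of the trampoline built above (a sketch; the offsets
   simply follow the four stores):

       addr +  0:  __ia64_trampoline     fake descriptor, entry point
       addr +  8:  addr + 16             fake descriptor, gp slot
       addr + 16:  FNADDR                descriptor of the target function
       addr + 24:  STATIC_CHAIN  */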
3523 /* Do any needed setup for a variadic function. CUM has not been updated
3524 for the last named argument, which has type TYPE and mode MODE.
3526 We generate the actual spill instructions during prologue generation. */
3528 static void
3529 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3530 tree type, int * pretend_size,
3531 int second_time ATTRIBUTE_UNUSED)
3533 CUMULATIVE_ARGS next_cum = *cum;
3535 /* Skip the current argument. */
3536 ia64_function_arg_advance (&next_cum, mode, type, 1);
3538 if (next_cum.words < MAX_ARGUMENT_SLOTS)
3540 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3541 *pretend_size = n * UNITS_PER_WORD;
3542 cfun->machine->n_varargs = n;
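/* For example (hypothetical prototype): for f (int a, int b, ...), the two
   named arguments occupy two of the eight argument slots, so n == 6 and
   *pretend_size == 6 * UNITS_PER_WORD == 48 bytes of varargs spill area.  */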
3546 /* Check whether TYPE is a homogeneous floating point aggregate. If
3547 it is, return the mode of the floating point type that appears
3548 in all leaves.  If it is not, return VOIDmode.
3550 An aggregate is a homogeneous floating point aggregate if all
3551 fields/elements in it have the same floating point type (e.g.,
3552 SFmode). 128-bit quad-precision floats are excluded.
3554 Variable sized aggregates should never arrive here, since we should
3555 have already decided to pass them by reference. Top-level zero-sized
3556 aggregates are excluded because our parallels crash the middle-end. */
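/* For example (illustrative types only): struct { float x, y, z; } and
   struct { float v[4]; } are HFAs with element mode SFmode, while
   struct { float x; double y; } mixes element types and is not an HFA.  */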
3558 static enum machine_mode
3559 hfa_element_mode (tree type, bool nested)
3561 enum machine_mode element_mode = VOIDmode;
3562 enum machine_mode mode;
3563 enum tree_code code = TREE_CODE (type);
3564 int know_element_mode = 0;
3565 tree t;
3567 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3568 return VOIDmode;
3570 switch (code)
3572 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3573 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3574 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3575 case LANG_TYPE: case FUNCTION_TYPE:
3576 return VOIDmode;
3578 /* Fortran complex types are supposed to be HFAs, so we need to handle
3579 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3580 types though. */
3581 case COMPLEX_TYPE:
3582 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3583 && TYPE_MODE (type) != TCmode)
3584 return GET_MODE_INNER (TYPE_MODE (type));
3585 else
3586 return VOIDmode;
3588 case REAL_TYPE:
3589 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3590 mode if this is contained within an aggregate. */
3591 if (nested && TYPE_MODE (type) != TFmode)
3592 return TYPE_MODE (type);
3593 else
3594 return VOIDmode;
3596 case ARRAY_TYPE:
3597 return hfa_element_mode (TREE_TYPE (type), 1);
3599 case RECORD_TYPE:
3600 case UNION_TYPE:
3601 case QUAL_UNION_TYPE:
3602 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3604 if (TREE_CODE (t) != FIELD_DECL)
3605 continue;
3607 mode = hfa_element_mode (TREE_TYPE (t), 1);
3608 if (know_element_mode)
3610 if (mode != element_mode)
3611 return VOIDmode;
3613 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3614 return VOIDmode;
3615 else
3617 know_element_mode = 1;
3618 element_mode = mode;
3621 return element_mode;
3623 default:
3624 /* If we reach here, we probably have some front-end specific type
3625 that the backend doesn't know about. This can happen via the
3626 aggregate_value_p call in init_function_start. All we can do is
3627 ignore unknown tree types. */
3628 return VOIDmode;
3631 return VOIDmode;
3634 /* Return the number of words required to hold a quantity of TYPE and MODE
3635 when passed as an argument. */
3636 static int
3637 ia64_function_arg_words (tree type, enum machine_mode mode)
3639 int words;
3641 if (mode == BLKmode)
3642 words = int_size_in_bytes (type);
3643 else
3644 words = GET_MODE_SIZE (mode);
3646 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
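/* For example, with UNITS_PER_WORD == 8: a 12-byte BLKmode aggregate takes
   (12 + 7) / 8 == 2 argument slots, while a DImode scalar takes one.  */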
3649 /* Return the number of registers that should be skipped so the current
3650 argument (described by TYPE and WORDS) will be properly aligned.
3652 Integer and float arguments larger than 8 bytes start at the next
3653 even boundary. Aggregates larger than 8 bytes start at the next
3654 even boundary if the aggregate has 16 byte alignment. Note that
3655 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3656 but are still to be aligned in registers.
3658 ??? The ABI does not specify how to handle aggregates with
3659 alignment from 9 to 15 bytes, or greater than 16. We handle them
3660 all as if they had 16 byte alignment. Such aggregates can occur
3661 only if gcc extensions are used. */
3662 static int
3663 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3665 if ((cum->words & 1) == 0)
3666 return 0;
3668 if (type
3669 && TREE_CODE (type) != INTEGER_TYPE
3670 && TREE_CODE (type) != REAL_TYPE)
3671 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3672 else
3673 return words > 1;
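/* Examples of the rule above: when cum->words is odd, a 16-byte-aligned
   aggregate or a 16-byte integer (words == 2) is pushed to the next even
   slot (offset 1), while an 8-byte scalar never is.  */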
3676 /* Return rtx for register where argument is passed, or zero if it is passed
3677 on the stack. */
3678 /* ??? 128-bit quad-precision floats are always passed in general
3679 registers. */
3682 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3683 int named, int incoming)
3685 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3686 int words = ia64_function_arg_words (type, mode);
3687 int offset = ia64_function_arg_offset (cum, type, words);
3688 enum machine_mode hfa_mode = VOIDmode;
3690 /* If all argument slots are used, then it must go on the stack. */
3691 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3692 return 0;
3694 /* Check for and handle homogeneous FP aggregates. */
3695 if (type)
3696 hfa_mode = hfa_element_mode (type, 0);
3698 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3699 and unprototyped hfas are passed specially. */
3700 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3702 rtx loc[16];
3703 int i = 0;
3704 int fp_regs = cum->fp_regs;
3705 int int_regs = cum->words + offset;
3706 int hfa_size = GET_MODE_SIZE (hfa_mode);
3707 int byte_size;
3708 int args_byte_size;
3710 /* If prototyped, pass it in FR regs then GR regs.
3711 If not prototyped, pass it in both FR and GR regs.
3713 If this is an SFmode aggregate, then it is possible to run out of
3714 FR regs while GR regs are still left. In that case, we pass the
3715 remaining part in the GR regs. */
3717 /* Fill the FP regs. We always do this. We stop if we reach the end
3718 of the argument, the last FP register, or the last argument slot. */
3720 byte_size = ((mode == BLKmode)
3721 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3722 args_byte_size = int_regs * UNITS_PER_WORD;
3723 offset = 0;
3724 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3725 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3727 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3728 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3729 + fp_regs)),
3730 GEN_INT (offset));
3731 offset += hfa_size;
3732 args_byte_size += hfa_size;
3733 fp_regs++;
3736 /* If no prototype, then the whole thing must go in GR regs. */
3737 if (! cum->prototype)
3738 offset = 0;
3739 /* If this is an SFmode aggregate, then we might have some left over
3740 that needs to go in GR regs. */
3741 else if (byte_size != offset)
3742 int_regs += offset / UNITS_PER_WORD;
3744 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3746 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3748 enum machine_mode gr_mode = DImode;
3749 unsigned int gr_size;
3751 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3752 then this goes in a GR reg left adjusted/little endian, right
3753 adjusted/big endian. */
3754 /* ??? Currently this is handled wrong, because 4-byte hunks are
3755 always right adjusted/little endian. */
3756 if (offset & 0x4)
3757 gr_mode = SImode;
3758 /* If we have an even 4 byte hunk because the aggregate is a
3759 multiple of 4 bytes in size, then this goes in a GR reg right
3760 adjusted/little endian. */
3761 else if (byte_size - offset == 4)
3762 gr_mode = SImode;
3764 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3765 gen_rtx_REG (gr_mode, (basereg
3766 + int_regs)),
3767 GEN_INT (offset));
3769 gr_size = GET_MODE_SIZE (gr_mode);
3770 offset += gr_size;
3771 if (gr_size == UNITS_PER_WORD
3772 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3773 int_regs++;
3774 else if (gr_size > UNITS_PER_WORD)
3775 int_regs += gr_size / UNITS_PER_WORD;
3777 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3780 /* Integral values and aggregates go in general registers. If we have run out
3781 of FR registers, then FP values must also go in general registers. This can
3782 happen when we have an SFmode HFA. */
3783 else if (mode == TFmode || mode == TCmode
3784 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3786 int byte_size = ((mode == BLKmode)
3787 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3788 if (BYTES_BIG_ENDIAN
3789 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3790 && byte_size < UNITS_PER_WORD
3791 && byte_size > 0)
3793 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3794 gen_rtx_REG (DImode,
3795 (basereg + cum->words
3796 + offset)),
3797 const0_rtx);
3798 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3800 else
3801 return gen_rtx_REG (mode, basereg + cum->words + offset);
3805 /* If there is a prototype, then FP values go in a FR register when
3806 named, and in a GR register when unnamed. */
3807 else if (cum->prototype)
3809 if (named)
3810 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3811 /* In big-endian mode, an anonymous SFmode value must be represented
3812 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3813 the value into the high half of the general register. */
3814 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3815 return gen_rtx_PARALLEL (mode,
3816 gen_rtvec (1,
3817 gen_rtx_EXPR_LIST (VOIDmode,
3818 gen_rtx_REG (DImode, basereg + cum->words + offset),
3819 const0_rtx)));
3820 else
3821 return gen_rtx_REG (mode, basereg + cum->words + offset);
3823 /* If there is no prototype, then FP values go in both FR and GR
3824 registers. */
3825 else
3827 /* See comment above. */
3828 enum machine_mode inner_mode =
3829 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3831 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3832 gen_rtx_REG (mode, (FR_ARG_FIRST
3833 + cum->fp_regs)),
3834 const0_rtx);
3835 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3836 gen_rtx_REG (inner_mode,
3837 (basereg + cum->words
3838 + offset)),
3839 const0_rtx);
3841 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
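/* Illustrative sketch, not part of ia64.c: the FR-register fill loop above,
   specialised to the first argument of a call (so both the FP register
   count and the byte count start at zero).  The constants mirror ia64
   (8 slots of 8 bytes); the helper and its parameters are hypothetical.  */
static void
example_hfa_split (int hfa_bytes, int elt_bytes, int *fp_elts, int *gr_bytes)
{
  const int max_slots = 8, slot_bytes = 8;
  int offset = 0, fp_regs = 0, args_byte_size = 0;

  while (offset < hfa_bytes
         && fp_regs < max_slots
         && args_byte_size < max_slots * slot_bytes)
    {
      offset += elt_bytes;           /* one element per FR register */
      args_byte_size += elt_bytes;
      fp_regs++;
    }

  *fp_elts = fp_regs;                /* elements that landed in FR regs */
  *gr_bytes = hfa_bytes - offset;    /* remainder handled by the GR loop */
}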
3845 /* Return the number of bytes, at the beginning of the argument, that must be
3846 put in registers. 0 means the argument is entirely in registers or entirely
3847 in memory. */
3849 static int
3850 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3851 tree type, bool named ATTRIBUTE_UNUSED)
3853 int words = ia64_function_arg_words (type, mode);
3854 int offset = ia64_function_arg_offset (cum, type, words);
3856 /* If all argument slots are used, then it must go on the stack. */
3857 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3858 return 0;
3860 /* It doesn't matter whether the argument goes in FR or GR regs. If
3861 it fits within the 8 argument slots, then it goes entirely in
3862 registers. If it extends past the last argument slot, then the rest
3863 goes on the stack. */
3865 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3866 return 0;
3868 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
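/* Illustrative sketch, not part of ia64.c: the register/stack split computed
   above, ignoring the alignment offset.  With 8 argument slots of 8 bytes,
   an argument needing 4 slots that starts in slot 6 gets (8 - 6) * 8 == 16
   bytes in registers and the remaining 16 bytes on the stack.  The helper
   and its parameters are hypothetical.  */
static int
example_partial_bytes (int slots_used, int arg_slots)
{
  const int max_slots = 8, slot_bytes = 8;

  if (slots_used >= max_slots)              /* entirely on the stack */
    return 0;
  if (slots_used + arg_slots <= max_slots)  /* entirely in registers */
    return 0;
  return (max_slots - slots_used) * slot_bytes;
}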
3871 /* Update CUM to point after this argument. This is patterned after
3872 ia64_function_arg. */
3874 void
3875 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3876 tree type, int named)
3878 int words = ia64_function_arg_words (type, mode);
3879 int offset = ia64_function_arg_offset (cum, type, words);
3880 enum machine_mode hfa_mode = VOIDmode;
3882 /* If all arg slots are already full, then there is nothing to do. */
3883 if (cum->words >= MAX_ARGUMENT_SLOTS)
3884 return;
3886 cum->words += words + offset;
3888 /* Check for and handle homogeneous FP aggregates. */
3889 if (type)
3890 hfa_mode = hfa_element_mode (type, 0);
3892 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3893 and unprototyped hfas are passed specially. */
3894 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3896 int fp_regs = cum->fp_regs;
3897 /* This is the original value of cum->words + offset. */
3898 int int_regs = cum->words - words;
3899 int hfa_size = GET_MODE_SIZE (hfa_mode);
3900 int byte_size;
3901 int args_byte_size;
3903 /* If prototyped, pass it in FR regs then GR regs.
3904 If not prototyped, pass it in both FR and GR regs.
3906 If this is an SFmode aggregate, then it is possible to run out of
3907 FR regs while GR regs are still left. In that case, we pass the
3908 remaining part in the GR regs. */
3910 /* Fill the FP regs. We always do this. We stop if we reach the end
3911 of the argument, the last FP register, or the last argument slot. */
3913 byte_size = ((mode == BLKmode)
3914 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3915 args_byte_size = int_regs * UNITS_PER_WORD;
3916 offset = 0;
3917 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3918 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3920 offset += hfa_size;
3921 args_byte_size += hfa_size;
3922 fp_regs++;
3925 cum->fp_regs = fp_regs;
3928 /* Integral values and aggregates go in general registers. So do TFmode FP
3929 values. If we have run out of FR registers, then other FP values must also
3930 go in general registers. This can happen when we have an SFmode HFA. */
3931 else if (mode == TFmode || mode == TCmode
3932 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3933 cum->int_regs = cum->words;
3935 /* If there is a prototype, then FP values go in a FR register when
3936 named, and in a GR register when unnamed. */
3937 else if (cum->prototype)
3939 if (! named)
3940 cum->int_regs = cum->words;
3941 else
3942 /* ??? Complex types should not reach here. */
3943 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3945 /* If there is no prototype, then FP values go in both FR and GR
3946 registers. */
3947 else
3949 /* ??? Complex types should not reach here. */
3950 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3951 cum->int_regs = cum->words;
3955 /* Arguments with alignment larger than 8 bytes start at the next even
3956 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
3957 even though their normal alignment is 8 bytes. See ia64_function_arg. */
3959 int
3960 ia64_function_arg_boundary (enum machine_mode mode, tree type)
3963 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
3964 return PARM_BOUNDARY * 2;
3966 if (type)
3968 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
3969 return PARM_BOUNDARY * 2;
3970 else
3971 return PARM_BOUNDARY;
3974 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
3975 return PARM_BOUNDARY * 2;
3976 else
3977 return PARM_BOUNDARY;
3980 /* Variable-sized types are passed by reference. */
3981 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3983 static bool
3984 ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3985 enum machine_mode mode ATTRIBUTE_UNUSED,
3986 tree type, bool named ATTRIBUTE_UNUSED)
3988 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3991 /* True if it is OK to do sibling call optimization for the specified
3992 call expression EXP. DECL will be the called function, or NULL if
3993 this is an indirect call. */
3994 static bool
3995 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3997 /* We can't perform a sibcall if the current function has the syscall_linkage
3998 attribute. */
3999 if (lookup_attribute ("syscall_linkage",
4000 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4001 return false;
4003 /* We must always return with our current GP. This means we can
4004 only sibcall to functions defined in the current module. */
4005 return decl && (*targetm.binds_local_p) (decl);
4009 /* Implement va_arg. */
4011 static tree
4012 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4014 /* Variable-sized types are passed by reference. */
4015 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4017 tree ptrtype = build_pointer_type (type);
4018 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4019 return build_va_arg_indirect_ref (addr);
4022 /* Aggregate arguments with alignment larger than 8 bytes start at
4023 the next even boundary. Integer and floating point arguments
4024 do so if they are larger than 8 bytes, whether or not they are
4025 also aligned larger than 8 bytes. */
4026 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4027 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4029 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
4030 build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
4031 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4032 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
4033 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4034 gimplify_and_add (t, pre_p);
4037 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
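/* Illustrative sketch, not part of ia64.c: the PLUS/BIT_AND pair built
   above is the usual round-up-to-16 idiom applied to the va_list pointer.
   The helper is hypothetical and works on a plain address for clarity.  */
static unsigned long
example_round_up_to_16 (unsigned long addr)
{
  /* e.g. 0x1009 becomes (0x1009 + 15) & ~15UL == 0x1010.  */
  return (addr + 16 - 1) & ~15UL;
}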
4040 /* Return 1 if the function return value is returned in memory. Return 0 if
4041 it is in a register. */
4043 static bool
4044 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
4046 enum machine_mode mode;
4047 enum machine_mode hfa_mode;
4048 HOST_WIDE_INT byte_size;
4050 mode = TYPE_MODE (valtype);
4051 byte_size = GET_MODE_SIZE (mode);
4052 if (mode == BLKmode)
4054 byte_size = int_size_in_bytes (valtype);
4055 if (byte_size < 0)
4056 return true;
4059 /* HFAs with up to 8 elements are returned in the FP argument registers. */
4061 hfa_mode = hfa_element_mode (valtype, 0);
4062 if (hfa_mode != VOIDmode)
4064 int hfa_size = GET_MODE_SIZE (hfa_mode);
4066 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4067 return true;
4068 else
4069 return false;
4071 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4072 return true;
4073 else
4074 return false;
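/* Illustrative sketch, not part of ia64.c: the decision above, assuming the
   ia64 limits of 8 FP argument slots and 4 integer return slots of 8 bytes
   (MAX_ARGUMENT_SLOTS and MAX_INT_RETURN_SLOTS in ia64.h).  A non-positive
   hfa_elt_bytes means the type is not an HFA.  The helper is hypothetical.  */
static int
example_return_in_memory (int byte_size, int hfa_elt_bytes)
{
  const int max_fp_slots = 8, max_int_slots = 4, word_bytes = 8;

  if (hfa_elt_bytes > 0)                        /* homogeneous FP aggregate */
    return byte_size / hfa_elt_bytes > max_fp_slots;
  return byte_size > word_bytes * max_int_slots;
}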
4077 /* Return rtx for register that holds the function return value. */
4079 rtx
4080 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4082 enum machine_mode mode;
4083 enum machine_mode hfa_mode;
4085 mode = TYPE_MODE (valtype);
4086 hfa_mode = hfa_element_mode (valtype, 0);
4088 if (hfa_mode != VOIDmode)
4090 rtx loc[8];
4091 int i;
4092 int hfa_size;
4093 int byte_size;
4094 int offset;
4096 hfa_size = GET_MODE_SIZE (hfa_mode);
4097 byte_size = ((mode == BLKmode)
4098 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4099 offset = 0;
4100 for (i = 0; offset < byte_size; i++)
4102 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4103 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4104 GEN_INT (offset));
4105 offset += hfa_size;
4107 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4109 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4110 return gen_rtx_REG (mode, FR_ARG_FIRST);
4111 else
4113 bool need_parallel = false;
4115 /* In big-endian mode, we need to manage the layout of aggregates
4116 in the registers so that we get the bits properly aligned in
4117 the highpart of the registers. */
4118 if (BYTES_BIG_ENDIAN
4119 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4120 need_parallel = true;
4122 /* Something like struct S { long double x; char a[0] } is not an
4123 HFA structure, and therefore doesn't go in fp registers. But
4124 the middle-end will give it XFmode anyway, and XFmode values
4125 don't normally fit in integer registers. So we need to smuggle
4126 the value inside a parallel. */
4127 else if (mode == XFmode || mode == XCmode)
4128 need_parallel = true;
4130 if (need_parallel)
4132 rtx loc[8];
4133 int offset;
4134 int bytesize;
4135 int i;
4137 offset = 0;
4138 bytesize = int_size_in_bytes (valtype);
4139 /* An empty PARALLEL is invalid here, but the return value
4140 doesn't matter for empty structs. */
4141 if (bytesize == 0)
4142 return gen_rtx_REG (mode, GR_RET_FIRST);
4143 for (i = 0; offset < bytesize; i++)
4145 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4146 gen_rtx_REG (DImode,
4147 GR_RET_FIRST + i),
4148 GEN_INT (offset));
4149 offset += UNITS_PER_WORD;
4151 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4154 return gen_rtx_REG (mode, GR_RET_FIRST);
4158 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
4159 We need to emit DTP-relative relocations. */
4161 void
4162 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4164 gcc_assert (size == 8);
4165 fputs ("\tdata8.ua\t@dtprel(", file);
4166 output_addr_const (file, x);
4167 fputs (")", file);
4170 /* Print a memory address as an operand to reference that memory location. */
4172 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4173 also call this from ia64_print_operand for memory addresses. */
4175 void
4176 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4177 rtx address ATTRIBUTE_UNUSED)
4181 /* Print an operand to an assembler instruction.
4182 C Swap and print a comparison operator.
4183 D Print an FP comparison operator.
4184 E Print 32 - constant, for SImode shifts as extract.
4185 e Print 64 - constant, for DImode rotates.
4186 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4187 a floating point register emitted normally.
4188 I Invert a predicate register by adding 1.
4189 J Select the proper predicate register for a condition.
4190 j Select the inverse predicate register for a condition.
4191 O Append .acq for volatile load.
4192 P Postincrement of a MEM.
4193 Q Append .rel for volatile store.
4194 S Shift amount for shladd instruction.
4195 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4196 for Intel assembler.
4197 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4198 for Intel assembler.
4199 r Print register name, or constant 0 as r0. HP compatibility for
4200 Linux kernel.
4201 v Print vector constant value as an 8-byte integer value. */
4203 void
4204 ia64_print_operand (FILE * file, rtx x, int code)
4206 const char *str;
4208 switch (code)
4210 case 0:
4211 /* Handled below. */
4212 break;
4214 case 'C':
4216 enum rtx_code c = swap_condition (GET_CODE (x));
4217 fputs (GET_RTX_NAME (c), file);
4218 return;
4221 case 'D':
4222 switch (GET_CODE (x))
4224 case NE:
4225 str = "neq";
4226 break;
4227 case UNORDERED:
4228 str = "unord";
4229 break;
4230 case ORDERED:
4231 str = "ord";
4232 break;
4233 default:
4234 str = GET_RTX_NAME (GET_CODE (x));
4235 break;
4237 fputs (str, file);
4238 return;
4240 case 'E':
4241 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4242 return;
4244 case 'e':
4245 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4246 return;
4248 case 'F':
4249 if (x == CONST0_RTX (GET_MODE (x)))
4250 str = reg_names [FR_REG (0)];
4251 else if (x == CONST1_RTX (GET_MODE (x)))
4252 str = reg_names [FR_REG (1)];
4253 else
4255 gcc_assert (GET_CODE (x) == REG);
4256 str = reg_names [REGNO (x)];
4258 fputs (str, file);
4259 return;
4261 case 'I':
4262 fputs (reg_names [REGNO (x) + 1], file);
4263 return;
4265 case 'J':
4266 case 'j':
4268 unsigned int regno = REGNO (XEXP (x, 0));
4269 if (GET_CODE (x) == EQ)
4270 regno += 1;
4271 if (code == 'j')
4272 regno ^= 1;
4273 fputs (reg_names [regno], file);
4275 return;
4277 case 'O':
4278 if (MEM_VOLATILE_P (x))
4279 fputs(".acq", file);
4280 return;
4282 case 'P':
4284 HOST_WIDE_INT value;
4286 switch (GET_CODE (XEXP (x, 0)))
4288 default:
4289 return;
4291 case POST_MODIFY:
4292 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4293 if (GET_CODE (x) == CONST_INT)
4294 value = INTVAL (x);
4295 else
4297 gcc_assert (GET_CODE (x) == REG);
4298 fprintf (file, ", %s", reg_names[REGNO (x)]);
4299 return;
4301 break;
4303 case POST_INC:
4304 value = GET_MODE_SIZE (GET_MODE (x));
4305 break;
4307 case POST_DEC:
4308 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4309 break;
4312 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4313 return;
4316 case 'Q':
4317 if (MEM_VOLATILE_P (x))
4318 fputs(".rel", file);
4319 return;
4321 case 'S':
4322 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4323 return;
4325 case 'T':
4326 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4328 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4329 return;
4331 break;
4333 case 'U':
4334 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4336 const char *prefix = "0x";
4337 if (INTVAL (x) & 0x80000000)
4339 fprintf (file, "0xffffffff");
4340 prefix = "";
4342 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4343 return;
4345 break;
4347 case 'r':
4348 /* If this operand is the constant zero, write it as register zero.
4349 Any register, zero, or CONST_INT value is OK here. */
4350 if (GET_CODE (x) == REG)
4351 fputs (reg_names[REGNO (x)], file);
4352 else if (x == CONST0_RTX (GET_MODE (x)))
4353 fputs ("r0", file);
4354 else if (GET_CODE (x) == CONST_INT)
4355 output_addr_const (file, x);
4356 else
4357 output_operand_lossage ("invalid %%r value");
4358 return;
4360 case 'v':
4361 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4362 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4363 break;
4365 case '+':
4367 const char *which;
4369 /* For conditional branches, returns or calls, substitute
4370 sptk, dptk, dpnt, or spnt for %s. */
4371 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4372 if (x)
4374 int pred_val = INTVAL (XEXP (x, 0));
4376 /* Guess top and bottom 2% statically predicted. */
4377 if (pred_val < REG_BR_PROB_BASE / 50)
4378 which = ".spnt";
4379 else if (pred_val < REG_BR_PROB_BASE / 2)
4380 which = ".dpnt";
4381 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4382 which = ".dptk";
4383 else
4384 which = ".sptk";
4386 else if (GET_CODE (current_output_insn) == CALL_INSN)
4387 which = ".sptk";
4388 else
4389 which = ".dptk";
4391 fputs (which, file);
4392 return;
4395 case ',':
4396 x = current_insn_predicate;
4397 if (x)
4399 unsigned int regno = REGNO (XEXP (x, 0));
4400 if (GET_CODE (x) == EQ)
4401 regno += 1;
4402 fprintf (file, "(%s) ", reg_names [regno]);
4404 return;
4406 default:
4407 output_operand_lossage ("ia64_print_operand: unknown code");
4408 return;
4411 switch (GET_CODE (x))
4413 /* This happens for the spill/restore instructions. */
4414 case POST_INC:
4415 case POST_DEC:
4416 case POST_MODIFY:
4417 x = XEXP (x, 0);
4418 /* ... fall through ... */
4420 case REG:
4421 fputs (reg_names [REGNO (x)], file);
4422 break;
4424 case MEM:
4426 rtx addr = XEXP (x, 0);
4427 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4428 addr = XEXP (addr, 0);
4429 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4430 break;
4433 default:
4434 output_addr_const (file, x);
4435 break;
4438 return;
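/* Illustrative sketch, not part of ia64.c: the static branch-prediction
   buckets used by the '+' case above, written out with
   REG_BR_PROB_BASE == 10000 as in GCC.  The helper name is hypothetical.  */
static const char *
example_branch_hint (int pred_val)
{
  if (pred_val < 10000 / 50)         /* below  2%: static  not-taken */
    return ".spnt";
  if (pred_val < 10000 / 2)          /* below 50%: dynamic not-taken */
    return ".dpnt";
  if (pred_val < 10000 / 100 * 98)   /* below 98%: dynamic taken     */
    return ".dptk";
  return ".sptk";                    /* otherwise: static  taken     */
}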
4441 /* Compute a (partial) cost for rtx X. Return true if the complete
4442 cost has been computed, and false if subexpressions should be
4443 scanned. In either case, *TOTAL contains the cost result. */
4444 /* ??? This is incomplete. */
4446 static bool
4447 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4449 switch (code)
4451 case CONST_INT:
4452 switch (outer_code)
4454 case SET:
4455 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4456 return true;
4457 case PLUS:
4458 if (CONST_OK_FOR_I (INTVAL (x)))
4459 *total = 0;
4460 else if (CONST_OK_FOR_J (INTVAL (x)))
4461 *total = 1;
4462 else
4463 *total = COSTS_N_INSNS (1);
4464 return true;
4465 default:
4466 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4467 *total = 0;
4468 else
4469 *total = COSTS_N_INSNS (1);
4470 return true;
4473 case CONST_DOUBLE:
4474 *total = COSTS_N_INSNS (1);
4475 return true;
4477 case CONST:
4478 case SYMBOL_REF:
4479 case LABEL_REF:
4480 *total = COSTS_N_INSNS (3);
4481 return true;
4483 case MULT:
4484 /* For multiplies wider than HImode, we have to go to the FPU,
4485 which normally involves copies. Plus there's the latency
4486 of the multiply itself, and the latency of the instructions to
4487 transfer integer regs to FP regs. */
4488 /* ??? Check for FP mode. */
4489 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4490 *total = COSTS_N_INSNS (10);
4491 else
4492 *total = COSTS_N_INSNS (2);
4493 return true;
4495 case PLUS:
4496 case MINUS:
4497 case ASHIFT:
4498 case ASHIFTRT:
4499 case LSHIFTRT:
4500 *total = COSTS_N_INSNS (1);
4501 return true;
4503 case DIV:
4504 case UDIV:
4505 case MOD:
4506 case UMOD:
4507 /* We make divide expensive, so that divide-by-constant will be
4508 optimized to a multiply. */
4509 *total = COSTS_N_INSNS (60);
4510 return true;
4512 default:
4513 return false;
4517 /* Calculate the cost of moving data from a register in class FROM to
4518 one in class TO, using MODE. */
4520 int
4521 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4522 enum reg_class to)
4524 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4525 if (to == ADDL_REGS)
4526 to = GR_REGS;
4527 if (from == ADDL_REGS)
4528 from = GR_REGS;
4530 /* All costs are symmetric, so reduce cases by putting the
4531 lower number class as the destination. */
4532 if (from < to)
4534 enum reg_class tmp = to;
4535 to = from, from = tmp;
4538 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4539 so that we get secondary memory reloads. Between FR_REGS,
4540 we have to make this at least as expensive as MEMORY_MOVE_COST
4541 to avoid spectacularly poor register class preferencing. */
4542 if (mode == XFmode)
4544 if (to != GR_REGS || from != GR_REGS)
4545 return MEMORY_MOVE_COST (mode, to, 0);
4546 else
4547 return 3;
4550 switch (to)
4552 case PR_REGS:
4553 /* Moving between PR registers takes two insns. */
4554 if (from == PR_REGS)
4555 return 3;
4556 /* Moving between PR and anything but GR is impossible. */
4557 if (from != GR_REGS)
4558 return MEMORY_MOVE_COST (mode, to, 0);
4559 break;
4561 case BR_REGS:
4562 /* Moving between BR and anything but GR is impossible. */
4563 if (from != GR_REGS && from != GR_AND_BR_REGS)
4564 return MEMORY_MOVE_COST (mode, to, 0);
4565 break;
4567 case AR_I_REGS:
4568 case AR_M_REGS:
4569 /* Moving between AR and anything but GR is impossible. */
4570 if (from != GR_REGS)
4571 return MEMORY_MOVE_COST (mode, to, 0);
4572 break;
4574 case GR_REGS:
4575 case FR_REGS:
4576 case GR_AND_FR_REGS:
4577 case GR_AND_BR_REGS:
4578 case ALL_REGS:
4579 break;
4581 default:
4582 gcc_unreachable ();
4585 return 2;
4588 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS
4589 to use when copying X into that class. */
4591 enum reg_class
4592 ia64_preferred_reload_class (rtx x, enum reg_class class)
4594 switch (class)
4596 case FR_REGS:
4597 /* Don't allow volatile mem reloads into floating point registers.
4598 This is defined to force reload to choose the r/m case instead
4599 of the f/f case when reloading (set (reg fX) (mem/v)). */
4600 if (MEM_P (x) && MEM_VOLATILE_P (x))
4601 return NO_REGS;
4603 /* Force all unrecognized constants into the constant pool. */
4604 if (CONSTANT_P (x))
4605 return NO_REGS;
4606 break;
4608 case AR_M_REGS:
4609 case AR_I_REGS:
4610 if (!OBJECT_P (x))
4611 return NO_REGS;
4612 break;
4614 default:
4615 break;
4618 return class;
4621 /* This function returns the register class required for a secondary
4622 register when copying between one of the registers in CLASS, and X,
4623 using MODE. A return value of NO_REGS means that no secondary register
4624 is required. */
4626 enum reg_class
4627 ia64_secondary_reload_class (enum reg_class class,
4628 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4630 int regno = -1;
4632 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4633 regno = true_regnum (x);
4635 switch (class)
4637 case BR_REGS:
4638 case AR_M_REGS:
4639 case AR_I_REGS:
4640 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4641 interaction. We end up with two pseudos with overlapping lifetimes
4642 both of which are equiv to the same constant, and both of which need
4643 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4644 changes depending on the path length, which means the qty_first_reg
4645 check in make_regs_eqv can give different answers at different times.
4646 At some point I'll probably need a reload_indi pattern to handle
4647 this.
4649 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4650 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4651 non-general registers for good measure. */
4652 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4653 return GR_REGS;
4655 /* This is needed if a pseudo used as a call_operand gets spilled to a
4656 stack slot. */
4657 if (GET_CODE (x) == MEM)
4658 return GR_REGS;
4659 break;
4661 case FR_REGS:
4662 /* Need to go through general registers to get to other class regs. */
4663 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4664 return GR_REGS;
4666 /* This can happen when a paradoxical subreg is an operand to the
4667 muldi3 pattern. */
4668 /* ??? This shouldn't be necessary after instruction scheduling is
4669 enabled, because paradoxical subregs are not accepted by
4670 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4671 stop the paradoxical subreg stupidity in the *_operand functions
4672 in recog.c. */
4673 if (GET_CODE (x) == MEM
4674 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4675 || GET_MODE (x) == QImode))
4676 return GR_REGS;
4678 /* This can happen because of the ior/and/etc patterns that accept FP
4679 registers as operands. If the third operand is a constant, then it
4680 needs to be reloaded into an FP register. */
4681 if (GET_CODE (x) == CONST_INT)
4682 return GR_REGS;
4684 /* This can happen because of register elimination in a muldi3 insn.
4685 E.g. `26107 * (unsigned long)&u'. */
4686 if (GET_CODE (x) == PLUS)
4687 return GR_REGS;
4688 break;
4690 case PR_REGS:
4691 /* ??? This happens if we cse/gcse a BImode value across a call,
4692 and the function has a nonlocal goto. This is because global
4693 does not allocate call crossing pseudos to hard registers when
4694 current_function_has_nonlocal_goto is true. This is relatively
4695 common for C++ programs that use exceptions. To reproduce,
4696 return NO_REGS and compile libstdc++. */
4697 if (GET_CODE (x) == MEM)
4698 return GR_REGS;
4700 /* This can happen when we take a BImode subreg of a DImode value,
4701 and that DImode value winds up in some non-GR register. */
4702 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4703 return GR_REGS;
4704 break;
4706 default:
4707 break;
4710 return NO_REGS;
4714 /* Emit text to declare externally defined variables and functions, because
4715 the Intel assembler does not support undefined externals. */
4717 void
4718 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4720 int save_referenced;
4722 /* GNU as does not need anything here, but the HP linker does need
4723 something for external functions. */
4725 if (TARGET_GNU_AS
4726 && (!TARGET_HPUX_LD
4727 || TREE_CODE (decl) != FUNCTION_DECL
4728 || strstr (name, "__builtin_") == name))
4729 return;
4731 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4732 the linker when we do this, so we need to be careful not to do this for
4733 builtin functions which have no library equivalent. Unfortunately, we
4734 can't tell here whether or not a function will actually be called by
4735 expand_expr, so we pull in library functions even if we may not need
4736 them later. */
4737 if (! strcmp (name, "__builtin_next_arg")
4738 || ! strcmp (name, "alloca")
4739 || ! strcmp (name, "__builtin_constant_p")
4740 || ! strcmp (name, "__builtin_args_info"))
4741 return;
4743 if (TARGET_HPUX_LD)
4744 ia64_hpux_add_extern_decl (decl);
4745 else
4747 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4748 restore it. */
4749 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4750 if (TREE_CODE (decl) == FUNCTION_DECL)
4751 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4752 (*targetm.asm_out.globalize_label) (file, name);
4753 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4757 /* Parse the -mfixed-range= option string. */
4759 static void
4760 fix_range (const char *const_str)
4762 int i, first, last;
4763 char *str, *dash, *comma;
4765 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4766 REG2 are either register names or register numbers. The effect
4767 of this option is to mark the registers in the range from REG1 to
4768 REG2 as ``fixed'' so they won't be used by the compiler. This is
4769 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4771 i = strlen (const_str);
4772 str = (char *) alloca (i + 1);
4773 memcpy (str, const_str, i + 1);
4775 while (1)
4777 dash = strchr (str, '-');
4778 if (!dash)
4780 warning (0, "value of -mfixed-range must have form REG1-REG2");
4781 return;
4783 *dash = '\0';
4785 comma = strchr (dash + 1, ',');
4786 if (comma)
4787 *comma = '\0';
4789 first = decode_reg_name (str);
4790 if (first < 0)
4792 warning (0, "unknown register name: %s", str);
4793 return;
4796 last = decode_reg_name (dash + 1);
4797 if (last < 0)
4799 warning (0, "unknown register name: %s", dash + 1);
4800 return;
4803 *dash = '-';
4805 if (first > last)
4807 warning (0, "%s-%s is an empty range", str, dash + 1);
4808 return;
4811 for (i = first; i <= last; ++i)
4812 fixed_regs[i] = call_used_regs[i] = 1;
4814 if (!comma)
4815 break;
4817 *comma = ',';
4818 str = comma + 1;
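/* Illustrative usage, not part of ia64.c: the parser above accepts, e.g.,

       -mfixed-range=f32-f127
       -mfixed-range=f12-f15,f32-f127

   marking those floating-point registers as fixed so generated code (for
   instance kernel mode code, per the comment above) never uses them.  */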
4822 /* Implement TARGET_HANDLE_OPTION. */
4824 static bool
4825 ia64_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
4827 switch (code)
4829 case OPT_mfixed_range_:
4830 fix_range (arg);
4831 return true;
4833 case OPT_mtls_size_:
4835 char *end;
4836 unsigned long tmp = strtoul (arg, &end, 10);
4837 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4838 error ("bad value %<%s%> for -mtls-size= switch", arg);
4839 else
4840 ia64_tls_size = tmp;
4841 return true;
4844 case OPT_mtune_:
4846 static struct pta
4848 const char *name; /* processor name or nickname. */
4849 enum processor_type processor;
4851 const processor_alias_table[] =
4853 {"itanium", PROCESSOR_ITANIUM},
4854 {"itanium1", PROCESSOR_ITANIUM},
4855 {"merced", PROCESSOR_ITANIUM},
4856 {"itanium2", PROCESSOR_ITANIUM2},
4857 {"mckinley", PROCESSOR_ITANIUM2},
4859 int const pta_size = ARRAY_SIZE (processor_alias_table);
4860 int i;
4862 for (i = 0; i < pta_size; i++)
4863 if (!strcmp (arg, processor_alias_table[i].name))
4865 ia64_tune = processor_alias_table[i].processor;
4866 break;
4868 if (i == pta_size)
4869 error ("bad value %<%s%> for -mtune= switch", arg);
4870 return true;
4873 default:
4874 return true;
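/* Illustrative usage, not part of ia64.c: the switch above is what handles,
   e.g.,

       -mtune=itanium2  (or its alias -mtune=mckinley)  -> PROCESSOR_ITANIUM2
       -mtls-size=22                                    -> ia64_tls_size = 22
       -mfixed-range=f32-f127                           -> fix_range ("f32-f127")

   Any -mtls-size value other than 14, 22 or 64, or an unrecognized -mtune
   name, is rejected with an error.  */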
4878 /* Handle TARGET_OPTIONS switches. */
4880 void
4881 ia64_override_options (void)
4883 if (TARGET_AUTO_PIC)
4884 target_flags |= MASK_CONST_GP;
4886 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
4888 warning (0, "not yet implemented: latency-optimized inline square root");
4889 TARGET_INLINE_SQRT = INL_MAX_THR;
4892 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4893 flag_schedule_insns_after_reload = 0;
4895 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4897 init_machine_status = ia64_init_machine_status;
4900 static struct machine_function *
4901 ia64_init_machine_status (void)
4903 return ggc_alloc_cleared (sizeof (struct machine_function));
4906 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4907 static enum attr_type ia64_safe_type (rtx);
4909 static enum attr_itanium_class
4910 ia64_safe_itanium_class (rtx insn)
4912 if (recog_memoized (insn) >= 0)
4913 return get_attr_itanium_class (insn);
4914 else
4915 return ITANIUM_CLASS_UNKNOWN;
4918 static enum attr_type
4919 ia64_safe_type (rtx insn)
4921 if (recog_memoized (insn) >= 0)
4922 return get_attr_type (insn);
4923 else
4924 return TYPE_UNKNOWN;
4927 /* The following collection of routines emits instruction group stop bits as
4928 necessary to avoid dependencies. */
4930 /* Need to track some additional registers as far as serialization is
4931 concerned so we can properly handle br.call and br.ret. We could
4932 make these registers visible to gcc, but since these registers are
4933 never explicitly used in gcc generated code, it seems wasteful to
4934 do so (plus it would make the call and return patterns needlessly
4935 complex). */
4936 #define REG_RP (BR_REG (0))
4937 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4938 /* This is used for volatile asms which may require a stop bit immediately
4939 before and after them. */
4940 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4941 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4942 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4944 /* For each register, we keep track of how it has been written in the
4945 current instruction group.
4947 If a register is written unconditionally (no qualifying predicate),
4948 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4950 If a register is written if its qualifying predicate P is true, we
4951 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4952 may be written again by the complement of P (P^1) and when this happens,
4953 WRITE_COUNT gets set to 2.
4955 The result of this is that whenever an insn attempts to write a register
4956 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4958 If a predicate register is written by a floating-point insn, we set
4959 WRITTEN_BY_FP to true.
4961 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4962 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4964 struct reg_write_state
4966 unsigned int write_count : 2;
4967 unsigned int first_pred : 16;
4968 unsigned int written_by_fp : 1;
4969 unsigned int written_by_and : 1;
4970 unsigned int written_by_or : 1;
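/* Illustrative sketch, not part of ia64.c: the write-after-write rule
   described above, reduced to its core.  PRED is 0 for an unconditional
   write; otherwise P and P^1 are assumed to form a complementary predicate
   pair (the same assumption the real code makes).  The helper name is
   hypothetical and the and.orcm/or.andcm special cases are omitted.  */
static int
example_write_needs_barrier (const struct reg_write_state *w, int pred)
{
  if (w->write_count == 2)              /* already written unconditionally */
    return 1;
  if (w->write_count == 1)              /* written under w->first_pred */
    return (w->first_pred ^ 1) != pred;
  return 0;                             /* first write in this group */
}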
4973 /* Cumulative info for the current instruction group. */
4974 struct reg_write_state rws_sum[NUM_REGS];
4975 /* Info for the current instruction. This gets copied to rws_sum after a
4976 stop bit is emitted. */
4977 struct reg_write_state rws_insn[NUM_REGS];
4979 /* Indicates whether this is the first instruction after a stop bit,
4980 in which case we don't need another stop bit. Without this,
4981 ia64_variable_issue will die when scheduling an alloc. */
4982 static int first_instruction;
4984 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4985 RTL for one instruction. */
4986 struct reg_flags
4988 unsigned int is_write : 1; /* Is register being written? */
4989 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4990 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4991 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4992 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4993 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4996 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4997 static int rws_access_regno (int, struct reg_flags, int);
4998 static int rws_access_reg (rtx, struct reg_flags, int);
4999 static void update_set_flags (rtx, struct reg_flags *);
5000 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5001 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5002 static void init_insn_group_barriers (void);
5003 static int group_barrier_needed (rtx);
5004 static int safe_group_barrier_needed (rtx);
5006 /* Update *RWS for REGNO, which is being written by the current instruction,
5007 with predicate PRED, and associated register flags in FLAGS. */
5009 static void
5010 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
5012 if (pred)
5013 rws[regno].write_count++;
5014 else
5015 rws[regno].write_count = 2;
5016 rws[regno].written_by_fp |= flags.is_fp;
5017 /* ??? Not tracking and/or across differing predicates. */
5018 rws[regno].written_by_and = flags.is_and;
5019 rws[regno].written_by_or = flags.is_or;
5020 rws[regno].first_pred = pred;
5023 /* Handle an access to register REGNO of type FLAGS using predicate register
5024 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
5025 a dependency with an earlier instruction in the same group. */
5027 static int
5028 rws_access_regno (int regno, struct reg_flags flags, int pred)
5030 int need_barrier = 0;
5032 gcc_assert (regno < NUM_REGS);
5034 if (! PR_REGNO_P (regno))
5035 flags.is_and = flags.is_or = 0;
5037 if (flags.is_write)
5039 int write_count;
5041 /* One insn writes same reg multiple times? */
5042 gcc_assert (!rws_insn[regno].write_count);
5044 /* Update info for current instruction. */
5045 rws_update (rws_insn, regno, flags, pred);
5046 write_count = rws_sum[regno].write_count;
5048 switch (write_count)
5050 case 0:
5051 /* The register has not been written yet. */
5052 rws_update (rws_sum, regno, flags, pred);
5053 break;
5055 case 1:
5056 /* The register has been written via a predicate. If this is
5057 not a complementary predicate, then we need a barrier. */
5058 /* ??? This assumes that P and P+1 are always complementary
5059 predicates for P even. */
5060 if (flags.is_and && rws_sum[regno].written_by_and)
5062 else if (flags.is_or && rws_sum[regno].written_by_or)
5064 else if ((rws_sum[regno].first_pred ^ 1) != pred)
5065 need_barrier = 1;
5066 rws_update (rws_sum, regno, flags, pred);
5067 break;
5069 case 2:
5070 /* The register has been unconditionally written already. We
5071 need a barrier. */
5072 if (flags.is_and && rws_sum[regno].written_by_and)
5074 else if (flags.is_or && rws_sum[regno].written_by_or)
5076 else
5077 need_barrier = 1;
5078 rws_sum[regno].written_by_and = flags.is_and;
5079 rws_sum[regno].written_by_or = flags.is_or;
5080 break;
5082 default:
5083 gcc_unreachable ();
5086 else
5088 if (flags.is_branch)
5090 /* Branches have several RAW exceptions that allow us to avoid
5091 barriers. */
5093 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5094 /* RAW dependencies on branch regs are permissible as long
5095 as the writer is a non-branch instruction. Since we
5096 never generate code that uses a branch register written
5097 by a branch instruction, handling this case is
5098 easy. */
5099 return 0;
5101 if (REGNO_REG_CLASS (regno) == PR_REGS
5102 && ! rws_sum[regno].written_by_fp)
5103 /* The predicates of a branch are available within the
5104 same insn group as long as the predicate was written by
5105 something other than a floating-point instruction. */
5106 return 0;
5109 if (flags.is_and && rws_sum[regno].written_by_and)
5110 return 0;
5111 if (flags.is_or && rws_sum[regno].written_by_or)
5112 return 0;
5114 switch (rws_sum[regno].write_count)
5116 case 0:
5117 /* The register has not been written yet. */
5118 break;
5120 case 1:
5121 /* The register has been written via a predicate. If this is
5122 not a complementary predicate, then we need a barrier. */
5123 /* ??? This assumes that P and P+1 are always complementary
5124 predicates for P even. */
5125 if ((rws_sum[regno].first_pred ^ 1) != pred)
5126 need_barrier = 1;
5127 break;
5129 case 2:
5130 /* The register has been unconditionally written already. We
5131 need a barrier. */
5132 need_barrier = 1;
5133 break;
5135 default:
5136 gcc_unreachable ();
5140 return need_barrier;
5143 static int
5144 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5146 int regno = REGNO (reg);
5147 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5149 if (n == 1)
5150 return rws_access_regno (regno, flags, pred);
5151 else
5153 int need_barrier = 0;
5154 while (--n >= 0)
5155 need_barrier |= rws_access_regno (regno + n, flags, pred);
5156 return need_barrier;
5160 /* Examine X, which is a SET rtx, and update the register flags it implies,
5161 stored in *PFLAGS. */
5163 static void
5164 update_set_flags (rtx x, struct reg_flags *pflags)
5166 rtx src = SET_SRC (x);
5168 switch (GET_CODE (src))
5170 case CALL:
5171 return;
5173 case IF_THEN_ELSE:
5174 if (SET_DEST (x) == pc_rtx)
5175 /* X is a conditional branch. */
5176 return;
5177 else
5179 /* X is a conditional move. */
5180 rtx cond = XEXP (src, 0);
5181 cond = XEXP (cond, 0);
5183 /* We always split conditional moves into COND_EXEC patterns, so the
5184 only pattern that can reach here is doloop_end_internal. We don't
5185 need to do anything special for this pattern. */
5186 gcc_assert (GET_CODE (cond) == REG && REGNO (cond) == AR_LC_REGNUM);
5187 return;
5190 default:
5191 if (COMPARISON_P (src)
5192 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5193 /* Set pflags->is_fp to 1 so that we know we're dealing
5194 with a floating point comparison when processing the
5195 destination of the SET. */
5196 pflags->is_fp = 1;
5198 /* Discover if this is a parallel comparison. We only handle
5199 and.orcm and or.andcm at present, since we must retain a
5200 strict inverse on the predicate pair. */
5201 else if (GET_CODE (src) == AND)
5202 pflags->is_and = 1;
5203 else if (GET_CODE (src) == IOR)
5204 pflags->is_or = 1;
5206 break;
5210 /* Subroutine of rtx_needs_barrier; this function determines whether the
5211 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5212 are as in rtx_needs_barrier. */
5215 static int
5216 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5218 int need_barrier = 0;
5219 rtx dst;
5220 rtx src = SET_SRC (x);
5222 if (GET_CODE (src) == CALL)
5223 /* We don't need to worry about the result registers that
5224 get written by subroutine call. */
5225 return rtx_needs_barrier (src, flags, pred);
5226 else if (SET_DEST (x) == pc_rtx)
5228 /* X is a conditional branch. */
5229 /* ??? This seems redundant, as the caller sets this bit for
5230 all JUMP_INSNs. */
5231 flags.is_branch = 1;
5232 return rtx_needs_barrier (src, flags, pred);
5235 need_barrier = rtx_needs_barrier (src, flags, pred);
5237 dst = SET_DEST (x);
5238 if (GET_CODE (dst) == ZERO_EXTRACT)
5240 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5241 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5242 dst = XEXP (dst, 0);
5244 return need_barrier;
5247 /* Handle an access to rtx X of type FLAGS using predicate register
5248 PRED. Return 1 if this access creates a dependency with an earlier
5249 instruction in the same group. */
5251 static int
5252 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5254 int i, j;
5255 int is_complemented = 0;
5256 int need_barrier = 0;
5257 const char *format_ptr;
5258 struct reg_flags new_flags;
5259 rtx cond;
5261 if (! x)
5262 return 0;
5264 new_flags = flags;
5266 switch (GET_CODE (x))
5268 case SET:
5269 update_set_flags (x, &new_flags);
5270 need_barrier = set_src_needs_barrier (x, new_flags, pred);
5271 if (GET_CODE (SET_SRC (x)) != CALL)
5273 new_flags.is_write = 1;
5274 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5276 break;
5278 case CALL:
5279 new_flags.is_write = 0;
5280 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5282 /* Avoid multiple register writes, in case this is a pattern with
5283 multiple CALL rtx. This avoids a failure in rws_access_reg. */
5284 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5286 new_flags.is_write = 1;
5287 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5288 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5289 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5291 break;
5293 case COND_EXEC:
5294 /* X is a predicated instruction. */
5296 cond = COND_EXEC_TEST (x);
5297 gcc_assert (!pred);
5298 need_barrier = rtx_needs_barrier (cond, flags, 0);
5300 if (GET_CODE (cond) == EQ)
5301 is_complemented = 1;
5302 cond = XEXP (cond, 0);
5303 gcc_assert (GET_CODE (cond) == REG
5304 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5305 pred = REGNO (cond);
5306 if (is_complemented)
5307 ++pred;
5309 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5310 return need_barrier;
5312 case CLOBBER:
5313 case USE:
5314 /* Clobber & use are for earlier compiler-phases only. */
5315 break;
5317 case ASM_OPERANDS:
5318 case ASM_INPUT:
5319 /* We always emit stop bits for traditional asms. We emit stop bits
5320 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5321 if (GET_CODE (x) != ASM_OPERANDS
5322 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5324 /* Avoid writing the register multiple times if we have multiple
5325 asm outputs. This avoids a failure in rws_access_reg. */
5326 if (! rws_insn[REG_VOLATILE].write_count)
5328 new_flags.is_write = 1;
5329 rws_access_regno (REG_VOLATILE, new_flags, pred);
5331 return 1;
5334 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5335 We cannot just fall through here since then we would be confused
5336 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
5337 usage, does not indicate a traditional asm. */
5339 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5340 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5341 need_barrier = 1;
5342 break;
5344 case PARALLEL:
5345 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5347 rtx pat = XVECEXP (x, 0, i);
5348 switch (GET_CODE (pat))
5350 case SET:
5351 update_set_flags (pat, &new_flags);
5352 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5353 break;
5355 case USE:
5356 case CALL:
5357 case ASM_OPERANDS:
5358 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5359 break;
5361 case CLOBBER:
5362 case RETURN:
5363 break;
5365 default:
5366 gcc_unreachable ();
5369 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5371 rtx pat = XVECEXP (x, 0, i);
5372 if (GET_CODE (pat) == SET)
5374 if (GET_CODE (SET_SRC (pat)) != CALL)
5376 new_flags.is_write = 1;
5377 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5378 pred);
5381 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5382 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5384 break;
5386 case SUBREG:
5387 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5388 break;
5389 case REG:
5390 if (REGNO (x) == AR_UNAT_REGNUM)
5392 for (i = 0; i < 64; ++i)
5393 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5395 else
5396 need_barrier = rws_access_reg (x, flags, pred);
5397 break;
5399 case MEM:
5400 /* Find the regs used in memory address computation. */
5401 new_flags.is_write = 0;
5402 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5403 break;
5405 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
5406 case SYMBOL_REF: case LABEL_REF: case CONST:
5407 break;
5409 /* Operators with side-effects. */
5410 case POST_INC: case POST_DEC:
5411 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5413 new_flags.is_write = 0;
5414 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5415 new_flags.is_write = 1;
5416 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5417 break;
5419 case POST_MODIFY:
5420 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5422 new_flags.is_write = 0;
5423 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5424 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5425 new_flags.is_write = 1;
5426 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5427 break;
5429 /* Handle common unary and binary ops for efficiency. */
5430 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5431 case MOD: case UDIV: case UMOD: case AND: case IOR:
5432 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5433 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5434 case NE: case EQ: case GE: case GT: case LE:
5435 case LT: case GEU: case GTU: case LEU: case LTU:
5436 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5437 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5438 break;
5440 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5441 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5442 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5443 case SQRT: case FFS: case POPCOUNT:
5444 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5445 break;
5447 case VEC_SELECT:
5448 /* VEC_SELECT's second argument is a PARALLEL with integers that
5449 describe the elements selected. On ia64, those integers are
5450 always constants. Avoid walking the PARALLEL so that we don't
5451 get confused with "normal" parallels and then die. */
5452 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5453 break;
5455 case UNSPEC:
5456 switch (XINT (x, 1))
5458 case UNSPEC_LTOFF_DTPMOD:
5459 case UNSPEC_LTOFF_DTPREL:
5460 case UNSPEC_DTPREL:
5461 case UNSPEC_LTOFF_TPREL:
5462 case UNSPEC_TPREL:
5463 case UNSPEC_PRED_REL_MUTEX:
5464 case UNSPEC_PIC_CALL:
5465 case UNSPEC_MF:
5466 case UNSPEC_FETCHADD_ACQ:
5467 case UNSPEC_BSP_VALUE:
5468 case UNSPEC_FLUSHRS:
5469 case UNSPEC_BUNDLE_SELECTOR:
5470 break;
5472 case UNSPEC_GR_SPILL:
5473 case UNSPEC_GR_RESTORE:
5475 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5476 HOST_WIDE_INT bit = (offset >> 3) & 63;
5478 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5479 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5480 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5481 new_flags, pred);
5482 break;
5485 case UNSPEC_FR_SPILL:
5486 case UNSPEC_FR_RESTORE:
5487 case UNSPEC_GETF_EXP:
5488 case UNSPEC_SETF_EXP:
5489 case UNSPEC_ADDP4:
5490 case UNSPEC_FR_SQRT_RECIP_APPROX:
5491 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5492 break;
5494 case UNSPEC_FR_RECIP_APPROX:
5495 case UNSPEC_SHRP:
5496 case UNSPEC_COPYSIGN:
5497 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5498 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5499 break;
5501 case UNSPEC_CMPXCHG_ACQ:
5502 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5503 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5504 break;
5506 default:
5507 gcc_unreachable ();
5509 break;
5511 case UNSPEC_VOLATILE:
5512 switch (XINT (x, 1))
5514 case UNSPECV_ALLOC:
5515 /* Alloc must always be the first instruction of a group.
5516 We force this by always returning true. */
5517 /* ??? We might get better scheduling if we explicitly check for
5518 input/local/output register dependencies, and modify the
5519 scheduler so that alloc is always reordered to the start of
5520 the current group. We could then eliminate all of the
5521 first_instruction code. */
5522 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5524 new_flags.is_write = 1;
5525 rws_access_regno (REG_AR_CFM, new_flags, pred);
5526 return 1;
5528 case UNSPECV_SET_BSP:
5529 need_barrier = 1;
5530 break;
5532 case UNSPECV_BLOCKAGE:
5533 case UNSPECV_INSN_GROUP_BARRIER:
5534 case UNSPECV_BREAK:
5535 case UNSPECV_PSAC_ALL:
5536 case UNSPECV_PSAC_NORMAL:
5537 return 0;
5539 default:
5540 gcc_unreachable ();
5542 break;
5544 case RETURN:
5545 new_flags.is_write = 0;
5546 need_barrier = rws_access_regno (REG_RP, flags, pred);
5547 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5549 new_flags.is_write = 1;
5550 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5551 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5552 break;
5554 default:
5555 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5556 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5557 switch (format_ptr[i])
5559 case '0': /* unused field */
5560 case 'i': /* integer */
5561 case 'n': /* note */
5562 case 'w': /* wide integer */
5563 case 's': /* pointer to string */
5564 case 'S': /* optional pointer to string */
5565 break;
5567 case 'e':
5568 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5569 need_barrier = 1;
5570 break;
5572 case 'E':
5573 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5574 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5575 need_barrier = 1;
5576 break;
5578 default:
5579 gcc_unreachable ();
5581 break;
5583 return need_barrier;
5586 /* Clear out the state for group_barrier_needed at the start of a
5587 sequence of insns. */
5589 static void
5590 init_insn_group_barriers (void)
5592 memset (rws_sum, 0, sizeof (rws_sum));
5593 first_instruction = 1;
5596 /* Given the current state, determine whether a group barrier (a stop bit) is
5597 necessary before INSN. Return nonzero if so. This modifies the state to
5598 include the effects of INSN as a side-effect. */
5600 static int
5601 group_barrier_needed (rtx insn)
5603 rtx pat;
5604 int need_barrier = 0;
5605 struct reg_flags flags;
5607 memset (&flags, 0, sizeof (flags));
5608 switch (GET_CODE (insn))
5610 case NOTE:
5611 break;
5613 case BARRIER:
5614 /* A barrier doesn't imply an instruction group boundary. */
5615 break;
5617 case CODE_LABEL:
5618 memset (rws_insn, 0, sizeof (rws_insn));
5619 return 1;
5621 case CALL_INSN:
5622 flags.is_branch = 1;
5623 flags.is_sibcall = SIBLING_CALL_P (insn);
5624 memset (rws_insn, 0, sizeof (rws_insn));
5626 /* Don't bundle a call following another call. */
5627 if ((pat = prev_active_insn (insn))
5628 && GET_CODE (pat) == CALL_INSN)
5630 need_barrier = 1;
5631 break;
5634 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5635 break;
5637 case JUMP_INSN:
5638 flags.is_branch = 1;
5640 /* Don't bundle a jump following a call. */
5641 if ((pat = prev_active_insn (insn))
5642 && GET_CODE (pat) == CALL_INSN)
5644 need_barrier = 1;
5645 break;
5647 /* FALLTHRU */
5649 case INSN:
5650 if (GET_CODE (PATTERN (insn)) == USE
5651 || GET_CODE (PATTERN (insn)) == CLOBBER)
5652 /* Don't care about USE and CLOBBER "insns"---those are used to
5653 indicate to the optimizer that it shouldn't get rid of
5654 certain operations. */
5655 break;
5657 pat = PATTERN (insn);
5659 /* Ug. Hack hacks hacked elsewhere. */
5660 switch (recog_memoized (insn))
5662 /* We play dependency tricks with the epilogue in order
5663 to get proper schedules. Undo this for dv analysis. */
5664 case CODE_FOR_epilogue_deallocate_stack:
5665 case CODE_FOR_prologue_allocate_stack:
5666 pat = XVECEXP (pat, 0, 0);
5667 break;
5669 /* The pattern we use for br.cloop confuses the code above.
5670 The second element of the vector is representative. */
5671 case CODE_FOR_doloop_end_internal:
5672 pat = XVECEXP (pat, 0, 1);
5673 break;
5675 /* Doesn't generate code. */
5676 case CODE_FOR_pred_rel_mutex:
5677 case CODE_FOR_prologue_use:
5678 return 0;
5680 default:
5681 break;
5684 memset (rws_insn, 0, sizeof (rws_insn));
5685 need_barrier = rtx_needs_barrier (pat, flags, 0);
5687 /* Check to see if the previous instruction was a volatile
5688 asm. */
5689 if (! need_barrier)
5690 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5691 break;
5693 default:
5694 gcc_unreachable ();
5697 if (first_instruction && INSN_P (insn)
5698 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5699 && GET_CODE (PATTERN (insn)) != USE
5700 && GET_CODE (PATTERN (insn)) != CLOBBER)
5702 need_barrier = 0;
5703 first_instruction = 0;
5706 return need_barrier;
5709 /* Like group_barrier_needed, but do not clobber the current state. */
5711 static int
5712 safe_group_barrier_needed (rtx insn)
5714 struct reg_write_state rws_saved[NUM_REGS];
5715 int saved_first_instruction;
5716 int t;
5718 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5719 saved_first_instruction = first_instruction;
5721 t = group_barrier_needed (insn);
5723 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5724 first_instruction = saved_first_instruction;
5726 return t;
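/* A minimal self-contained sketch (kept out of the build with #if 0) of
   the save/query/restore idiom used by safe_group_barrier_needed above:
   snapshot the global tracking state, run the destructive query, then
   roll the state back.  The toy_* names and the single write-flag array
   standing in for rws_sum are invented for illustration only.  */
#if 0
#include <string.h>

#define TOY_NUM_REGS 8

static int toy_written[TOY_NUM_REGS];   /* stand-in for rws_sum */

/* Destructive query: records the write and reports whether it conflicts
   with an earlier write in the current group.  */
static int
toy_needs_stop (int regno)
{
  int conflict = toy_written[regno];

  toy_written[regno] = 1;
  return conflict;
}

/* Non-destructive wrapper, analogous to safe_group_barrier_needed.  */
static int
toy_safe_needs_stop (int regno)
{
  int saved[TOY_NUM_REGS];
  int t;

  memcpy (saved, toy_written, sizeof saved);
  t = toy_needs_stop (regno);
  memcpy (toy_written, saved, sizeof saved);
  return t;
}
#endif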
5729 /* Scan the current function and insert stop bits as necessary to
5730 eliminate dependencies. This function assumes that a final
5731 instruction scheduling pass has been run which has already
5732 inserted most of the necessary stop bits. This function only
5733 inserts new ones at basic block boundaries, since these are
5734 invisible to the scheduler. */
5736 static void
5737 emit_insn_group_barriers (FILE *dump)
5739 rtx insn;
5740 rtx last_label = 0;
5741 int insns_since_last_label = 0;
5743 init_insn_group_barriers ();
5745 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5747 if (GET_CODE (insn) == CODE_LABEL)
5749 if (insns_since_last_label)
5750 last_label = insn;
5751 insns_since_last_label = 0;
5753 else if (GET_CODE (insn) == NOTE
5754 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5756 if (insns_since_last_label)
5757 last_label = insn;
5758 insns_since_last_label = 0;
5760 else if (GET_CODE (insn) == INSN
5761 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5762 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5764 init_insn_group_barriers ();
5765 last_label = 0;
5767 else if (INSN_P (insn))
5769 insns_since_last_label = 1;
5771 if (group_barrier_needed (insn))
5773 if (last_label)
5775 if (dump)
5776 fprintf (dump, "Emitting stop before label %d\n",
5777 INSN_UID (last_label));
5778 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5779 insn = last_label;
5781 init_insn_group_barriers ();
5782 last_label = 0;
5789 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5790 This function has to emit all necessary group barriers. */
5792 static void
5793 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5795 rtx insn;
5797 init_insn_group_barriers ();
5799 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5801 if (GET_CODE (insn) == BARRIER)
5803 rtx last = prev_active_insn (insn);
5805 if (! last)
5806 continue;
5807 if (GET_CODE (last) == JUMP_INSN
5808 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5809 last = prev_active_insn (last);
5810 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5811 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5813 init_insn_group_barriers ();
5815 else if (INSN_P (insn))
5817 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5818 init_insn_group_barriers ();
5819 else if (group_barrier_needed (insn))
5821 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5822 init_insn_group_barriers ();
5823 group_barrier_needed (insn);
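/* A self-contained sketch (disabled with #if 0) of the scan-and-insert
   driver above, using a deliberately simplified conflict rule: only a
   second write to the same register within a group forces a stop.  The
   real checker (rws_access_regno and friends above) also tracks reads,
   predication and special registers; all toy_* names are invented.  */
#if 0
#include <stdio.h>
#include <string.h>

#define TOY_NUM_REGS 8

static int toy_written[TOY_NUM_REGS];

static void
toy_init_group (void)
{
  memset (toy_written, 0, sizeof toy_written);
}

/* Record a write to DEST; return 1 if a stop bit is needed first.  */
static int
toy_barrier_needed (int dest)
{
  if (toy_written[dest])
    return 1;
  toy_written[dest] = 1;
  return 0;
}

int
main (void)
{
  /* Destination register of each "insn" in a toy stream.  */
  static const int dest[] = { 3, 4, 3, 5, 4, 4 };
  unsigned int i;

  toy_init_group ();
  for (i = 0; i < sizeof dest / sizeof dest[0]; i++)
    {
      if (toy_barrier_needed (dest[i]))
        {
          printf (";;   <- stop bit before insn %u\n", i);
          toy_init_group ();
          toy_barrier_needed (dest[i]);  /* re-feed the insn to the new group */
        }
      printf ("insn %u writes r%d\n", i, dest[i]);
    }
  return 0;
}
#endif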
5831 /* Instruction scheduling support. */
5833 #define NR_BUNDLES 10
5835 /* A list of names of all available bundles. */
5837 static const char *bundle_name [NR_BUNDLES] =
5839 ".mii",
5840 ".mmi",
5841 ".mfi",
5842 ".mmf",
5843 #if NR_BUNDLES == 10
5844 ".bbb",
5845 ".mbb",
5846 #endif
5847 ".mib",
5848 ".mmb",
5849 ".mfb",
5850 ".mlx"
5853 /* Nonzero if we should insert stop bits into the schedule. */
5855 int ia64_final_schedule = 0;
5857 /* Codes of the corresponding queried units: */
5859 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5860 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5862 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5863 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5865 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5867 /* The following variable value is an insn group barrier. */
5869 static rtx dfa_stop_insn;
5871 /* The following variable value is the last issued insn. */
5873 static rtx last_scheduled_insn;
5875 /* The following variable value is size of the DFA state. */
5877 static size_t dfa_state_size;
5879 /* The following variable value is pointer to a DFA state used as
5880 temporary variable. */
5882 static state_t temp_dfa_state = NULL;
5884 /* The following variable value is DFA state after issuing the last
5885 insn. */
5887 static state_t prev_cycle_state = NULL;
5889 /* The following array element values are TRUE if the corresponding
5890 insn requires stop bits to be added before it. */
5892 static char *stops_p;
5894 /* The following variable is used to set up the array mentioned above. */
5896 static int stop_before_p = 0;
5898 /* The following variable value is the length of the arrays `clocks' and
5899 `add_cycles'. */
5901 static int clocks_length;
5903 /* The following array element values are cycles on which the
5904 corresponding insn will be issued. The array is used only for
5905 Itanium1. */
5907 static int *clocks;
5909 /* The following array element values are the numbers of cycles that should be
5910 added to improve insn scheduling for MM_insns for Itanium1. */
5912 static int *add_cycles;
5914 static rtx ia64_single_set (rtx);
5915 static void ia64_emit_insn_before (rtx, rtx);
5917 /* Map a bundle number to its pseudo-op. */
5919 const char *
5920 get_bundle_name (int b)
5922 return bundle_name[b];
5926 /* Return the maximum number of instructions a cpu can issue. */
5928 static int
5929 ia64_issue_rate (void)
5931 return 6;
5934 /* Helper function - like single_set, but look inside COND_EXEC. */
5936 static rtx
5937 ia64_single_set (rtx insn)
5939 rtx x = PATTERN (insn), ret;
5940 if (GET_CODE (x) == COND_EXEC)
5941 x = COND_EXEC_CODE (x);
5942 if (GET_CODE (x) == SET)
5943 return x;
5945 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
5946 Although they are not a classical single set, the second set is there just
5947 to prevent it from being moved past FP-relative stack accesses. */
5948 switch (recog_memoized (insn))
5950 case CODE_FOR_prologue_allocate_stack:
5951 case CODE_FOR_epilogue_deallocate_stack:
5952 ret = XVECEXP (x, 0, 0);
5953 break;
5955 default:
5956 ret = single_set_2 (insn, x);
5957 break;
5960 return ret;
5963 /* Adjust the cost of a scheduling dependency. Return the new cost of
5964 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5966 static int
5967 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5969 enum attr_itanium_class dep_class;
5970 enum attr_itanium_class insn_class;
5972 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5973 return cost;
5975 insn_class = ia64_safe_itanium_class (insn);
5976 dep_class = ia64_safe_itanium_class (dep_insn);
5977 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5978 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5979 return 0;
5981 return cost;
5984 /* Like emit_insn_before, but skip cycle_display notes.
5985 ??? When cycle display notes are implemented, update this. */
5987 static void
5988 ia64_emit_insn_before (rtx insn, rtx before)
5990 emit_insn_before (insn, before);
5993 /* The following function marks insns that produce addresses for load
5994 and store insns. Such insns will be placed into M slots because that
5995 decreases the latency for Itanium1 (see function
5996 `ia64_produce_address_p' and the DFA descriptions). */
5998 static void
5999 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6001 rtx insn, link, next, next_tail;
6003 /* Before reload, which_alternative is not set, which means that
6004 ia64_safe_itanium_class will produce wrong results for (at least)
6005 move instructions. */
6006 if (!reload_completed)
6007 return;
6009 next_tail = NEXT_INSN (tail);
6010 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6011 if (INSN_P (insn))
6012 insn->call = 0;
6013 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6014 if (INSN_P (insn)
6015 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6017 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6019 if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
6020 continue;
6021 next = XEXP (link, 0);
6022 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6023 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6024 && ia64_st_address_bypass_p (insn, next))
6025 break;
6026 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6027 || ia64_safe_itanium_class (next)
6028 == ITANIUM_CLASS_FLD)
6029 && ia64_ld_address_bypass_p (insn, next))
6030 break;
6032 insn->call = link != 0;
6036 /* We're beginning a new block. Initialize data structures as necessary. */
6038 static void
6039 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6040 int sched_verbose ATTRIBUTE_UNUSED,
6041 int max_ready ATTRIBUTE_UNUSED)
6043 #ifdef ENABLE_CHECKING
6044 rtx insn;
6046 if (reload_completed)
6047 for (insn = NEXT_INSN (current_sched_info->prev_head);
6048 insn != current_sched_info->next_tail;
6049 insn = NEXT_INSN (insn))
6050 gcc_assert (!SCHED_GROUP_P (insn));
6051 #endif
6052 last_scheduled_insn = NULL_RTX;
6053 init_insn_group_barriers ();
6056 /* We are about to begin issuing insns for this clock cycle.
6057 Override the default sort algorithm to better slot instructions. */
6059 static int
6060 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6061 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6062 int reorder_type)
6064 int n_asms;
6065 int n_ready = *pn_ready;
6066 rtx *e_ready = ready + n_ready;
6067 rtx *insnp;
6069 if (sched_verbose)
6070 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6072 if (reorder_type == 0)
6074 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6075 n_asms = 0;
6076 for (insnp = ready; insnp < e_ready; insnp++)
6077 if (insnp < e_ready)
6079 rtx insn = *insnp;
6080 enum attr_type t = ia64_safe_type (insn);
6081 if (t == TYPE_UNKNOWN)
6083 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6084 || asm_noperands (PATTERN (insn)) >= 0)
6086 rtx lowest = ready[n_asms];
6087 ready[n_asms] = insn;
6088 *insnp = lowest;
6089 n_asms++;
6091 else
6093 rtx highest = ready[n_ready - 1];
6094 ready[n_ready - 1] = insn;
6095 *insnp = highest;
6096 return 1;
6101 if (n_asms < n_ready)
6103 /* Some normal insns to process. Skip the asms. */
6104 ready += n_asms;
6105 n_ready -= n_asms;
6107 else if (n_ready > 0)
6108 return 1;
6111 if (ia64_final_schedule)
6113 int deleted = 0;
6114 int nr_need_stop = 0;
6116 for (insnp = ready; insnp < e_ready; insnp++)
6117 if (safe_group_barrier_needed (*insnp))
6118 nr_need_stop++;
6120 if (reorder_type == 1 && n_ready == nr_need_stop)
6121 return 0;
6122 if (reorder_type == 0)
6123 return 1;
6124 insnp = e_ready;
6125 /* Move down everything that needs a stop bit, preserving
6126 relative order. */
6127 while (insnp-- > ready + deleted)
6128 while (insnp >= ready + deleted)
6130 rtx insn = *insnp;
6131 if (! safe_group_barrier_needed (insn))
6132 break;
6133 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6134 *ready = insn;
6135 deleted++;
6137 n_ready -= deleted;
6138 ready += deleted;
6141 return 1;
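/* A self-contained sketch (disabled with #if 0) of the stable "push
   down" step above: elements that need a stop bit are moved toward
   ready[0] (the lowest-priority end) with memmove, preserving relative
   order on both sides, and the caller then skips that prefix.  Plain
   ints stand in for rtx insns and a lookup table stands in for
   safe_group_barrier_needed; all toy_* names are invented.  */
#if 0
#include <stdio.h>
#include <string.h>

/* Stable-partition NELT integers: every element for which NEEDS_STOP
   is nonzero is moved to the front, preserving relative order.
   Returns how many elements were moved.  */
static int
toy_push_down (int *ready, int nelt, const int *needs_stop)
{
  int deleted = 0;
  int i;

  for (i = nelt - 1; i >= deleted; i--)
    while (i >= deleted && needs_stop[ready[i]])
      {
        int insn = ready[i];

        memmove (ready + 1, ready, i * sizeof (int));
        ready[0] = insn;
        deleted++;
      }
  return deleted;
}

int
main (void)
{
  int ready[] = { 10, 11, 12, 13 };        /* 13 = highest priority */
  int needs_stop[14] = { 0 };
  int moved, i;

  needs_stop[11] = needs_stop[13] = 1;     /* these two need a stop bit */
  moved = toy_push_down (ready, 4, needs_stop);
  for (i = 0; i < 4; i++)
    printf ("%s%d\n", i < moved ? "(deferred) " : "", ready[i]);
  return 0;
}
#endif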
6144 /* We are about to begin issuing insns for this clock cycle. Override
6145 the default sort algorithm to better slot instructions. */
6147 static int
6148 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6149 int clock_var)
6151 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6152 pn_ready, clock_var, 0);
6155 /* Like ia64_sched_reorder, but called after issuing each insn.
6156 Override the default sort algorithm to better slot instructions. */
6158 static int
6159 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6160 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6161 int *pn_ready, int clock_var)
6163 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6164 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6165 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6166 clock_var, 1);
6169 /* We are about to issue INSN. Return the number of insns left on the
6170 ready queue that can be issued this cycle. */
6172 static int
6173 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6174 int sched_verbose ATTRIBUTE_UNUSED,
6175 rtx insn ATTRIBUTE_UNUSED,
6176 int can_issue_more ATTRIBUTE_UNUSED)
6178 last_scheduled_insn = insn;
6179 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6180 if (reload_completed)
6182 int needed = group_barrier_needed (insn);
6184 gcc_assert (!needed);
6185 if (GET_CODE (insn) == CALL_INSN)
6186 init_insn_group_barriers ();
6187 stops_p [INSN_UID (insn)] = stop_before_p;
6188 stop_before_p = 0;
6190 return 1;
6193 /* We are choosing an insn from the ready queue. Return nonzero if INSN
6194 can be chosen. */
6196 static int
6197 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6199 gcc_assert (insn && INSN_P (insn));
6200 return (!reload_completed
6201 || !safe_group_barrier_needed (insn));
6204 /* The following variable value is a pseudo-insn used by the DFA insn
6205 scheduler to change the DFA state when the simulated clock is
6206 increased. */
6208 static rtx dfa_pre_cycle_insn;
6210 /* We are about to begin issuing INSN. Return nonzero if we cannot
6211 issue it on the given cycle CLOCK, and zero if we should not sort
6212 the ready queue on the next clock start. */
6214 static int
6215 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6216 int clock, int *sort_p)
6218 int setup_clocks_p = FALSE;
6220 gcc_assert (insn && INSN_P (insn));
6221 if ((reload_completed && safe_group_barrier_needed (insn))
6222 || (last_scheduled_insn
6223 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6224 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6225 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6227 init_insn_group_barriers ();
6228 if (verbose && dump)
6229 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6230 last_clock == clock ? " + cycle advance" : "");
6231 stop_before_p = 1;
6232 if (last_clock == clock)
6234 state_transition (curr_state, dfa_stop_insn);
6235 if (TARGET_EARLY_STOP_BITS)
6236 *sort_p = (last_scheduled_insn == NULL_RTX
6237 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6238 else
6239 *sort_p = 0;
6240 return 1;
6242 else if (reload_completed)
6243 setup_clocks_p = TRUE;
6244 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6245 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6246 state_reset (curr_state);
6247 else
6249 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6250 state_transition (curr_state, dfa_stop_insn);
6251 state_transition (curr_state, dfa_pre_cycle_insn);
6252 state_transition (curr_state, NULL);
6255 else if (reload_completed)
6256 setup_clocks_p = TRUE;
6257 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6258 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6259 && asm_noperands (PATTERN (insn)) < 0)
6261 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6263 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6265 rtx link;
6266 int d = -1;
6268 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6269 if (REG_NOTE_KIND (link) == 0)
6271 enum attr_itanium_class dep_class;
6272 rtx dep_insn = XEXP (link, 0);
6274 dep_class = ia64_safe_itanium_class (dep_insn);
6275 if ((dep_class == ITANIUM_CLASS_MMMUL
6276 || dep_class == ITANIUM_CLASS_MMSHF)
6277 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6278 && (d < 0
6279 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6280 d = last_clock - clocks [INSN_UID (dep_insn)];
6282 if (d >= 0)
6283 add_cycles [INSN_UID (insn)] = 3 - d;
6286 return 0;
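/* A stand-alone sketch (disabled with #if 0) of the add_cycles
   computation above: find the closest MM-type producer issued less than
   4 cycles before the insn and request 3 - distance extra cycles, so
   that the bundling pass can stretch the dependence to 4 cycles.  The
   toy_* names are invented; the real code reads the producer issue
   cycles from the clocks[] array.  */
#if 0
#include <stdio.h>

/* Given the clock of the insn being issued and the issue clocks of its
   MM-type producers, return how many extra cycles should be inserted.  */
static int
toy_extra_cycles (int insn_clock, const int *mm_dep_clocks, int n_deps)
{
  int d = -1;
  int i;

  for (i = 0; i < n_deps; i++)
    {
      int dist = insn_clock - mm_dep_clocks[i];

      if (dist < 4 && (d < 0 || dist < d))
        d = dist;
    }
  return d >= 0 ? 3 - d : 0;
}

int
main (void)
{
  int deps[] = { 7, 9 };   /* MM producers issued on cycles 7 and 9 */

  /* Consumer issued on cycle 10: the nearest MM producer is 1 cycle
     away, so 3 - 1 = 2 extra cycles are requested.  */
  printf ("%d\n", toy_extra_cycles (10, deps, 2));
  return 0;
}
#endif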
6291 /* The following page contains abstract data `bundle states' which are
6292 used for bundling insns (inserting nops and template generation). */
6294 /* The following describes state of insn bundling. */
6296 struct bundle_state
6298 /* Unique bundle state number to identify them in the debugging
6299 output */
6300 int unique_num;
6301 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6302 /* number of nops before and after the insn */
6303 short before_nops_num, after_nops_num;
6304 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
6305 insn, and so on) */
6306 int cost; /* cost of the state in cycles */
6307 int accumulated_insns_num; /* number of all previous insns including
6308 nops. An L insn is counted as 2 insns */
6309 int branch_deviation; /* deviation of previous branches from 3rd slots */
6310 struct bundle_state *next; /* next state with the same insn_num */
6311 struct bundle_state *originator; /* originator (previous insn state) */
6312 /* All bundle states are in the following chain. */
6313 struct bundle_state *allocated_states_chain;
6314 /* The DFA State after issuing the insn and the nops. */
6315 state_t dfa_state;
6318 /* The following maps an insn number to the corresponding bundle state. */
6320 static struct bundle_state **index_to_bundle_states;
6322 /* The unique number of next bundle state. */
6324 static int bundle_states_num;
6326 /* All allocated bundle states are in the following chain. */
6328 static struct bundle_state *allocated_bundle_states_chain;
6330 /* All allocated but not used bundle states are in the following
6331 chain. */
6333 static struct bundle_state *free_bundle_state_chain;
6336 /* The following function returns a free bundle state. */
6338 static struct bundle_state *
6339 get_free_bundle_state (void)
6341 struct bundle_state *result;
6343 if (free_bundle_state_chain != NULL)
6345 result = free_bundle_state_chain;
6346 free_bundle_state_chain = result->next;
6348 else
6350 result = xmalloc (sizeof (struct bundle_state));
6351 result->dfa_state = xmalloc (dfa_state_size);
6352 result->allocated_states_chain = allocated_bundle_states_chain;
6353 allocated_bundle_states_chain = result;
6355 result->unique_num = bundle_states_num++;
6356 return result;
6360 /* The following function frees given bundle state. */
6362 static void
6363 free_bundle_state (struct bundle_state *state)
6365 state->next = free_bundle_state_chain;
6366 free_bundle_state_chain = state;
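/* A self-contained sketch (disabled with #if 0) of the free-list
   pattern used by get_free_bundle_state and free_bundle_state above:
   recycle nodes through a free chain, remember every allocation on a
   second chain, and release them all at once at the end (as
   finish_bundle_states does below).  The toy_* names and the payload
   field are invented.  */
#if 0
#include <stdlib.h>

struct toy_node
{
  int payload;
  struct toy_node *next;              /* free-list link */
  struct toy_node *allocated_chain;   /* every node ever allocated */
};

static struct toy_node *toy_free_chain;
static struct toy_node *toy_allocated_chain;

/* Reuse a node from the free list if possible, otherwise allocate one
   and remember it on the allocated chain for bulk cleanup.  */
static struct toy_node *
toy_get_node (void)
{
  struct toy_node *result;

  if (toy_free_chain != NULL)
    {
      result = toy_free_chain;
      toy_free_chain = result->next;
    }
  else
    {
      result = malloc (sizeof (struct toy_node));
      if (result == NULL)
        abort ();
      result->allocated_chain = toy_allocated_chain;
      toy_allocated_chain = result;
    }
  return result;
}

/* Return a node to the free list; nothing is actually freed here.  */
static void
toy_put_node (struct toy_node *node)
{
  node->next = toy_free_chain;
  toy_free_chain = node;
}

/* Release everything in one sweep over the allocated chain.  */
static void
toy_release_all (void)
{
  struct toy_node *p, *next;

  for (p = toy_allocated_chain; p != NULL; p = next)
    {
      next = p->allocated_chain;
      free (p);
    }
  toy_allocated_chain = NULL;
  toy_free_chain = NULL;
}
#endif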
6369 /* Start work with abstract data `bundle states'. */
6371 static void
6372 initiate_bundle_states (void)
6374 bundle_states_num = 0;
6375 free_bundle_state_chain = NULL;
6376 allocated_bundle_states_chain = NULL;
6379 /* Finish work with abstract data `bundle states'. */
6381 static void
6382 finish_bundle_states (void)
6384 struct bundle_state *curr_state, *next_state;
6386 for (curr_state = allocated_bundle_states_chain;
6387 curr_state != NULL;
6388 curr_state = next_state)
6390 next_state = curr_state->allocated_states_chain;
6391 free (curr_state->dfa_state);
6392 free (curr_state);
6396 /* Hash table of the bundle states. The key is dfa_state and insn_num
6397 of the bundle states. */
6399 static htab_t bundle_state_table;
6401 /* The function returns hash of BUNDLE_STATE. */
6403 static unsigned
6404 bundle_state_hash (const void *bundle_state)
6406 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6407 unsigned result, i;
6409 for (result = i = 0; i < dfa_state_size; i++)
6410 result += (((unsigned char *) state->dfa_state) [i]
6411 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6412 return result + state->insn_num;
6415 /* The function returns nonzero if the bundle state keys are equal. */
6417 static int
6418 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6420 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6421 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6423 return (state1->insn_num == state2->insn_num
6424 && memcmp (state1->dfa_state, state2->dfa_state,
6425 dfa_state_size) == 0);
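/* A stand-alone restatement (disabled with #if 0) of the hashing scheme
   above: the key is the raw DFA state bytes plus the insn number, mixed
   with the same shift pattern as bundle_state_hash and compared with
   memcmp as in bundle_state_eq_p.  The toy_* names are invented; the
   cast to unsigned merely keeps the shifts well defined.  */
#if 0
#include <limits.h>
#include <string.h>

/* Hash an opaque byte buffer (the DFA state) together with an insn
   number.  */
static unsigned
toy_state_hash (const unsigned char *buf, size_t size, int insn_num)
{
  unsigned result = 0;
  size_t i;

  for (i = 0; i < size; i++)
    result += (unsigned) buf[i] << ((i % CHAR_BIT) * 3 + CHAR_BIT);
  return result + (unsigned) insn_num;
}

/* Two keys are equal only if both the insn number and every byte of
   the DFA state match.  */
static int
toy_state_eq (const unsigned char *a, const unsigned char *b,
              size_t size, int insn_a, int insn_b)
{
  return insn_a == insn_b && memcmp (a, b, size) == 0;
}
#endif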
6428 /* The function inserts the BUNDLE_STATE into the hash table. The
6429 function returns nonzero if the bundle has been inserted into the
6430 table. The table contains the best bundle state for a given key. */
6432 static int
6433 insert_bundle_state (struct bundle_state *bundle_state)
6435 void **entry_ptr;
6437 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6438 if (*entry_ptr == NULL)
6440 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6441 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6442 *entry_ptr = (void *) bundle_state;
6443 return TRUE;
6445 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6446 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6447 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6448 > bundle_state->accumulated_insns_num
6449 || (((struct bundle_state *)
6450 *entry_ptr)->accumulated_insns_num
6451 == bundle_state->accumulated_insns_num
6452 && ((struct bundle_state *)
6453 *entry_ptr)->branch_deviation
6454 > bundle_state->branch_deviation))))
6457 struct bundle_state temp;
6459 temp = *(struct bundle_state *) *entry_ptr;
6460 *(struct bundle_state *) *entry_ptr = *bundle_state;
6461 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6462 *bundle_state = temp;
6464 return FALSE;
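/* The replacement condition above is a lexicographic comparison; the
   sketch below (disabled with #if 0) spells it out with invented toy_*
   names: lower cost wins, then fewer accumulated slots (insns plus
   nops), then smaller branch deviation.  */
#if 0
struct toy_key_value
{
  int cost;
  int accumulated_insns_num;
  int branch_deviation;
};

/* Return nonzero if A is strictly better than B under the ordering
   used by insert_bundle_state.  */
static int
toy_better_p (const struct toy_key_value *a, const struct toy_key_value *b)
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  return a->branch_deviation < b->branch_deviation;
}
#endif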
6467 /* Start work with the hash table. */
6469 static void
6470 initiate_bundle_state_table (void)
6472 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6473 (htab_del) 0);
6476 /* Finish work with the hash table. */
6478 static void
6479 finish_bundle_state_table (void)
6481 htab_delete (bundle_state_table);
6486 /* The following variable is an insn `nop' used to check bundle states
6487 with different numbers of inserted nops. */
6489 static rtx ia64_nop;
6491 /* The following function tries to issue NOPS_NUM nops for the current
6492 state without advancing the processor cycle. If it fails, the
6493 function returns FALSE and frees the current state. */
6495 static int
6496 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6498 int i;
6500 for (i = 0; i < nops_num; i++)
6501 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6503 free_bundle_state (curr_state);
6504 return FALSE;
6506 return TRUE;
6509 /* The following function tries to issue INSN for the current
6510 state without advancing the processor cycle. If it fails, the
6511 function returns FALSE and frees the current state. */
6513 static int
6514 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6516 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6518 free_bundle_state (curr_state);
6519 return FALSE;
6521 return TRUE;
6524 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6525 starting with ORIGINATOR without advancing the processor cycle. If
6526 TRY_BUNDLE_END_P is TRUE, the function also/only (if
6527 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6528 If successful, the function creates a new bundle state and
6529 inserts it into the hash table and into `index_to_bundle_states'. */
6531 static void
6532 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6533 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6535 struct bundle_state *curr_state;
6537 curr_state = get_free_bundle_state ();
6538 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6539 curr_state->insn = insn;
6540 curr_state->insn_num = originator->insn_num + 1;
6541 curr_state->cost = originator->cost;
6542 curr_state->originator = originator;
6543 curr_state->before_nops_num = before_nops_num;
6544 curr_state->after_nops_num = 0;
6545 curr_state->accumulated_insns_num
6546 = originator->accumulated_insns_num + before_nops_num;
6547 curr_state->branch_deviation = originator->branch_deviation;
6548 gcc_assert (insn);
6549 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6551 gcc_assert (GET_MODE (insn) != TImode);
6552 if (!try_issue_nops (curr_state, before_nops_num))
6553 return;
6554 if (!try_issue_insn (curr_state, insn))
6555 return;
6556 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6557 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6558 && curr_state->accumulated_insns_num % 3 != 0)
6560 free_bundle_state (curr_state);
6561 return;
6564 else if (GET_MODE (insn) != TImode)
6566 if (!try_issue_nops (curr_state, before_nops_num))
6567 return;
6568 if (!try_issue_insn (curr_state, insn))
6569 return;
6570 curr_state->accumulated_insns_num++;
6571 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
6572 && asm_noperands (PATTERN (insn)) < 0);
6574 if (ia64_safe_type (insn) == TYPE_L)
6575 curr_state->accumulated_insns_num++;
6577 else
6579 /* If this is an insn that must be first in a group, then don't allow
6580 nops to be emitted before it. Currently, alloc is the only such
6581 supported instruction. */
6582 /* ??? The bundling automatons should handle this for us, but they do
6583 not yet have support for the first_insn attribute. */
6584 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
6586 free_bundle_state (curr_state);
6587 return;
6590 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6591 state_transition (curr_state->dfa_state, NULL);
6592 curr_state->cost++;
6593 if (!try_issue_nops (curr_state, before_nops_num))
6594 return;
6595 if (!try_issue_insn (curr_state, insn))
6596 return;
6597 curr_state->accumulated_insns_num++;
6598 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6599 || asm_noperands (PATTERN (insn)) >= 0)
6601 /* Finish bundle containing asm insn. */
6602 curr_state->after_nops_num
6603 = 3 - curr_state->accumulated_insns_num % 3;
6604 curr_state->accumulated_insns_num
6605 += 3 - curr_state->accumulated_insns_num % 3;
6607 else if (ia64_safe_type (insn) == TYPE_L)
6608 curr_state->accumulated_insns_num++;
6610 if (ia64_safe_type (insn) == TYPE_B)
6611 curr_state->branch_deviation
6612 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6613 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6615 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6617 state_t dfa_state;
6618 struct bundle_state *curr_state1;
6619 struct bundle_state *allocated_states_chain;
6621 curr_state1 = get_free_bundle_state ();
6622 dfa_state = curr_state1->dfa_state;
6623 allocated_states_chain = curr_state1->allocated_states_chain;
6624 *curr_state1 = *curr_state;
6625 curr_state1->dfa_state = dfa_state;
6626 curr_state1->allocated_states_chain = allocated_states_chain;
6627 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6628 dfa_state_size);
6629 curr_state = curr_state1;
6631 if (!try_issue_nops (curr_state,
6632 3 - curr_state->accumulated_insns_num % 3))
6633 return;
6634 curr_state->after_nops_num
6635 = 3 - curr_state->accumulated_insns_num % 3;
6636 curr_state->accumulated_insns_num
6637 += 3 - curr_state->accumulated_insns_num % 3;
6639 if (!insert_bundle_state (curr_state))
6640 free_bundle_state (curr_state);
6641 return;
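/* A tiny stand-alone example (disabled with #if 0) of the padding rule
   used above: when a bundle has to be closed, 3 - accumulated % 3
   trailing nops are issued, and none when the count is already a
   multiple of 3.  The toy_* names are invented; the count is in slots,
   with an L insn counted twice.  */
#if 0
#include <stdio.h>

/* Nops needed to pad out the current bundle, given how many slots have
   been consumed so far.  */
static int
toy_pad_to_bundle_end (int accumulated_insns_num)
{
  return accumulated_insns_num % 3 == 0
         ? 0
         : 3 - accumulated_insns_num % 3;
}

int
main (void)
{
  int n;

  for (n = 0; n <= 6; n++)
    printf ("%d slots used -> %d trailing nop(s)\n",
            n, toy_pad_to_bundle_end (n));
  return 0;
}
#endif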
6644 /* The following function returns the position in the two-bundle window
6645 for the given STATE. */
6647 static int
6648 get_max_pos (state_t state)
6650 if (cpu_unit_reservation_p (state, pos_6))
6651 return 6;
6652 else if (cpu_unit_reservation_p (state, pos_5))
6653 return 5;
6654 else if (cpu_unit_reservation_p (state, pos_4))
6655 return 4;
6656 else if (cpu_unit_reservation_p (state, pos_3))
6657 return 3;
6658 else if (cpu_unit_reservation_p (state, pos_2))
6659 return 2;
6660 else if (cpu_unit_reservation_p (state, pos_1))
6661 return 1;
6662 else
6663 return 0;
6666 /* The function returns the code of a possible template for the given position
6667 and state. The function should be called only with the two values of
6668 position equal to 3 or 6. We avoid generating F NOPs by putting
6669 templates containing F insns at the end of the template search
6670 because of an undocumented anomaly in McKinley-derived cores which can
6671 cause stalls if an F-unit insn (including a NOP) is issued within a
6672 six-cycle window after reading certain application registers (such
6673 as ar.bsp). Furthermore, power considerations also argue against
6674 the use of F-unit instructions unless they're really needed. */
6676 static int
6677 get_template (state_t state, int pos)
6679 switch (pos)
6681 case 3:
6682 if (cpu_unit_reservation_p (state, _0mmi_))
6683 return 1;
6684 else if (cpu_unit_reservation_p (state, _0mii_))
6685 return 0;
6686 else if (cpu_unit_reservation_p (state, _0mmb_))
6687 return 7;
6688 else if (cpu_unit_reservation_p (state, _0mib_))
6689 return 6;
6690 else if (cpu_unit_reservation_p (state, _0mbb_))
6691 return 5;
6692 else if (cpu_unit_reservation_p (state, _0bbb_))
6693 return 4;
6694 else if (cpu_unit_reservation_p (state, _0mmf_))
6695 return 3;
6696 else if (cpu_unit_reservation_p (state, _0mfi_))
6697 return 2;
6698 else if (cpu_unit_reservation_p (state, _0mfb_))
6699 return 8;
6700 else if (cpu_unit_reservation_p (state, _0mlx_))
6701 return 9;
6702 else
6703 gcc_unreachable ();
6704 case 6:
6705 if (cpu_unit_reservation_p (state, _1mmi_))
6706 return 1;
6707 else if (cpu_unit_reservation_p (state, _1mii_))
6708 return 0;
6709 else if (cpu_unit_reservation_p (state, _1mmb_))
6710 return 7;
6711 else if (cpu_unit_reservation_p (state, _1mib_))
6712 return 6;
6713 else if (cpu_unit_reservation_p (state, _1mbb_))
6714 return 5;
6715 else if (cpu_unit_reservation_p (state, _1bbb_))
6716 return 4;
6717 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6718 return 3;
6719 else if (cpu_unit_reservation_p (state, _1mfi_))
6720 return 2;
6721 else if (cpu_unit_reservation_p (state, _1mfb_))
6722 return 8;
6723 else if (cpu_unit_reservation_p (state, _1mlx_))
6724 return 9;
6725 else
6726 gcc_unreachable ();
6727 default:
6728 gcc_unreachable ();
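/* The codes returned by get_template above are indices into the
   bundle_name table defined earlier in this file (and retrieved with
   get_bundle_name); the sketch below (disabled with #if 0) keeps a
   private copy of that table just to show the correspondence, e.g.
   code 2 is .mfi and code 9 is .mlx.  The toy_* name is invented.  */
#if 0
#include <stdio.h>

static const char *const toy_bundle_name[10] =
{
  ".mii", ".mmi", ".mfi", ".mmf", ".bbb",
  ".mbb", ".mib", ".mmb", ".mfb", ".mlx"
};

int
main (void)
{
  /* E.g. code 2 selects an MFI bundle, code 9 an MLX bundle.  */
  printf ("%s %s\n", toy_bundle_name[2], toy_bundle_name[9]);
  return 0;
}
#endif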
6732 /* The following function returns an insn important for insn bundling
6733 following INSN and before TAIL. */
6735 static rtx
6736 get_next_important_insn (rtx insn, rtx tail)
6738 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6739 if (INSN_P (insn)
6740 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6741 && GET_CODE (PATTERN (insn)) != USE
6742 && GET_CODE (PATTERN (insn)) != CLOBBER)
6743 return insn;
6744 return NULL_RTX;
6747 /* The following function does insn bundling. Bundling means
6748 inserting templates and nop insns to fit insn groups into permitted
6749 templates. Instruction scheduling uses an NDFA (non-deterministic
6750 finite automaton) encoding information about the templates and the
6751 inserted nops. The nondeterminism of the automaton permits following
6752 all possible insn sequences very quickly.
6754 Unfortunately it is not possible to get information about inserted
6755 nop insns and used templates from the automaton states. The
6756 automaton only says that we can issue an insn, possibly inserting
6757 some nops before it and using some template. Therefore insn
6758 bundling in this function is implemented by using a DFA
6759 (deterministic finite automaton). We follow all possible insn
6760 sequences by inserting 0-2 nops (that is what the NDFA describes for
6761 insn scheduling) before/after each insn being bundled. We know the
6762 start of a simulated processor cycle from insn scheduling (an insn
6763 starting a new cycle has TImode).
6765 A simple implementation of insn bundling would create an enormous
6766 number of possible insn sequences satisfying the information about
6767 new cycle ticks taken from the insn scheduling. To make the algorithm
6768 practical we use dynamic programming. Each decision (about
6769 inserting nops and implicitly about previous decisions) is described
6770 by the structure bundle_state (see above). If we generate the same
6771 bundle state (the key is the automaton state after issuing the insns
6772 and nops for it), we reuse the already generated one. As a
6773 consequence we reject some decisions which cannot improve the
6774 solution and reduce the memory needed by the algorithm.
6776 When we reach the end of the EBB (extended basic block), we choose
6777 the best sequence and then, moving back through the EBB, insert
6778 templates for the best alternative. The templates are found by
6779 querying the automaton state for each insn in the chosen bundle states.
6781 So the algorithm makes two (forward and backward) passes through the
6782 EBB. There is an additional forward pass through the EBB for the
6783 Itanium1 processor. This pass inserts more nops to make the dependency
6784 between a producer insn and an MMMUL/MMSHF insn at least 4 cycles long. */
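/* A rough, self-contained illustration (disabled with #if 0) of the
   dynamic program just described, with drastic simplifications: the
   automaton state is collapsed to the slot position modulo 3, the only
   "template" restriction is an invented must-sit-in-the-last-slot
   flag, every insn takes one slot, and the cost counts nops only.  The
   real code keys states on the full DFA state and also weighs
   accumulated insns and branch deviation; all toy_* names are made up.  */
#if 0
#include <stdio.h>
#include <string.h>

#define TOY_N_INSNS 5
#define TOY_SLOTS   3          /* slots per bundle */

/* Toy constraint: a '1' insn must land in the last slot of a bundle.  */
static const int toy_last_slot_only[TOY_N_INSNS] = { 0, 1, 0, 0, 1 };

struct toy_state
{
  int nops;                    /* cost so far, -1 if unreachable */
  int nops_before;             /* decision that produced this state */
  int prev_pos;                /* originator key: position before this insn */
};

int
main (void)
{
  /* best[i][p]: cheapest way to have issued the first I insns and be
     at slot position P (mod 3) afterwards.  */
  struct toy_state best[TOY_N_INSNS + 1][TOY_SLOTS];
  int nops_before[TOY_N_INSNS];
  int i, p, n, pos, final_pos;

  memset (best, -1, sizeof best);
  best[0][0].nops = 0;

  /* Forward pass: try inserting 0-2 nops before each insn and keep the
     cheapest state per (insn index, slot position) key.  */
  for (i = 0; i < TOY_N_INSNS; i++)
    for (p = 0; p < TOY_SLOTS; p++)
      {
        if (best[i][p].nops < 0)
          continue;
        for (n = 0; n < 3; n++)
          {
            int slot = (p + n) % TOY_SLOTS;     /* slot the insn lands in */
            int next = (slot + 1) % TOY_SLOTS;
            int cost = best[i][p].nops + n;

            if (toy_last_slot_only[i] && slot != TOY_SLOTS - 1)
              continue;                         /* constraint violated */
            if (best[i + 1][next].nops < 0 || cost < best[i + 1][next].nops)
              {
                best[i + 1][next].nops = cost;
                best[i + 1][next].nops_before = n;
                best[i + 1][next].prev_pos = p;
              }
          }
      }

  /* Pick the cheapest final state, then walk originators backwards to
     recover how many nops precede each insn.  */
  final_pos = 0;
  for (p = 1; p < TOY_SLOTS; p++)
    if (best[TOY_N_INSNS][p].nops >= 0
        && (best[TOY_N_INSNS][final_pos].nops < 0
            || best[TOY_N_INSNS][p].nops < best[TOY_N_INSNS][final_pos].nops))
      final_pos = p;

  pos = final_pos;
  for (i = TOY_N_INSNS - 1; i >= 0; i--)
    {
      nops_before[i] = best[i + 1][pos].nops_before;
      pos = best[i + 1][pos].prev_pos;
    }

  for (i = 0; i < TOY_N_INSNS; i++)
    printf ("insn %d: %d nop(s) before\n", i, nops_before[i]);
  printf ("total nops: %d\n", best[TOY_N_INSNS][final_pos].nops);
  return 0;
}
#endif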
6786 static void
6787 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6789 struct bundle_state *curr_state, *next_state, *best_state;
6790 rtx insn, next_insn;
6791 int insn_num;
6792 int i, bundle_end_p, only_bundle_end_p, asm_p;
6793 int pos = 0, max_pos, template0, template1;
6794 rtx b;
6795 rtx nop;
6796 enum attr_type type;
6798 insn_num = 0;
6799 /* Count insns in the EBB. */
6800 for (insn = NEXT_INSN (prev_head_insn);
6801 insn && insn != tail;
6802 insn = NEXT_INSN (insn))
6803 if (INSN_P (insn))
6804 insn_num++;
6805 if (insn_num == 0)
6806 return;
6807 bundling_p = 1;
6808 dfa_clean_insn_cache ();
6809 initiate_bundle_state_table ();
6810 index_to_bundle_states = xmalloc ((insn_num + 2)
6811 * sizeof (struct bundle_state *));
6812 /* First (forward) pass -- generation of bundle states. */
6813 curr_state = get_free_bundle_state ();
6814 curr_state->insn = NULL;
6815 curr_state->before_nops_num = 0;
6816 curr_state->after_nops_num = 0;
6817 curr_state->insn_num = 0;
6818 curr_state->cost = 0;
6819 curr_state->accumulated_insns_num = 0;
6820 curr_state->branch_deviation = 0;
6821 curr_state->next = NULL;
6822 curr_state->originator = NULL;
6823 state_reset (curr_state->dfa_state);
6824 index_to_bundle_states [0] = curr_state;
6825 insn_num = 0;
6826 /* Shift the cycle mark if it is put on an insn which could be ignored. */
6827 for (insn = NEXT_INSN (prev_head_insn);
6828 insn != tail;
6829 insn = NEXT_INSN (insn))
6830 if (INSN_P (insn)
6831 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6832 || GET_CODE (PATTERN (insn)) == USE
6833 || GET_CODE (PATTERN (insn)) == CLOBBER)
6834 && GET_MODE (insn) == TImode)
6836 PUT_MODE (insn, VOIDmode);
6837 for (next_insn = NEXT_INSN (insn);
6838 next_insn != tail;
6839 next_insn = NEXT_INSN (next_insn))
6840 if (INSN_P (next_insn)
6841 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6842 && GET_CODE (PATTERN (next_insn)) != USE
6843 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6845 PUT_MODE (next_insn, TImode);
6846 break;
6849 /* Forward pass: generation of bundle states. */
6850 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6851 insn != NULL_RTX;
6852 insn = next_insn)
6854 gcc_assert (INSN_P (insn)
6855 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6856 && GET_CODE (PATTERN (insn)) != USE
6857 && GET_CODE (PATTERN (insn)) != CLOBBER);
6858 type = ia64_safe_type (insn);
6859 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6860 insn_num++;
6861 index_to_bundle_states [insn_num] = NULL;
6862 for (curr_state = index_to_bundle_states [insn_num - 1];
6863 curr_state != NULL;
6864 curr_state = next_state)
6866 pos = curr_state->accumulated_insns_num % 3;
6867 next_state = curr_state->next;
6868 /* We must fill up the current bundle in order to start a
6869 subsequent asm insn in a new bundle. An asm insn is always
6870 placed in a separate bundle. */
6871 only_bundle_end_p
6872 = (next_insn != NULL_RTX
6873 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6874 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6875 /* We may fill up the current bundle if it is the cycle end
6876 without a group barrier. */
6877 bundle_end_p
6878 = (only_bundle_end_p || next_insn == NULL_RTX
6879 || (GET_MODE (next_insn) == TImode
6880 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6881 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6882 || type == TYPE_S
6883 /* We need to insert 2 nops for cases like M_MII. To
6884 guarantee issuing all insns on the same cycle for
6885 Itanium 1, we need to issue 2 nops after the first M
6886 insn (MnnMII where n is a nop insn). */
6887 || ((type == TYPE_M || type == TYPE_A)
6888 && ia64_tune == PROCESSOR_ITANIUM
6889 && !bundle_end_p && pos == 1))
6890 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6891 only_bundle_end_p);
6892 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6893 only_bundle_end_p);
6894 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6895 only_bundle_end_p);
6897 gcc_assert (index_to_bundle_states [insn_num]);
6898 for (curr_state = index_to_bundle_states [insn_num];
6899 curr_state != NULL;
6900 curr_state = curr_state->next)
6901 if (verbose >= 2 && dump)
6903 /* This structure is taken from generated code of the
6904 pipeline hazard recognizer (see file insn-attrtab.c).
6905 Please don't forget to change the structure if a new
6906 automaton is added to .md file. */
6907 struct DFA_chip
6909 unsigned short one_automaton_state;
6910 unsigned short oneb_automaton_state;
6911 unsigned short two_automaton_state;
6912 unsigned short twob_automaton_state;
6915 fprintf
6916 (dump,
6917 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6918 curr_state->unique_num,
6919 (curr_state->originator == NULL
6920 ? -1 : curr_state->originator->unique_num),
6921 curr_state->cost,
6922 curr_state->before_nops_num, curr_state->after_nops_num,
6923 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6924 (ia64_tune == PROCESSOR_ITANIUM
6925 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6926 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6927 INSN_UID (insn));
6931 /* We should find a solution because the 2nd insn scheduling has
6932 found one. */
6933 gcc_assert (index_to_bundle_states [insn_num]);
6934 /* Find a state corresponding to the best insn sequence. */
6935 best_state = NULL;
6936 for (curr_state = index_to_bundle_states [insn_num];
6937 curr_state != NULL;
6938 curr_state = curr_state->next)
6939 /* We are only looking at states whose last bundle is completely
6940 filled. We prefer insn sequences first with minimal cost,
6941 then with minimal inserted nops, and finally with branch insns
6942 placed in 3rd slots. */
6943 if (curr_state->accumulated_insns_num % 3 == 0
6944 && (best_state == NULL || best_state->cost > curr_state->cost
6945 || (best_state->cost == curr_state->cost
6946 && (curr_state->accumulated_insns_num
6947 < best_state->accumulated_insns_num
6948 || (curr_state->accumulated_insns_num
6949 == best_state->accumulated_insns_num
6950 && curr_state->branch_deviation
6951 < best_state->branch_deviation)))))
6952 best_state = curr_state;
6953 /* Second (backward) pass: adding nops and templates. */
6954 insn_num = best_state->before_nops_num;
6955 template0 = template1 = -1;
6956 for (curr_state = best_state;
6957 curr_state->originator != NULL;
6958 curr_state = curr_state->originator)
6960 insn = curr_state->insn;
6961 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6962 || asm_noperands (PATTERN (insn)) >= 0);
6963 insn_num++;
6964 if (verbose >= 2 && dump)
6966 struct DFA_chip
6968 unsigned short one_automaton_state;
6969 unsigned short oneb_automaton_state;
6970 unsigned short two_automaton_state;
6971 unsigned short twob_automaton_state;
6974 fprintf
6975 (dump,
6976 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6977 curr_state->unique_num,
6978 (curr_state->originator == NULL
6979 ? -1 : curr_state->originator->unique_num),
6980 curr_state->cost,
6981 curr_state->before_nops_num, curr_state->after_nops_num,
6982 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6983 (ia64_tune == PROCESSOR_ITANIUM
6984 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6985 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6986 INSN_UID (insn));
6988 /* Find the position in the current bundle window. The window can
6989 contain at most two bundles. A two-bundle window means that
6990 the processor will make two bundle rotations. */
6991 max_pos = get_max_pos (curr_state->dfa_state);
6992 if (max_pos == 6
6993 /* The following (negative template number) means that the
6994 processor did one bundle rotation. */
6995 || (max_pos == 3 && template0 < 0))
6997 /* We are at the end of the window -- find template(s) for
6998 its bundle(s). */
6999 pos = max_pos;
7000 if (max_pos == 3)
7001 template0 = get_template (curr_state->dfa_state, 3);
7002 else
7004 template1 = get_template (curr_state->dfa_state, 3);
7005 template0 = get_template (curr_state->dfa_state, 6);
7008 if (max_pos > 3 && template1 < 0)
7009 /* This may happen when we have a stop inside a bundle. */
7011 gcc_assert (pos <= 3);
7012 template1 = get_template (curr_state->dfa_state, 3);
7013 pos += 3;
7015 if (!asm_p)
7016 /* Emit nops after the current insn. */
7017 for (i = 0; i < curr_state->after_nops_num; i++)
7019 nop = gen_nop ();
7020 emit_insn_after (nop, insn);
7021 pos--;
7022 gcc_assert (pos >= 0);
7023 if (pos % 3 == 0)
7025 /* We are at the start of a bundle: emit the template
7026 (it should be defined). */
7027 gcc_assert (template0 >= 0);
7028 b = gen_bundle_selector (GEN_INT (template0));
7029 ia64_emit_insn_before (b, nop);
7030 /* If we have a two-bundle window, we make one bundle
7031 rotation. Otherwise template0 will be undefined
7032 (a negative value). */
7033 template0 = template1;
7034 template1 = -1;
7037 /* Move the position backward in the window. A group barrier has
7038 no slot. An asm insn takes a whole bundle. */
7039 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7040 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7041 && asm_noperands (PATTERN (insn)) < 0)
7042 pos--;
7043 /* A long insn takes 2 slots. */
7044 if (ia64_safe_type (insn) == TYPE_L)
7045 pos--;
7046 gcc_assert (pos >= 0);
7047 if (pos % 3 == 0
7048 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7049 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7050 && asm_noperands (PATTERN (insn)) < 0)
7052 /* The current insn is at the bundle start: emit the
7053 template. */
7054 gcc_assert (template0 >= 0);
7055 b = gen_bundle_selector (GEN_INT (template0));
7056 ia64_emit_insn_before (b, insn);
7057 b = PREV_INSN (insn);
7058 insn = b;
7059 /* See comment above in analogous place for emitting nops
7060 after the insn. */
7061 template0 = template1;
7062 template1 = -1;
7064 /* Emit nops before the current insn. */
7065 for (i = 0; i < curr_state->before_nops_num; i++)
7067 nop = gen_nop ();
7068 ia64_emit_insn_before (nop, insn);
7069 nop = PREV_INSN (insn);
7070 insn = nop;
7071 pos--;
7072 gcc_assert (pos >= 0);
7073 if (pos % 3 == 0)
7075 /* See comment above in analogous place for emitting nops
7076 after the insn. */
7077 gcc_assert (template0 >= 0);
7078 b = gen_bundle_selector (GEN_INT (template0));
7079 ia64_emit_insn_before (b, insn);
7080 b = PREV_INSN (insn);
7081 insn = b;
7082 template0 = template1;
7083 template1 = -1;
7087 if (ia64_tune == PROCESSOR_ITANIUM)
7088 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
7089 Itanium1 has a strange design: if the distance between an insn
7090 and a dependent MM-insn is less than 4, then we get an additional
7091 6-cycle stall. So we make the distance equal to 4 cycles if it
7092 is less. */
7093 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7094 insn != NULL_RTX;
7095 insn = next_insn)
7097 gcc_assert (INSN_P (insn)
7098 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7099 && GET_CODE (PATTERN (insn)) != USE
7100 && GET_CODE (PATTERN (insn)) != CLOBBER);
7101 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7102 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
7103 /* We found an MM-insn which needs additional cycles. */
7105 rtx last;
7106 int i, j, n;
7107 int pred_stop_p;
7109 /* Now we are searching for the template of the bundle in
7110 which the MM-insn is placed and for the position of the
7111 insn in the bundle (0, 1, 2). We also check whether
7112 there is a stop before the insn. */
7113 last = prev_active_insn (insn);
7114 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7115 if (pred_stop_p)
7116 last = prev_active_insn (last);
7117 n = 0;
7118 for (;; last = prev_active_insn (last))
7119 if (recog_memoized (last) == CODE_FOR_bundle_selector)
7121 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7122 if (template0 == 9)
7123 /* The insn is in an MLX bundle. Change the template
7124 to MFI because we will add nops before the
7125 insn. This simplifies the subsequent code a lot. */
7126 PATTERN (last)
7127 = gen_bundle_selector (const2_rtx); /* -> MFI */
7128 break;
7130 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
7131 && (ia64_safe_itanium_class (last)
7132 != ITANIUM_CLASS_IGNORE))
7133 n++;
7134 /* Some correctness checks: the stop is not at the
7135 bundle start, there are no more than 3 insns in the bundle,
7136 and the MM-insn is not at the start of a bundle with
7137 template MLX. */
7138 gcc_assert ((!pred_stop_p || n)
7139 && n <= 2
7140 && (template0 != 9 || !n));
7141 /* Put nops after the insn in the bundle. */
7142 for (j = 3 - n; j > 0; j --)
7143 ia64_emit_insn_before (gen_nop (), insn);
7144 /* This takes into account that we will add N more nops
7145 before the insn later -- please see the code below. */
7146 add_cycles [INSN_UID (insn)]--;
7147 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7148 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7149 insn);
7150 if (pred_stop_p)
7151 add_cycles [INSN_UID (insn)]--;
7152 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7154 /* Insert "MII;" template. */
7155 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
7156 insn);
7157 ia64_emit_insn_before (gen_nop (), insn);
7158 ia64_emit_insn_before (gen_nop (), insn);
7159 if (i > 1)
7161 /* To decrease code size, we use "MI;I;"
7162 template. */
7163 ia64_emit_insn_before
7164 (gen_insn_group_barrier (GEN_INT (3)), insn);
7165 i--;
7167 ia64_emit_insn_before (gen_nop (), insn);
7168 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7169 insn);
7171 /* Put the MM-insn in the same slot of a bundle with the
7172 same template as the original one. */
7173 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7174 insn);
7175 /* To put the insn in the same slot, add the necessary number
7176 of nops. */
7177 for (j = n; j > 0; j --)
7178 ia64_emit_insn_before (gen_nop (), insn);
7179 /* Put the stop if the original bundle had it. */
7180 if (pred_stop_p)
7181 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7182 insn);
7185 free (index_to_bundle_states);
7186 finish_bundle_state_table ();
7187 bundling_p = 0;
7188 dfa_clean_insn_cache ();
7191 /* The following function is called at the end of scheduling BB or
7192 EBB. After reload, it inserts stop bits and does insn bundling. */
7194 static void
7195 ia64_sched_finish (FILE *dump, int sched_verbose)
7197 if (sched_verbose)
7198 fprintf (dump, "// Finishing schedule.\n");
7199 if (!reload_completed)
7200 return;
7201 if (reload_completed)
7203 final_emit_insn_group_barriers (dump);
7204 bundling (dump, sched_verbose, current_sched_info->prev_head,
7205 current_sched_info->next_tail);
7206 if (sched_verbose && dump)
7207 fprintf (dump, "// finishing %d-%d\n",
7208 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7209 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7211 return;
7215 /* The following function inserts stop bits in scheduled BB or EBB. */
7217 static void
7218 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7220 rtx insn;
7221 int need_barrier_p = 0;
7222 rtx prev_insn = NULL_RTX;
7224 init_insn_group_barriers ();
7226 for (insn = NEXT_INSN (current_sched_info->prev_head);
7227 insn != current_sched_info->next_tail;
7228 insn = NEXT_INSN (insn))
7230 if (GET_CODE (insn) == BARRIER)
7232 rtx last = prev_active_insn (insn);
7234 if (! last)
7235 continue;
7236 if (GET_CODE (last) == JUMP_INSN
7237 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7238 last = prev_active_insn (last);
7239 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7240 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7242 init_insn_group_barriers ();
7243 need_barrier_p = 0;
7244 prev_insn = NULL_RTX;
7246 else if (INSN_P (insn))
7248 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7250 init_insn_group_barriers ();
7251 need_barrier_p = 0;
7252 prev_insn = NULL_RTX;
7254 else if (need_barrier_p || group_barrier_needed (insn))
7256 if (TARGET_EARLY_STOP_BITS)
7258 rtx last;
7260 for (last = insn;
7261 last != current_sched_info->prev_head;
7262 last = PREV_INSN (last))
7263 if (INSN_P (last) && GET_MODE (last) == TImode
7264 && stops_p [INSN_UID (last)])
7265 break;
7266 if (last == current_sched_info->prev_head)
7267 last = insn;
7268 last = prev_active_insn (last);
7269 if (last
7270 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7271 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7272 last);
7273 init_insn_group_barriers ();
7274 for (last = NEXT_INSN (last);
7275 last != insn;
7276 last = NEXT_INSN (last))
7277 if (INSN_P (last))
7278 group_barrier_needed (last);
7280 else
7282 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7283 insn);
7284 init_insn_group_barriers ();
7286 group_barrier_needed (insn);
7287 prev_insn = NULL_RTX;
7289 else if (recog_memoized (insn) >= 0)
7290 prev_insn = insn;
7291 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7292 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7293 || asm_noperands (PATTERN (insn)) >= 0);
7300 /* The following function returns the number of insns the DFA insn
7301 scheduler may look ahead when choosing an insn to issue. */
7303 static int
7304 ia64_first_cycle_multipass_dfa_lookahead (void)
7306 return (reload_completed ? 6 : 4);
7309 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
7311 static void
7312 ia64_init_dfa_pre_cycle_insn (void)
7314 if (temp_dfa_state == NULL)
7316 dfa_state_size = state_size ();
7317 temp_dfa_state = xmalloc (dfa_state_size);
7318 prev_cycle_state = xmalloc (dfa_state_size);
7320 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7321 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7322 recog_memoized (dfa_pre_cycle_insn);
7323 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7324 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7325 recog_memoized (dfa_stop_insn);
7328 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7329 used by the DFA insn scheduler. */
7331 static rtx
7332 ia64_dfa_pre_cycle_insn (void)
7334 return dfa_pre_cycle_insn;
7337 /* The following function returns TRUE if PRODUCER (of type ilog or
7338 ld) produces an address for CONSUMER (of type st or stf). */
7341 ia64_st_address_bypass_p (rtx producer, rtx consumer)
7343 rtx dest, reg, mem;
7345 gcc_assert (producer && consumer);
7346 dest = ia64_single_set (producer);
7347 gcc_assert (dest);
7348 reg = SET_DEST (dest);
7349 gcc_assert (reg);
7350 if (GET_CODE (reg) == SUBREG)
7351 reg = SUBREG_REG (reg);
7352 gcc_assert (GET_CODE (reg) == REG);
7354 dest = ia64_single_set (consumer);
7355 gcc_assert (dest);
7356 mem = SET_DEST (dest);
7357 gcc_assert (mem && GET_CODE (mem) == MEM);
7358 return reg_mentioned_p (reg, mem);
7361 /* The following function returns TRUE if PRODUCER (of type ilog or
7362 ld) produces an address for CONSUMER (of type ld or fld). */
7365 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7367 rtx dest, src, reg, mem;
7369 gcc_assert (producer && consumer);
7370 dest = ia64_single_set (producer);
7371 gcc_assert (dest);
7372 reg = SET_DEST (dest);
7373 gcc_assert (reg);
7374 if (GET_CODE (reg) == SUBREG)
7375 reg = SUBREG_REG (reg);
7376 gcc_assert (GET_CODE (reg) == REG);
7378 src = ia64_single_set (consumer);
7379 gcc_assert (src);
7380 mem = SET_SRC (src);
7381 gcc_assert (mem);
7382 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7383 mem = XVECEXP (mem, 0, 0);
7384 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7385 mem = XEXP (mem, 0);
7387 /* Note that LO_SUM is used for GOT loads. */
7388 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
7390 return reg_mentioned_p (reg, mem);
7393 /* The following function returns TRUE if INSN produces an address for a
7394 load/store insn. We will place such insns into an M slot because that
7395 decreases their latency. */
7398 ia64_produce_address_p (rtx insn)
7400 return insn->call;
7404 /* Emit pseudo-ops for the assembler to describe predicate relations.
7405 At present this assumes that we only consider predicate pairs to
7406 be mutex, and that the assembler can deduce proper values from
7407 straight-line code. */
7409 static void
7410 emit_predicate_relation_info (void)
7412 basic_block bb;
7414 FOR_EACH_BB_REVERSE (bb)
7416 int r;
7417 rtx head = BB_HEAD (bb);
7419 /* We only need such notes at code labels. */
7420 if (GET_CODE (head) != CODE_LABEL)
7421 continue;
7422 if (GET_CODE (NEXT_INSN (head)) == NOTE
7423 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7424 head = NEXT_INSN (head);
7426 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7427 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7429 rtx p = gen_rtx_REG (BImode, r);
7430 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7431 if (head == BB_END (bb))
7432 BB_END (bb) = n;
7433 head = n;
7437 /* Look for conditional calls that do not return, and protect predicate
7438 relations around them. Otherwise the assembler will assume the call
7439 returns, and complain about uses of call-clobbered predicates after
7440 the call. */
7441 FOR_EACH_BB_REVERSE (bb)
7443 rtx insn = BB_HEAD (bb);
7445 while (1)
7447 if (GET_CODE (insn) == CALL_INSN
7448 && GET_CODE (PATTERN (insn)) == COND_EXEC
7449 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7451 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7452 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7453 if (BB_HEAD (bb) == insn)
7454 BB_HEAD (bb) = b;
7455 if (BB_END (bb) == insn)
7456 BB_END (bb) = a;
7459 if (insn == BB_END (bb))
7460 break;
7461 insn = NEXT_INSN (insn);
7466 /* Perform machine dependent operations on the rtl chain INSNS. */
7468 static void
7469 ia64_reorg (void)
7471 /* We are freeing block_for_insn in the toplev to keep compatibility
7472 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7473 compute_bb_for_insn ();
7475 /* If optimizing, we'll have split before scheduling. */
7476 if (optimize == 0)
7477 split_all_insns (0);
7479 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7480 non-optimizing bootstrap. */
7481 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7483 if (ia64_flag_schedule_insns2)
7485 timevar_push (TV_SCHED2);
7486 ia64_final_schedule = 1;
7488 initiate_bundle_states ();
7489 ia64_nop = make_insn_raw (gen_nop ());
7490 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7491 recog_memoized (ia64_nop);
7492 clocks_length = get_max_uid () + 1;
7493 stops_p = xcalloc (1, clocks_length);
7494 if (ia64_tune == PROCESSOR_ITANIUM)
7496 clocks = xcalloc (clocks_length, sizeof (int));
7497 add_cycles = xcalloc (clocks_length, sizeof (int));
7499 if (ia64_tune == PROCESSOR_ITANIUM2)
7501 pos_1 = get_cpu_unit_code ("2_1");
7502 pos_2 = get_cpu_unit_code ("2_2");
7503 pos_3 = get_cpu_unit_code ("2_3");
7504 pos_4 = get_cpu_unit_code ("2_4");
7505 pos_5 = get_cpu_unit_code ("2_5");
7506 pos_6 = get_cpu_unit_code ("2_6");
7507 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7508 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7509 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7510 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7511 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7512 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7513 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7514 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7515 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7516 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7517 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7518 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7519 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7520 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7521 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7522 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7523 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7524 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7525 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7526 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7528 else
7530 pos_1 = get_cpu_unit_code ("1_1");
7531 pos_2 = get_cpu_unit_code ("1_2");
7532 pos_3 = get_cpu_unit_code ("1_3");
7533 pos_4 = get_cpu_unit_code ("1_4");
7534 pos_5 = get_cpu_unit_code ("1_5");
7535 pos_6 = get_cpu_unit_code ("1_6");
7536 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7537 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7538 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7539 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7540 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7541 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7542 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7543 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7544 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7545 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7546 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7547 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7548 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7549 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7550 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7551 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7552 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7553 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7554 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7555 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7557 schedule_ebbs (dump_file);
7558 finish_bundle_states ();
7559 if (ia64_tune == PROCESSOR_ITANIUM)
7561 free (add_cycles);
7562 free (clocks);
7564 free (stops_p);
7565 emit_insn_group_barriers (dump_file);
7567 ia64_final_schedule = 0;
7568 timevar_pop (TV_SCHED2);
7570 else
7571 emit_all_insn_group_barriers (dump_file);
7573 /* A call must not be the last instruction in a function; otherwise the
7574 return address would fall outside the function and unwinding would not
7575 work properly. Note that IA-64 differs from dwarf2 on this point. */
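/* Rough sketch of the fix applied below (editor's illustration; exact
   mnemonics and the callee name are approximate):

       br.call.sptk.many b0 = some_noreturn_fn
       ;;                       // stop bit, unless one was already present
       break.f 0                // filler insn so the return address stays
       ;;                       // inside the function's unwind region  */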
7576 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7578 rtx insn;
7579 int saw_stop = 0;
7581 insn = get_last_insn ();
7582 if (! INSN_P (insn))
7583 insn = prev_active_insn (insn);
7584 /* Skip over insns that expand to nothing. */
7585 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7587 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7588 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7589 saw_stop = 1;
7590 insn = prev_active_insn (insn);
7592 if (GET_CODE (insn) == CALL_INSN)
7594 if (! saw_stop)
7595 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7596 emit_insn (gen_break_f ());
7597 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7601 emit_predicate_relation_info ();
7603 if (ia64_flag_var_tracking)
7605 timevar_push (TV_VAR_TRACKING);
7606 variable_tracking_main ();
7607 timevar_pop (TV_VAR_TRACKING);
7611 /* Return true if REGNO is used by the epilogue. */
7614 ia64_epilogue_uses (int regno)
7616 switch (regno)
7618 case R_GR (1):
7619 /* With a call to a function in another module, we will write a new
7620 value to "gp". After returning from such a call, we need to make
7621 sure the function restores the original gp-value, even if the
7622 function itself does not use the gp anymore. */
7623 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7625 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7626 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7627 /* For functions defined with the syscall_linkage attribute, all
7628 input registers are marked as live at all function exits. This
7629 prevents the register allocator from using the input registers,
7630 which in turn makes it possible to restart a system call after
7631 an interrupt without having to save/restore the input registers.
7632 This also prevents kernel data from leaking to application code. */
7633 return lookup_attribute ("syscall_linkage",
7634 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7636 case R_BR (0):
7637 /* Conditional return patterns can't represent the use of `b0' as
7638 the return address, so we force the value live this way. */
7639 return 1;
7641 case AR_PFS_REGNUM:
7642 /* Likewise for ar.pfs, which is used by br.ret. */
7643 return 1;
7645 default:
7646 return 0;
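/* Illustrative use of the syscall_linkage attribute mentioned above
   (editor's sketch; the function name is hypothetical):

       long __attribute__ ((syscall_linkage))
       restartable_syscall (long a0, long a1)
       {
         ...   // may be interrupted and restarted by the kernel
       }

   Because in0-in7 are treated as live at every exit, the incoming
   arguments are still intact if the call is restarted after an
   interrupt, and no kernel data can leak through reused input regs.  */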
7650 /* Return true if REGNO is used by the frame unwinder. */
7653 ia64_eh_uses (int regno)
7655 if (! reload_completed)
7656 return 0;
7658 if (current_frame_info.reg_save_b0
7659 && regno == current_frame_info.reg_save_b0)
7660 return 1;
7661 if (current_frame_info.reg_save_pr
7662 && regno == current_frame_info.reg_save_pr)
7663 return 1;
7664 if (current_frame_info.reg_save_ar_pfs
7665 && regno == current_frame_info.reg_save_ar_pfs)
7666 return 1;
7667 if (current_frame_info.reg_save_ar_unat
7668 && regno == current_frame_info.reg_save_ar_unat)
7669 return 1;
7670 if (current_frame_info.reg_save_ar_lc
7671 && regno == current_frame_info.reg_save_ar_lc)
7672 return 1;
7674 return 0;
7677 /* Return true if this goes in small data/bss. */
7679 /* ??? We could also support our own long data here, generating
7680 movl/add/ld8 instead of addl,ld8/ld8. This makes the code bigger, but
7681 should make it faster because there is one less load. This would also
7682 cover incomplete types, which can't go in sdata/sbss. */
7684 static bool
7685 ia64_in_small_data_p (tree exp)
7687 if (TARGET_NO_SDATA)
7688 return false;
7690 /* We want to merge strings, so we never consider them small data. */
7691 if (TREE_CODE (exp) == STRING_CST)
7692 return false;
7694 /* Functions are never small data. */
7695 if (TREE_CODE (exp) == FUNCTION_DECL)
7696 return false;
7698 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7700 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7702 if (strcmp (section, ".sdata") == 0
7703 || strncmp (section, ".sdata.", 7) == 0
7704 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
7705 || strcmp (section, ".sbss") == 0
7706 || strncmp (section, ".sbss.", 6) == 0
7707 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
7708 return true;
7710 else
7712 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7714 /* If this is an incomplete type with size 0, then we can't put it
7715 in sdata because it might be too big when completed. */
7716 if (size > 0 && size <= ia64_section_threshold)
7717 return true;
7720 return false;
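/* Editor's illustration (declarations invented; assumes the -G threshold
   covers the small objects):

       static int counter;                                  // -> .sbss
       int limit __attribute__ ((section (".sdata"))) = 8;  // explicit section
       int big_table[1024];                                 // too big -> .bss

   Objects accepted by the test above can be reached with a single
   gp-relative addl plus one load, rather than an extra load through the
   linkage table.  */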
7723 /* Output assembly directives for prologue regions. */
7725 /* True if the current basic block is the last block of the function. */
7727 static bool last_block;
7729 /* True if we need a copy_state command at the start of the next block. */
7731 static bool need_copy_state;
7733 /* The function emits unwind directives for the start of an epilogue. */
7735 static void
7736 process_epilogue (void)
7738 /* If this isn't the last block of the function, then we need to label the
7739 current state, and copy it back in at the start of the next block. */
7741 if (!last_block)
7743 fprintf (asm_out_file, "\t.label_state %d\n",
7744 ++cfun->machine->state_num);
7745 need_copy_state = true;
7748 fprintf (asm_out_file, "\t.restore sp\n");
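/* Editor's sketch of the resulting unwind directives (state number and
   layout are illustrative).  For an epilogue that is not in the last
   block, the stream looks roughly like:

       .label_state 1
       .restore sp            // frame is gone on this path
       ...                    // insns of the early-return path
       .body
       .copy_state 1          // frame is considered live again

   The .body/.copy_state pair is emitted later by
   process_for_unwind_directive when need_copy_state is set.  */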
7751 /* This function processes a SET pattern, looking for the specific forms
7752 that require emitting an assembly directive needed for unwinding. */
7754 static int
7755 process_set (FILE *asm_out_file, rtx pat)
7757 rtx src = SET_SRC (pat);
7758 rtx dest = SET_DEST (pat);
7759 int src_regno, dest_regno;
7761 /* Look for the ALLOC insn. */
7762 if (GET_CODE (src) == UNSPEC_VOLATILE
7763 && XINT (src, 1) == UNSPECV_ALLOC
7764 && GET_CODE (dest) == REG)
7766 dest_regno = REGNO (dest);
7768 /* If this is the final destination for ar.pfs, then this must
7769 be the alloc in the prologue. */
7770 if (dest_regno == current_frame_info.reg_save_ar_pfs)
7771 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7772 ia64_dbx_register_number (dest_regno));
7773 else
7775 /* This must be an alloc before a sibcall. We must drop the
7776 old frame info. The easiest way to drop the old frame
7777 info is to ensure we had a ".restore sp" directive
7778 followed by a new prologue. If the procedure doesn't
7779 have a memory-stack frame, we'll issue a dummy ".restore
7780 sp" now. */
7781 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
7782 /* If we haven't done process_epilogue () yet, do it now. */
7783 process_epilogue ();
7784 fprintf (asm_out_file, "\t.prologue\n");
7786 return 1;
7789 /* Look for SP = .... */
7790 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7792 if (GET_CODE (src) == PLUS)
7794 rtx op0 = XEXP (src, 0);
7795 rtx op1 = XEXP (src, 1);
7797 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
7799 if (INTVAL (op1) < 0)
7800 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7801 -INTVAL (op1));
7802 else
7803 process_epilogue ();
7805 else
7807 gcc_assert (GET_CODE (src) == REG
7808 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
7809 process_epilogue ();
7812 return 1;
7815 /* Register move we need to look at. */
7816 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7818 src_regno = REGNO (src);
7819 dest_regno = REGNO (dest);
7821 switch (src_regno)
7823 case BR_REG (0):
7824 /* Saving return address pointer. */
7825 gcc_assert (dest_regno == current_frame_info.reg_save_b0);
7826 fprintf (asm_out_file, "\t.save rp, r%d\n",
7827 ia64_dbx_register_number (dest_regno));
7828 return 1;
7830 case PR_REG (0):
7831 gcc_assert (dest_regno == current_frame_info.reg_save_pr);
7832 fprintf (asm_out_file, "\t.save pr, r%d\n",
7833 ia64_dbx_register_number (dest_regno));
7834 return 1;
7836 case AR_UNAT_REGNUM:
7837 gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat);
7838 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7839 ia64_dbx_register_number (dest_regno));
7840 return 1;
7842 case AR_LC_REGNUM:
7843 gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc);
7844 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7845 ia64_dbx_register_number (dest_regno));
7846 return 1;
7848 case STACK_POINTER_REGNUM:
7849 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
7850 && frame_pointer_needed);
7851 fprintf (asm_out_file, "\t.vframe r%d\n",
7852 ia64_dbx_register_number (dest_regno));
7853 return 1;
7855 default:
7856 /* Everything else should indicate being stored to memory. */
7857 gcc_unreachable ();
7861 /* Memory store we need to look at. */
7862 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7864 long off;
7865 rtx base;
7866 const char *saveop;
7868 if (GET_CODE (XEXP (dest, 0)) == REG)
7870 base = XEXP (dest, 0);
7871 off = 0;
7873 else
7875 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
7876 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
7877 base = XEXP (XEXP (dest, 0), 0);
7878 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7881 if (base == hard_frame_pointer_rtx)
7883 saveop = ".savepsp";
7884 off = - off;
7886 else
7888 gcc_assert (base == stack_pointer_rtx);
7889 saveop = ".savesp";
7892 src_regno = REGNO (src);
7893 switch (src_regno)
7895 case BR_REG (0):
7896 gcc_assert (!current_frame_info.reg_save_b0);
7897 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7898 return 1;
7900 case PR_REG (0):
7901 gcc_assert (!current_frame_info.reg_save_pr);
7902 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7903 return 1;
7905 case AR_LC_REGNUM:
7906 gcc_assert (!current_frame_info.reg_save_ar_lc);
7907 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7908 return 1;
7910 case AR_PFS_REGNUM:
7911 gcc_assert (!current_frame_info.reg_save_ar_pfs);
7912 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7913 return 1;
7915 case AR_UNAT_REGNUM:
7916 gcc_assert (!current_frame_info.reg_save_ar_unat);
7917 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7918 return 1;
7920 case GR_REG (4):
7921 case GR_REG (5):
7922 case GR_REG (6):
7923 case GR_REG (7):
7924 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7925 1 << (src_regno - GR_REG (4)));
7926 return 1;
7928 case BR_REG (1):
7929 case BR_REG (2):
7930 case BR_REG (3):
7931 case BR_REG (4):
7932 case BR_REG (5):
7933 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7934 1 << (src_regno - BR_REG (1)));
7935 return 1;
7937 case FR_REG (2):
7938 case FR_REG (3):
7939 case FR_REG (4):
7940 case FR_REG (5):
7941 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7942 1 << (src_regno - FR_REG (2)));
7943 return 1;
7945 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7946 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7947 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7948 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7949 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7950 1 << (src_regno - FR_REG (12)));
7951 return 1;
7953 default:
7954 return 0;
7958 return 0;
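/* Editor's illustration of the mapping implemented above (register
   numbers and the frame size are invented):

       alloc r35 = ar.pfs, ...   ->  .save ar.pfs, r35
       mov r34 = b0              ->  .save rp, r34
       mov r33 = pr              ->  .save pr, r33
       adds r12 = -64, r12       ->  .fframe 64
       st8.spill [r2] = r4       ->  .save.g 0x1
       stf.spill [r2] = f2       ->  .save.f 0x1

   A copy of sp into the hard frame pointer produces ".vframe" instead,
   and an sp increment (or a copy back from the frame pointer) is treated
   as the start of an epilogue via process_epilogue.  */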
7962 /* This function looks at a single insn and emits any directives
7963 required to unwind this insn. */
7964 void
7965 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7967 if (flag_unwind_tables
7968 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7970 rtx pat;
7972 if (GET_CODE (insn) == NOTE
7973 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7975 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7977 /* Restore unwind state from immediately before the epilogue. */
7978 if (need_copy_state)
7980 fprintf (asm_out_file, "\t.body\n");
7981 fprintf (asm_out_file, "\t.copy_state %d\n",
7982 cfun->machine->state_num);
7983 need_copy_state = false;
7987 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7988 return;
7990 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7991 if (pat)
7992 pat = XEXP (pat, 0);
7993 else
7994 pat = PATTERN (insn);
7996 switch (GET_CODE (pat))
7998 case SET:
7999 process_set (asm_out_file, pat);
8000 break;
8002 case PARALLEL:
8004 int par_index;
8005 int limit = XVECLEN (pat, 0);
8006 for (par_index = 0; par_index < limit; par_index++)
8008 rtx x = XVECEXP (pat, 0, par_index);
8009 if (GET_CODE (x) == SET)
8010 process_set (asm_out_file, x);
8012 break;
8015 default:
8016 gcc_unreachable ();
8022 enum ia64_builtins
8024 IA64_BUILTIN_BSP,
8025 IA64_BUILTIN_FLUSHRS
8028 void
8029 ia64_init_builtins (void)
8031 tree fpreg_type;
8032 tree float80_type;
8034 /* The __fpreg type. */
8035 fpreg_type = make_node (REAL_TYPE);
8036 /* ??? The back end should know to load/save __fpreg variables using
8037 the ldf.fill and stf.spill instructions. */
8038 TYPE_PRECISION (fpreg_type) = 80;
8039 layout_type (fpreg_type);
8040 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
8042 /* The __float80 type. */
8043 float80_type = make_node (REAL_TYPE);
8044 TYPE_PRECISION (float80_type) = 80;
8045 layout_type (float80_type);
8046 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
8048 /* The __float128 type. */
8049 if (!TARGET_HPUX)
8051 tree float128_type = make_node (REAL_TYPE);
8052 TYPE_PRECISION (float128_type) = 128;
8053 layout_type (float128_type);
8054 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8056 else
8057 /* Under HPUX, this is a synonym for "long double". */
8058 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
8059 "__float128");
8061 #define def_builtin(name, type, code) \
8062 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
8063 NULL, NULL_TREE)
8065 def_builtin ("__builtin_ia64_bsp",
8066 build_function_type (ptr_type_node, void_list_node),
8067 IA64_BUILTIN_BSP);
8069 def_builtin ("__builtin_ia64_flushrs",
8070 build_function_type (void_type_node, void_list_node),
8071 IA64_BUILTIN_FLUSHRS);
8073 #undef def_builtin
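/* Editor's usage sketch of the types and builtins registered above:

       __float80 ext = 1.0;                // 80-bit extended type
       __fpreg raw;                        // raw FP-register format
       void *bsp = __builtin_ia64_bsp ();  // current backing store pointer
       __builtin_ia64_flushrs ();          // flush dirty stacked registers

   Both builtins take no arguments, matching the function types that
   def_builtin constructs above.  */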
8077 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8078 enum machine_mode mode ATTRIBUTE_UNUSED,
8079 int ignore ATTRIBUTE_UNUSED)
8081 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8082 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8084 switch (fcode)
8086 case IA64_BUILTIN_BSP:
8087 if (! target || ! register_operand (target, DImode))
8088 target = gen_reg_rtx (DImode);
8089 emit_insn (gen_bsp_value (target));
8090 #ifdef POINTERS_EXTEND_UNSIGNED
8091 target = convert_memory_address (ptr_mode, target);
8092 #endif
8093 return target;
8095 case IA64_BUILTIN_FLUSHRS:
8096 emit_insn (gen_flushrs ());
8097 return const0_rtx;
8099 default:
8100 break;
8103 return NULL_RTX;
8106 /* On HP-UX IA64, aggregate parameters are passed in the most significant
8107 bits of the stack slot. */
8109 enum direction
8110 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8112 /* Exception to normal case for structures/unions/etc. */
8114 if (type && AGGREGATE_TYPE_P (type)
8115 && int_size_in_bytes (type) < UNITS_PER_WORD)
8116 return upward;
8118 /* Fall back to the default. */
8119 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
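/* Editor's illustration (type name invented): a small aggregate such as

       struct tag3 { char a, b, c; };      // 3 bytes

   passed on the HP-UX stack occupies the most significant bytes of its
   slot ("upward" padding); all other cases fall back to
   DEFAULT_FUNCTION_ARG_PADDING.  */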
8122 /* Linked list of all external functions that are to be emitted by GCC.
8123 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8124 order to avoid putting out names that are never really used. */
8126 struct extern_func_list GTY(())
8128 struct extern_func_list *next;
8129 tree decl;
8132 static GTY(()) struct extern_func_list *extern_func_head;
8134 static void
8135 ia64_hpux_add_extern_decl (tree decl)
8137 struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8139 p->decl = decl;
8140 p->next = extern_func_head;
8141 extern_func_head = p;
8144 /* Print out the list of used global functions. */
8146 static void
8147 ia64_hpux_file_end (void)
8149 struct extern_func_list *p;
8151 for (p = extern_func_head; p; p = p->next)
8153 tree decl = p->decl;
8154 tree id = DECL_ASSEMBLER_NAME (decl);
8156 gcc_assert (id);
8158 if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8160 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8162 TREE_ASM_WRITTEN (decl) = 1;
8163 (*targetm.asm_out.globalize_label) (asm_out_file, name);
8164 fputs (TYPE_ASM_OP, asm_out_file);
8165 assemble_name (asm_out_file, name);
8166 fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8170 extern_func_head = 0;
8173 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
8174 modes of word_mode and larger. Rename the TFmode libfuncs using the
8175 HPUX conventions. __divtf3 is used for XFmode; we need to keep it for
8176 backward compatibility. */
8178 static void
8179 ia64_init_libfuncs (void)
8181 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
8182 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
8183 set_optab_libfunc (smod_optab, SImode, "__modsi3");
8184 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
8186 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8187 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8188 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8189 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8190 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8192 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8193 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8194 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8195 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8196 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8197 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8199 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8200 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8201 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8202 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8204 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8205 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
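/* Editor's illustration: with the names installed above, a quad
   precision division such as

       long double qdiv (long double a, long double b) { return a / b; }

   (TFmode long double on HP-UX) becomes a call to _U_Qfdiv, and a
   32-bit integer a / b becomes a call to __divsi3.  */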
8208 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8210 static void
8211 ia64_hpux_init_libfuncs (void)
8213 ia64_init_libfuncs ();
8215 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8216 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8217 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8219 /* ia64_expand_compare uses this. */
8220 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8222 /* These should never be used. */
8223 set_optab_libfunc (eq_optab, TFmode, 0);
8224 set_optab_libfunc (ne_optab, TFmode, 0);
8225 set_optab_libfunc (gt_optab, TFmode, 0);
8226 set_optab_libfunc (ge_optab, TFmode, 0);
8227 set_optab_libfunc (lt_optab, TFmode, 0);
8228 set_optab_libfunc (le_optab, TFmode, 0);
8231 /* Rename the division and modulus functions in VMS. */
8233 static void
8234 ia64_vms_init_libfuncs (void)
8236 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8237 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8238 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8239 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8240 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8241 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8242 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8243 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
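/* Editor's illustration: on VMS a plain integer division such as

       int f (int a, int b) { return a / b; }

   therefore calls OTS$DIV_I, with the DImode and unsigned variants
   mapping to OTS$DIV_L, OTS$DIV_UI and OTS$DIV_UL as set above.  */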
8246 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8247 the HPUX conventions. */
8249 static void
8250 ia64_sysv4_init_libfuncs (void)
8252 ia64_init_libfuncs ();
8254 /* These functions are not part of the HPUX TFmode interface. We
8255 use them instead of _U_Qfcmp, which doesn't work the way we
8256 expect. */
8257 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8258 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8259 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8260 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8261 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8262 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8264 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8265 glibc doesn't have them. */
8268 /* Switch to the section to which we should output X. The only thing
8269 special we do here is to honor small data. */
8271 static void
8272 ia64_select_rtx_section (enum machine_mode mode, rtx x,
8273 unsigned HOST_WIDE_INT align)
8275 if (GET_MODE_SIZE (mode) > 0
8276 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8277 sdata_section ();
8278 else
8279 default_elf_select_rtx_section (mode, x, align);
8282 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8283 Pretend flag_pic is always set. */
8285 static void
8286 ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8288 default_elf_select_section_1 (exp, reloc, align, true);
8291 static void
8292 ia64_rwreloc_unique_section (tree decl, int reloc)
8294 default_unique_section_1 (decl, reloc, true);
8297 static void
8298 ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8299 unsigned HOST_WIDE_INT align)
8301 int save_pic = flag_pic;
8302 flag_pic = 1;
8303 ia64_select_rtx_section (mode, x, align);
8304 flag_pic = save_pic;
8307 #ifndef TARGET_RWRELOC
8308 #define TARGET_RWRELOC flag_pic
8309 #endif
8311 static unsigned int
8312 ia64_section_type_flags (tree decl, const char *name, int reloc)
8314 unsigned int flags = 0;
8316 if (strcmp (name, ".sdata") == 0
8317 || strncmp (name, ".sdata.", 7) == 0
8318 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
8319 || strncmp (name, ".sdata2.", 8) == 0
8320 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
8321 || strcmp (name, ".sbss") == 0
8322 || strncmp (name, ".sbss.", 6) == 0
8323 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
8324 flags = SECTION_SMALL;
8326 flags |= default_section_type_flags_1 (decl, name, reloc, TARGET_RWRELOC);
8327 return flags;
8330 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8331 structure type and the address of that return value should be passed
8332 in out0, rather than in r8. */
8334 static bool
8335 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8337 tree ret_type = TREE_TYPE (fntype);
8339 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8340 as the structure return address parameter, if the return value
8341 type has a non-trivial copy constructor or destructor. It is not
8342 clear if this same convention should be used for other
8343 programming languages. Until G++ 3.4, we incorrectly used r8 for
8344 these return values. */
8345 return (abi_version_at_least (2)
8346 && ret_type
8347 && TYPE_MODE (ret_type) == BLKmode
8348 && TREE_ADDRESSABLE (ret_type)
8349 && strcmp (lang_hooks.name, "GNU C++") == 0);
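/* Editor's illustration in C++ terms (type and function names invented):

       struct heavy { heavy (const heavy &); ~heavy (); int v; };
       heavy make_heavy ();      // return-slot address passed in out0
       struct pod { int v[16]; };
       pod make_pod ();          // address passed in r8 as usual

   Only the first case satisfies the test above (BLKmode, addressable
   type, GNU C++, -fabi-version >= 2).  */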
8352 /* Output the assembler code for a thunk function. THUNK_DECL is the
8353 declaration for the thunk function itself, FUNCTION is the decl for
8354 the target function. DELTA is an immediate constant offset to be
8355 added to THIS. If VCALL_OFFSET is nonzero, the word at
8356 *(*this + vcall_offset) should be added to THIS. */
8358 static void
8359 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8360 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8361 tree function)
8363 rtx this, insn, funexp;
8364 unsigned int this_parmno;
8365 unsigned int this_regno;
8367 reload_completed = 1;
8368 epilogue_completed = 1;
8369 no_new_pseudos = 1;
8370 reset_block_changes ();
8372 /* Set things up as ia64_expand_prologue might. */
8373 last_scratch_gr_reg = 15;
8375 memset (&current_frame_info, 0, sizeof (current_frame_info));
8376 current_frame_info.spill_cfa_off = -16;
8377 current_frame_info.n_input_regs = 1;
8378 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8380 /* Mark the end of the (empty) prologue. */
8381 emit_note (NOTE_INSN_PROLOGUE_END);
8383 /* Figure out whether "this" will be the first parameter (the
8384 typical case) or the second parameter (as happens when the
8385 virtual function returns certain class objects). */
8386 this_parmno
8387 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8388 ? 1 : 0);
8389 this_regno = IN_REG (this_parmno);
8390 if (!TARGET_REG_NAMES)
8391 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8393 this = gen_rtx_REG (Pmode, this_regno);
8394 if (TARGET_ILP32)
8396 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8397 REG_POINTER (tmp) = 1;
8398 if (delta && CONST_OK_FOR_I (delta))
8400 emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8401 delta = 0;
8403 else
8404 emit_insn (gen_ptr_extend (this, tmp));
8407 /* Apply the constant offset, if required. */
8408 if (delta)
8410 rtx delta_rtx = GEN_INT (delta);
8412 if (!CONST_OK_FOR_I (delta))
8414 rtx tmp = gen_rtx_REG (Pmode, 2);
8415 emit_move_insn (tmp, delta_rtx);
8416 delta_rtx = tmp;
8418 emit_insn (gen_adddi3 (this, this, delta_rtx));
8421 /* Apply the offset from the vtable, if required. */
8422 if (vcall_offset)
8424 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8425 rtx tmp = gen_rtx_REG (Pmode, 2);
8427 if (TARGET_ILP32)
8429 rtx t = gen_rtx_REG (ptr_mode, 2);
8430 REG_POINTER (t) = 1;
8431 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8432 if (CONST_OK_FOR_I (vcall_offset))
8434 emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8435 vcall_offset_rtx));
8436 vcall_offset = 0;
8438 else
8439 emit_insn (gen_ptr_extend (tmp, t));
8441 else
8442 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8444 if (vcall_offset)
8446 if (!CONST_OK_FOR_J (vcall_offset))
8448 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8449 emit_move_insn (tmp2, vcall_offset_rtx);
8450 vcall_offset_rtx = tmp2;
8452 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8455 if (TARGET_ILP32)
8456 emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8457 gen_rtx_MEM (ptr_mode, tmp));
8458 else
8459 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8461 emit_insn (gen_adddi3 (this, this, tmp));
8464 /* Generate a tail call to the target function. */
8465 if (! TREE_USED (function))
8467 assemble_external (function);
8468 TREE_USED (function) = 1;
8470 funexp = XEXP (DECL_RTL (function), 0);
8471 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8472 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8473 insn = get_last_insn ();
8474 SIBLING_CALL_P (insn) = 1;
8476 /* Code generation for calls relies on splitting. */
8477 reload_completed = 1;
8478 epilogue_completed = 1;
8479 try_split (PATTERN (insn), insn, 0);
8481 emit_barrier ();
8483 /* Run just enough of rest_of_compilation to get the insns emitted.
8484 There's not really enough bulk here to make other passes such as
8485 instruction scheduling worthwhile. Note that use_thunk calls
8486 assemble_start_function and assemble_end_function. */
8488 insn_locators_initialize ();
8489 emit_all_insn_group_barriers (NULL);
8490 insn = get_insns ();
8491 shorten_branches (insn);
8492 final_start_function (insn, file, 1);
8493 final (insn, file, 1);
8494 final_end_function ();
8496 reload_completed = 0;
8497 epilogue_completed = 0;
8498 no_new_pseudos = 0;
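/* Editor's sketch of what the emitted thunk computes, in rough C terms:

       this += DELTA;                                  // constant adjustment
       if (VCALL_OFFSET)
         this += *(long *) (*(char **) this + VCALL_OFFSET);
       return TARGET_FUNCTION (this, ...);             // sibling call, no frame

   "this" arrives in in0, or in in1 when the return-slot address takes
   the first parameter slot (see ia64_struct_retval_addr_is_first_parm_p
   above).  */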
8501 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
8503 static rtx
8504 ia64_struct_value_rtx (tree fntype,
8505 int incoming ATTRIBUTE_UNUSED)
8507 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
8508 return NULL_RTX;
8509 return gen_rtx_REG (Pmode, GR_REG (8));
8512 static bool
8513 ia64_scalar_mode_supported_p (enum machine_mode mode)
8515 switch (mode)
8517 case QImode:
8518 case HImode:
8519 case SImode:
8520 case DImode:
8521 case TImode:
8522 return true;
8524 case SFmode:
8525 case DFmode:
8526 case XFmode:
8527 return true;
8529 case TFmode:
8530 return TARGET_HPUX;
8532 default:
8533 return false;
8537 static bool
8538 ia64_vector_mode_supported_p (enum machine_mode mode)
8540 switch (mode)
8542 case V8QImode:
8543 case V4HImode:
8544 case V2SImode:
8545 return true;
8547 case V2SFmode:
8548 return true;
8550 default:
8551 return false;
8555 #include "gt-ia64.h"