/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "tree-gimple.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
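/* Reference note (not from the original sources): the three sizes accepted
   by -mtls-size roughly correspond to the instruction used for the TLS
   offset -- a 14-bit offset fits the immediate of "adds", a 22-bit offset
   needs "addl", and a full 64-bit offset is built with "movl".  */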
/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;       /* size of the stack frame, not including
                                     the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;    /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;       /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* size of spill area for others.  */
  HARD_REG_SET mask;              /* mask of saved registers.  */
  unsigned int gr_used_mask;      /* mask of registers in use as gr spill
                                     registers or long-term scratches.  */
  int n_spilled;                  /* number of spilled registers.  */
  int reg_fp;                     /* register for fp.  */
  int reg_save_b0;                /* save register for b0.  */
  int reg_save_pr;                /* save register for prs.  */
  int reg_save_ar_pfs;            /* save register for ar.pfs.  */
  int reg_save_ar_unat;           /* save register for ar.unat.  */
  int reg_save_ar_lc;             /* save register for ar.lc.  */
  int reg_save_gp;                /* save register for gp.  */
  int n_input_regs;               /* number of input registers used.  */
  int n_local_regs;               /* number of local registers used.  */
  int n_output_regs;              /* number of output registers used.  */
  int n_rotate_regs;              /* number of rotating registers used.  */

  char need_regstk;               /* true if a .regstk directive needed.  */
  char initialized;               /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
static int find_gr_spill (int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, bool);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                         tree, int *, int);
static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                    tree, bool);
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                   tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (void);
static int process_set (FILE *, rtx);

static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
                                         int, tree, rtx);
static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);

static void ia64_select_rtx_section (enum machine_mode, rtx,
                                     unsigned HOST_WIDE_INT);
static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section (tree, int)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
                                             unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_hpux_add_extern_decl (tree decl)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_file_end (void)
     ATTRIBUTE_UNUSED;
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
static bool ia64_vector_mode_supported_p (enum machine_mode mode);

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};
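/* Usage sketch for the "model" attribute registered above (an illustration,
   not part of the original file):

     static int counter __attribute__ ((model ("small")));

   This asks for COUNTER to be placed in the small address area, so its
   address fits the 22-bit immediate of an addl instruction.  The handler
   ia64_handle_model_attribute below accepts "small" or "__small__" and
   rejects the attribute on local variables and on functions.  */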
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#endif

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT process_for_unwind_directive

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
   in an order different from the specified program order.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ia64_handle_option

struct gcc_target targetm = TARGET_INITIALIZER;
typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
  ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning ("invalid argument of %qs attribute",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
           == FUNCTION_DECL)
          && !TREE_STATIC (decl))
        {
          error ("%Jan address area attribute cannot be specified for "
                 "local variables", decl, decl);
          *no_add_attrs = true;
        }
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
        {
          error ("%Jaddress area of '%s' conflicts with previous "
                 "declaration", decl, decl);
          *no_add_attrs = true;
        }
      break;

    case FUNCTION_DECL:
      error ("%Jaddress area attribute cannot be specified for functions",
             decl, decl);
      *no_add_attrs = true;
      break;

    default:
      warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: abort ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}

/* Implement CONST_OK_FOR_LETTER_P.  */

bool
ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
{
  switch (c)
    {
    case 'I':
      return CONST_OK_FOR_I (value);
    case 'J':
      return CONST_OK_FOR_J (value);
    case 'K':
      return CONST_OK_FOR_K (value);
    case 'L':
      return CONST_OK_FOR_L (value);
    case 'M':
      return CONST_OK_FOR_M (value);
    case 'N':
      return CONST_OK_FOR_N (value);
    case 'O':
      return CONST_OK_FOR_O (value);
    case 'P':
      return CONST_OK_FOR_P (value);
    default:
      return false;
    }
}

/* Implement CONST_DOUBLE_OK_FOR_LETTER_P.  */

bool
ia64_const_double_ok_for_letter_p (rtx value, char c)
{
  switch (c)
    {
    case 'G':
      return CONST_DOUBLE_OK_FOR_G (value);
    default:
      return false;
    }
}

/* Implement EXTRA_CONSTRAINT.  */

bool
ia64_extra_constraint (rtx value, char c)
{
  switch (c)
    {
    case 'Q':
      /* Non-volatile memory for FP_REG loads/stores.  */
      return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value);

    case 'R':
      /* 1..4 for shladd arguments.  */
      return (GET_CODE (value) == CONST_INT
              && INTVAL (value) >= 1 && INTVAL (value) <= 4);

    case 'S':
      /* Non-post-inc memory for asms and other unsavory creatures.  */
      return (GET_CODE (value) == MEM
              && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
              && (reload_in_progress || memory_operand (value, VOIDmode)));

    case 'T':
      /* Symbol ref to small-address-area.  */
      return (GET_CODE (value) == SYMBOL_REF
              && SYMBOL_REF_SMALL_ADDR_P (value));

    case 'U':
      /* Vector zero.  */
      return value == CONST0_RTX (GET_MODE (value));

    case 'W':
      /* An integer vector, such that conversion to an integer yields a
         value appropriate for an integer 'J' constraint.  */
      if (GET_CODE (value) == CONST_VECTOR
          && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
        {
          value = simplify_subreg (DImode, value, GET_MODE (value), 0);
          return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
        }
      return false;

    case 'Y':
      /* A V2SF vector containing elements that satisfy 'G'.  */
      return
        (GET_CODE (value) == CONST_VECTOR
         && GET_MODE (value) == V2SFmode
         && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
         && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));

    default:
      return false;
    }
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
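/* Worked example for ia64_depz_field_mask (illustrative values only):
   rop = 0x7f8 with rshift = 3 gives op >> 3 == 0xff, and
   exact_log2 (0xff + 1) == 8, i.e. an 8-bit deposit field.  A mask that
   is not a solid block of ones after the shift, e.g. 0xf0f with shift 0,
   yields exact_log2 (0xf10) == -1 and is rejected by the caller.  */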
/* Expand a symbolic constant load.  */

void
ia64_expand_load_address (rtx dest, rtx src)
{
  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (src))
    abort ();
  if (GET_CODE (dest) != REG)
    abort ();

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG (Pmode, REGNO (dest));

  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
      return;
    }
  else if (TARGET_AUTO_PIC)
    {
      emit_insn (gen_load_gprel64 (dest, src));
      return;
    }
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    {
      emit_insn (gen_load_fptr (dest, src));
      return;
    }
  else if (sdata_symbolic_operand (src, VOIDmode))
    {
      emit_insn (gen_load_gprel (dest, src));
      return;
    }

  if (GET_CODE (src) == CONST
      && GET_CODE (XEXP (src, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x3fff) != 0)
    {
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
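      /* For instance (illustrative numbers, not from the original comment):
         ofs = 0x12345 has low 14 bits 0x2345, which sign-extend to
         lo = -0x1cbb, leaving hi = 0x14000; the symbol plus hi is loaded
         first and lo then fits the 14-bit immediate of the add below.  */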
      ia64_expand_load_address (dest, plus_constant (sym, hi));
      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
    }
  else
    {
      rtx tmp;

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
    }
}

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_const_mem (Pmode, tga_op1);

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
      tga_op2 = gen_const_mem (Pmode, tga_op2);

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
        op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_const_mem (Pmode, tga_op1);

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_dtprel (op0, op1));
          emit_insn (gen_adddi3 (op0, tmp, op0));
        }
      else
        emit_insn (gen_add_dtprel (op0, tmp, op1));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_tprel (tmp, op1));
      tmp = gen_const_mem (Pmode, tmp);
      tmp = force_reg (Pmode, tmp);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_tprel (op0, op1));
          emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
        }
      else
        emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
      break;

    default:
      abort ();
    }

  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}

rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      enum tls_model tls_kind;
      if (GET_CODE (op1) == SYMBOL_REF
          && (tls_kind = SYMBOL_REF_TLS_MODEL (op1)))
        return ia64_expand_tls_address (tls_kind, op0, op1);

      if (!TARGET_NO_PIC && reload_completed)
        {
          ia64_expand_load_address (op0, op1);
          return NULL_RTX;
        }
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
                                          PATTERN (insn));
}

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */
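/* For example (a sketch, not part of the original comment): a TImode load
   whose address is a plain base register is split into a DImode load
   through (post_inc base) for word 0 followed by one through
   (post_dec base) for word 1, leaving the pointer unchanged; when DEAD is
   true the trailing POST_DEC is simply omitted.  */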
static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      if (reversed) abort ();

      if (GET_MODE (in) != TFmode)
        split_double (in, &out[0], &out[1]);
      else
        /* split_double does not understand how to split a TFmode
           quantity into a pair of DImode constants.  */
        {
          REAL_VALUE_TYPE r;
          unsigned HOST_WIDE_INT p[2];
          long l[4];  /* TFmode is 128 bits */

          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
          real_to_target (l, &r, TFmode);

          if (FLOAT_WORDS_BIG_ENDIAN)
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
            }
          else
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
              p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
            }
          out[0] = GEN_INT (p[0]);
          out[1] = GEN_INT (p[1]);
        }
      break;

    case MEM:
      {
        rtx base = XEXP (in, 0);
        rtx offset;

        switch (GET_CODE (base))
          {
          case REG:
            if (!reversed)
              {
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
                out[1] = adjust_automodify_address
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
              }
            else
              {
                /* Reversal requires a pre-increment, which can only
                   be done as a separate insn.  */
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
                out[1] = adjust_address (in, DImode, 0);
              }
            break;

          case POST_INC:
            if (reversed || dead) abort ();
            /* Just do the increment in two steps.  */
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
            break;

          case POST_DEC:
            if (reversed || dead) abort ();
            /* Add 8, subtract 24.  */
            base = XEXP (base, 0);
            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
            out[1] = adjust_automodify_address
              (in, DImode,
               gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
               8);
            break;

          case POST_MODIFY:
            if (reversed || dead) abort ();
            /* Extract and adjust the modification.  This case is
               trickier than the others, because we might have an
               index register, or we might have a combined offset that
               doesn't fit a signed 9-bit displacement field.  We can
               assume the incoming expression is already legitimate.  */
            offset = XEXP (base, 1);
            base = XEXP (base, 0);

            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

            if (GET_CODE (XEXP (offset, 1)) == REG)
              {
                /* Can't adjust the postmodify to match.  Emit the
                   original, then a separate addition insn.  */
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
              }
            else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
              abort ();
            else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
              {
                /* Again the postmodify cannot be made to match, but
                   in this case it's more efficient to get rid of the
                   postmodify entirely and fix up with an add insn.  */
                out[1] = adjust_automodify_address (in, DImode, base, 8);
                fixup = gen_adddi3 (base, base,
                                    GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
              }
            else
              {
                /* Combined offset still fits in the displacement field.
                   (We cannot overflow it at the high end.)  */
                out[1] = adjust_automodify_address
                  (in, DImode,
                   gen_rtx_POST_MODIFY (Pmode, base,
                     gen_rtx_PLUS (Pmode, base,
                                   GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
                   8);
              }
            break;

          default:
            abort ();
          }
        break;
      }

    default:
      abort ();
    }

  return fixup;
}

/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will abort.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
        base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
        reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)                               \
  if (GET_CODE (EXP) == MEM                                             \
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY                       \
          || GET_CODE (XEXP (EXP, 0)) == POST_INC                       \
          || GET_CODE (XEXP (EXP, 0)) == POST_DEC))                     \
    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,                      \
                                          XEXP (XEXP (EXP, 0), 0),      \
                                          REG_NOTES (INSN))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}

/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_xfmode_operand (rtx in, int force)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, XFmode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (XFmode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

static GTY(()) rtx cmptf_libfunc;

rtx
ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
        cmp = op0;
      else
        abort ();
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (GET_MODE (op0) == TFmode)
    {
      enum qfcmp_magic {
        QCMP_INV = 1,   /* Raise FP_INVALID on SNaN as a side effect.  */
        QCMP_UNORD = 2,
        QCMP_EQ = 4,
        QCMP_LT = 8,
        QCMP_GT = 16
      } magic;
      enum rtx_code ncode;
      rtx ret, insns;
      if (!cmptf_libfunc || GET_MODE (op1) != TFmode)
        abort ();
      switch (code)
        {
          /* 1 = equal, 0 = not equal.  Equality operators do
             not raise FP_INVALID when given an SNaN operand.  */
        case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
        case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
          /* isunordered() from C99.  */
        case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
        case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
          /* Relational operators raise FP_INVALID when given
             an SNaN operand.  */
        case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
        case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
        case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
        case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
          /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
             Expanders for buneq etc. would have to be added to ia64.md
             for this to be useful.  */
        default: abort ();
        }

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
                                     op0, TFmode, op1, TFmode,
                                     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (ncode, BImode,
                                              ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
                          gen_rtx_fmt_ee (code, BImode, op0, op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
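/* Illustration of the HPUX TFmode path above (hypothetical operands): for
   "a < b" the expander emits roughly

     ret = _U_Qfcmp (a, b, 9);   -- 9 == QCMP_LT | QCMP_INV
     cmp = (ret != 0);

   and then returns (ne cmp 0) for the branch or scc pattern to consume.  */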
/* Generate an integral vector comparison.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
                            rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  switch (code)
    {
    case EQ:
    case GT:
      break;

    case NE:
      code = EQ;
      negate = true;
      break;

    case LE:
      code = GT;
      negate = true;
      break;

    case GE:
      negate = true;
      /* FALLTHRU */

    case LT:
      x = op0;
      op0 = op1;
      op1 = x;
      code = GT;
      break;

    case GTU:
    case GEU:
    case LTU:
    case LEU:
      {
        rtx w0h, w0l, w1h, w1l, ch, cl;
        enum machine_mode wmode;
        rtx (*unpack_l) (rtx, rtx, rtx);
        rtx (*unpack_h) (rtx, rtx, rtx);
        rtx (*pack) (rtx, rtx, rtx);

        /* We don't have native unsigned comparisons, but we can generate
           them better than generic code can.  */

        if (mode == V2SImode)
          abort ();
        else if (mode == V8QImode)
          {
            wmode = V4HImode;
            pack = gen_pack2_sss;
            unpack_l = gen_unpack1_l;
            unpack_h = gen_unpack1_h;
          }
        else if (mode == V4HImode)
          {
            wmode = V2SImode;
            pack = gen_pack4_sss;
            unpack_l = gen_unpack2_l;
            unpack_h = gen_unpack2_h;
          }
        else
          abort ();

        /* Unpack into wider vectors, zero extending the elements.  */

        w0l = gen_reg_rtx (wmode);
        w0h = gen_reg_rtx (wmode);
        w1l = gen_reg_rtx (wmode);
        w1h = gen_reg_rtx (wmode);
        emit_insn (unpack_l (gen_lowpart (mode, w0l), op0, CONST0_RTX (mode)));
        emit_insn (unpack_h (gen_lowpart (mode, w0h), op0, CONST0_RTX (mode)));
        emit_insn (unpack_l (gen_lowpart (mode, w1l), op1, CONST0_RTX (mode)));
        emit_insn (unpack_h (gen_lowpart (mode, w1h), op1, CONST0_RTX (mode)));

        /* Compare in the wider mode.  */

        cl = gen_reg_rtx (wmode);
        ch = gen_reg_rtx (wmode);
        code = signed_condition (code);
        ia64_expand_vecint_compare (code, wmode, cl, w0l, w1l);
        negate = ia64_expand_vecint_compare (code, wmode, ch, w0h, w1h);

        /* Repack into a single narrower vector.  */

        emit_insn (pack (dest, cl, ch));
      }
      return negate;

    default:
      abort ();
    }

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return negate;
}
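/* Sketch of the unsigned widening trick above (descriptive only): a V8QImode
   GTU comparison zero-extends both operands into V4HImode halves with
   unpack1.l/unpack1.h, compares the halves with the signed GT case (which is
   safe once the elements are zero extended), and then packs the two half
   results back into a V8QImode vector with pack2.sss.  */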
static void
ia64_expand_vcondu_v2si (enum rtx_code code, rtx operands[])
{
  rtx dl, dh, bl, bh, op1l, op1h, op2l, op2h, op4l, op4h, op5l, op5h, x;

  /* In this case, we extract the two SImode quantities and generate
     normal comparisons for each of them.  */

  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  op4l = gen_lowpart (SImode, operands[4]);
  op5l = gen_lowpart (SImode, operands[5]);

  op1h = gen_reg_rtx (SImode);
  op2h = gen_reg_rtx (SImode);
  op4h = gen_reg_rtx (SImode);
  op5h = gen_reg_rtx (SImode);

  emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op1h),
                          gen_lowpart (DImode, operands[1]), GEN_INT (32)));
  emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op2h),
                          gen_lowpart (DImode, operands[2]), GEN_INT (32)));
  emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op4h),
                          gen_lowpart (DImode, operands[4]), GEN_INT (32)));
  emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op5h),
                          gen_lowpart (DImode, operands[5]), GEN_INT (32)));

  bl = gen_reg_rtx (BImode);
  x = gen_rtx_fmt_ee (code, BImode, op4l, op5l);
  emit_insn (gen_rtx_SET (VOIDmode, bl, x));

  bh = gen_reg_rtx (BImode);
  x = gen_rtx_fmt_ee (code, BImode, op4h, op5h);
  emit_insn (gen_rtx_SET (VOIDmode, bh, x));

  /* With the results of the comparisons, emit conditional moves.  */

  dl = gen_reg_rtx (SImode);
  x = gen_rtx_IF_THEN_ELSE (SImode, bl, op1l, op2l);
  emit_insn (gen_rtx_SET (VOIDmode, dl, x));

  dh = gen_reg_rtx (SImode);
  x = gen_rtx_IF_THEN_ELSE (SImode, bh, op1h, op2h);
  emit_insn (gen_rtx_SET (VOIDmode, dh, x));

  /* Merge the two partial results back into a vector.  */

  x = gen_rtx_VEC_CONCAT (V2SImode, dl, dh);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
}

/* Emit an integral vector conditional move.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  /* Since we don't have unsigned V2SImode comparisons, it's more efficient
     to special-case them entirely.  */
  if (mode == V2SImode
      && (code == GTU || code == GEU || code == LEU || code == LTU))
    {
      ia64_expand_vcondu_v2si (code, operands);
      return;
    }

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
                                       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
        {
          emit_move_insn (operands[0], ot);
          return;
        }

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else
    {
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (VOIDmode, t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (VOIDmode, f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
}

/* Emit an integral vector min or max operation.  Return true if all done.  */

bool
ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
                           rtx operands[])
{
  /* Six slots are needed, since the cmov expansion below reads xops[0..5].  */
  rtx xops[6];

  /* These four combinations are supported directly.  */
  if (mode == V8QImode && (code == UMIN || code == UMAX))
    return false;
  if (mode == V4HImode && (code == SMIN || code == SMAX))
    return false;

  /* Everything else implemented via vector comparisons.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  switch (code)
    {
    case UMIN:
      code = LTU;
      break;
    case UMAX:
      code = GTU;
      break;
    case SMIN:
      code = LT;
      break;
    case SMAX:
      code = GT;
      break;
    default:
      abort ();
    }
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);
  return true;
}

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
                  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nogp (addr);
      else if (! retval)
        insn = gen_call_nogp (addr, b0);
      else
        insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        insn = gen_sibcall_gp (addr);
      else if (! retval)
        insn = gen_call_gp (addr, b0);
      else
        insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
}

void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
                + current_frame_info.spill_size);
      if (frame_pointer_needed)
        {
          tmp = hard_frame_pointer_rtx;
          offset = -offset;
        }
      else
        {
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;
        }

      if (CONST_OK_FOR_I (offset))
        emit_insn (gen_adddi3 (pic_offset_table_rtx,
                               tmp, GEN_INT (offset)));
      else
        {
          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                 pic_offset_table_rtx, tmp));
        }

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}

void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
                 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
         we can legitimately change the global lifetime of the GP
         (in the form of killing where previously live).  This is
         because a call through a descriptor doesn't use the previous
         value of the GP, while a direct call does, and we do not
         commit to either form until the split here.

         That said, this means that we lack precise life info for
         whether ADDR is dead after this call.  This is not terribly
         important, since we can fix things up essentially for free
         with the POST_DEC below, but it's nice to not use it when we
         can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
                                            REGNO (addr)))
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
         revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
        tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
        tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}

/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}

void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", asm_out_file);
          out_state = 1;
        }
      else
        fputc (',', asm_out_file);
      if (re == rs + 1)
        fprintf (asm_out_file, "p%u", rs);
      else
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}
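/* Example of the directive emitted above (typical output, not guaranteed):
   with the usual ia64 call_used_regs settings this prints something like

     .pred.safe_across_calls p1-p5,p16-p63

   telling the assembler which predicate registers are preserved across
   calls.  */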
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (int try_locals)
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
        {
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;
        }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (HOST_WIDE_INT size)
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (regs_ever_live[regno])
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
        {
          current_frame_info.reg_fp = LOC_REG (79);
          current_frame_info.n_local_regs++;
        }
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
        {
          spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
         registers are clobbered, so we fall back to the stack.  */
      current_frame_info.reg_save_gp
        = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
      if (current_frame_info.reg_save_gp == 0)
        {
          SET_HARD_REG_BIT (mask, GR_REG (1));
          spill_size += 8;
          n_spilled += 1;
        }
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          spill_size += 8;
1965 n_spilled += 1;
1968 if (regs_ever_live[AR_PFS_REGNUM])
1970 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1971 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1972 if (current_frame_info.reg_save_ar_pfs == 0)
1974 extra_spill_size += 8;
1975 n_spilled += 1;
1980 /* Unwind descriptor hackery: things are most efficient if we allocate
1981 consecutive GR save registers for RP, PFS, FP in that order. However,
1982 it is absolutely critical that FP get the only hard register that's
1983 guaranteed to be free, so we allocated it first. If all three did
1984 happen to be allocated hard regs, and are consecutive, rearrange them
1985 into the preferred order now. */
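/* For example, if FP was given r35 and the later calls returned r36 for B0
   and r37 for ar.pfs, the swap below leaves B0 in r35, ar.pfs in r36 and FP
   in r37, which is the preferred RP, PFS, FP order. */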
1986 if (current_frame_info.reg_fp != 0
1987 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1988 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1990 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1991 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1992 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1995 /* See if we need to store the predicate register block. */
1996 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1997 if (regs_ever_live[regno] && ! call_used_regs[regno])
1998 break;
1999 if (regno <= PR_REG (63))
2001 SET_HARD_REG_BIT (mask, PR_REG (0));
2002 current_frame_info.reg_save_pr = find_gr_spill (1);
2003 if (current_frame_info.reg_save_pr == 0)
2005 extra_spill_size += 8;
2006 n_spilled += 1;
2009 /* ??? Mark them all as used so that register renaming and such
2010 are free to use them. */
2011 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2012 regs_ever_live[regno] = 1;
2015 /* If we're forced to use st8.spill, we're forced to save and restore
2016 ar.unat as well. The check for existing liveness allows inline asm
2017 to touch ar.unat. */
2018 if (spilled_gr_p || cfun->machine->n_varargs
2019 || regs_ever_live[AR_UNAT_REGNUM])
2021 regs_ever_live[AR_UNAT_REGNUM] = 1;
2022 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2023 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2024 if (current_frame_info.reg_save_ar_unat == 0)
2026 extra_spill_size += 8;
2027 n_spilled += 1;
2031 if (regs_ever_live[AR_LC_REGNUM])
2033 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2034 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2035 if (current_frame_info.reg_save_ar_lc == 0)
2037 extra_spill_size += 8;
2038 n_spilled += 1;
2042 /* If we have an odd number of words of pretend arguments written to
2043 the stack, then the FR save area will be unaligned. We round the
2044 size of this area up to keep things 16 byte aligned. */
2045 if (spilled_fr_p)
2046 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2047 else
2048 pretend_args_size = current_function_pretend_args_size;
2050 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2051 + current_function_outgoing_args_size);
2052 total_size = IA64_STACK_ALIGN (total_size);
2054 /* We always use the 16-byte scratch area provided by the caller, but
2055 if we are a leaf function, there's no one to which we need to provide
2056 a scratch area. */
2057 if (current_function_is_leaf)
2058 total_size = MAX (0, total_size - 16);
2060 current_frame_info.total_size = total_size;
2061 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2062 current_frame_info.spill_size = spill_size;
2063 current_frame_info.extra_spill_size = extra_spill_size;
2064 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2065 current_frame_info.n_spilled = n_spilled;
2066 current_frame_info.initialized = reload_completed;
2069 /* Compute the initial difference between the specified pair of registers. */
2071 HOST_WIDE_INT
2072 ia64_initial_elimination_offset (int from, int to)
2074 HOST_WIDE_INT offset;
2076 ia64_compute_frame_size (get_frame_size ());
2077 switch (from)
2079 case FRAME_POINTER_REGNUM:
2080 if (to == HARD_FRAME_POINTER_REGNUM)
2082 if (current_function_is_leaf)
2083 offset = -current_frame_info.total_size;
2084 else
2085 offset = -(current_frame_info.total_size
2086 - current_function_outgoing_args_size - 16);
2088 else if (to == STACK_POINTER_REGNUM)
2090 if (current_function_is_leaf)
2091 offset = 0;
2092 else
2093 offset = 16 + current_function_outgoing_args_size;
2095 else
2096 abort ();
2097 break;
2099 case ARG_POINTER_REGNUM:
2100 /* Arguments start above the 16 byte save area, unless this is a stdarg
2101 function, in which case we store through the 16 byte save area. */
2102 if (to == HARD_FRAME_POINTER_REGNUM)
2103 offset = 16 - current_function_pretend_args_size;
2104 else if (to == STACK_POINTER_REGNUM)
2105 offset = (current_frame_info.total_size
2106 + 16 - current_function_pretend_args_size);
2107 else
2108 abort ();
2109 break;
2111 default:
2112 abort ();
2115 return offset;
2118 /* If there are more than a trivial number of register spills, we use
2119 two interleaved iterators so that we can get two memory references
2120 per insn group.
2122 In order to simplify things in the prologue and epilogue expanders,
2123 we use helper functions to fix up the memory references after the
2124 fact with the appropriate offsets to a POST_MODIFY memory mode.
2125 The following data structure tracks the state of the two iterators
2126 while insns are being emitted. */
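/* Roughly: when two iterators are active, consecutive 8-byte saves alternate
   between iter_reg[0] and iter_reg[1], so the two memory references in an
   insn group use different base registers; each iterator is then advanced by
   16 via the POST_MODIFY fix-up applied to its previous memory reference. */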
2128 struct spill_fill_data
2130 rtx init_after; /* point at which to emit initializations */
2131 rtx init_reg[2]; /* initial base register */
2132 rtx iter_reg[2]; /* the iterator registers */
2133 rtx *prev_addr[2]; /* address of last memory use */
2134 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2135 HOST_WIDE_INT prev_off[2]; /* last offset */
2136 int n_iter; /* number of iterators in use */
2137 int next_iter; /* next iterator to use */
2138 unsigned int save_gr_used_mask;
2141 static struct spill_fill_data spill_fill_data;
2143 static void
2144 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2146 int i;
2148 spill_fill_data.init_after = get_last_insn ();
2149 spill_fill_data.init_reg[0] = init_reg;
2150 spill_fill_data.init_reg[1] = init_reg;
2151 spill_fill_data.prev_addr[0] = NULL;
2152 spill_fill_data.prev_addr[1] = NULL;
2153 spill_fill_data.prev_insn[0] = NULL;
2154 spill_fill_data.prev_insn[1] = NULL;
2155 spill_fill_data.prev_off[0] = cfa_off;
2156 spill_fill_data.prev_off[1] = cfa_off;
2157 spill_fill_data.next_iter = 0;
2158 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2160 spill_fill_data.n_iter = 1 + (n_spills > 2);
2161 for (i = 0; i < spill_fill_data.n_iter; ++i)
2163 int regno = next_scratch_gr_reg ();
2164 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2165 current_frame_info.gr_used_mask |= 1 << regno;
2169 static void
2170 finish_spill_pointers (void)
2172 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2175 static rtx
2176 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2178 int iter = spill_fill_data.next_iter;
2179 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2180 rtx disp_rtx = GEN_INT (disp);
2181 rtx mem;
2183 if (spill_fill_data.prev_addr[iter])
2185 if (CONST_OK_FOR_N (disp))
2187 *spill_fill_data.prev_addr[iter]
2188 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2189 gen_rtx_PLUS (DImode,
2190 spill_fill_data.iter_reg[iter],
2191 disp_rtx));
2192 REG_NOTES (spill_fill_data.prev_insn[iter])
2193 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2194 REG_NOTES (spill_fill_data.prev_insn[iter]));
2196 else
2198 /* ??? Could use register post_modify for loads. */
2199 if (! CONST_OK_FOR_I (disp))
2201 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2202 emit_move_insn (tmp, disp_rtx);
2203 disp_rtx = tmp;
2205 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2206 spill_fill_data.iter_reg[iter], disp_rtx));
2209 /* Micro-optimization: if we've created a frame pointer, it's at
2210 CFA 0, which may allow the real iterator to be initialized lower,
2211 slightly increasing parallelism. Also, if there are few saves
2212 it may eliminate the iterator entirely. */
2213 else if (disp == 0
2214 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2215 && frame_pointer_needed)
2217 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2218 set_mem_alias_set (mem, get_varargs_alias_set ());
2219 return mem;
2221 else
2223 rtx seq, insn;
2225 if (disp == 0)
2226 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2227 spill_fill_data.init_reg[iter]);
2228 else
2230 start_sequence ();
2232 if (! CONST_OK_FOR_I (disp))
2234 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2235 emit_move_insn (tmp, disp_rtx);
2236 disp_rtx = tmp;
2239 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2240 spill_fill_data.init_reg[iter],
2241 disp_rtx));
2243 seq = get_insns ();
2244 end_sequence ();
2247 /* Careful for being the first insn in a sequence. */
2248 if (spill_fill_data.init_after)
2249 insn = emit_insn_after (seq, spill_fill_data.init_after);
2250 else
2252 rtx first = get_insns ();
2253 if (first)
2254 insn = emit_insn_before (seq, first);
2255 else
2256 insn = emit_insn (seq);
2258 spill_fill_data.init_after = insn;
2260 /* If DISP is 0, we may or may not have a further adjustment
2261 afterward. If we do, then the load/store insn may be modified
2262 to be a post-modify. If we don't, then this copy may be
2263 eliminated by copyprop_hardreg_forward, which makes this
2264 insn garbage, which runs afoul of the sanity check in
2265 propagate_one_insn. So mark this insn as legal to delete. */
2266 if (disp == 0)
2267 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2268 REG_NOTES (insn));
2271 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2273 /* ??? Not all of the spills are for varargs, but some of them are.
2274 The rest of the spills belong in an alias set of their own. But
2275 it doesn't actually hurt to include them here. */
2276 set_mem_alias_set (mem, get_varargs_alias_set ());
2278 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2279 spill_fill_data.prev_off[iter] = cfa_off;
2281 if (++iter >= spill_fill_data.n_iter)
2282 iter = 0;
2283 spill_fill_data.next_iter = iter;
2285 return mem;
2288 static void
2289 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2290 rtx frame_reg)
2292 int iter = spill_fill_data.next_iter;
2293 rtx mem, insn;
2295 mem = spill_restore_mem (reg, cfa_off);
2296 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2297 spill_fill_data.prev_insn[iter] = insn;
2299 if (frame_reg)
2301 rtx base;
2302 HOST_WIDE_INT off;
2304 RTX_FRAME_RELATED_P (insn) = 1;
2306 /* Don't even pretend that the unwind code can intuit its way
2307 through a pair of interleaved post_modify iterators. Just
2308 provide the correct answer. */
2310 if (frame_pointer_needed)
2312 base = hard_frame_pointer_rtx;
2313 off = - cfa_off;
2315 else
2317 base = stack_pointer_rtx;
2318 off = current_frame_info.total_size - cfa_off;
2321 REG_NOTES (insn)
2322 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2323 gen_rtx_SET (VOIDmode,
2324 gen_rtx_MEM (GET_MODE (reg),
2325 plus_constant (base, off)),
2326 frame_reg),
2327 REG_NOTES (insn));
2331 static void
2332 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2334 int iter = spill_fill_data.next_iter;
2335 rtx insn;
2337 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2338 GEN_INT (cfa_off)));
2339 spill_fill_data.prev_insn[iter] = insn;
2342 /* Wrapper functions that discard the CONST_INT spill offset. These
2343 exist so that we can give gr_spill/gr_fill the offset they need and
2344 use a consistent function interface. */
2346 static rtx
2347 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2349 return gen_movdi (dest, src);
2352 static rtx
2353 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2355 return gen_fr_spill (dest, src);
2358 static rtx
2359 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2361 return gen_fr_restore (dest, src);
2364 /* Called after register allocation to add any instructions needed for the
2365 prologue. Using a prologue insn is preferred over putting all of the
2366 instructions in output_function_prologue(), since it allows the scheduler
2367 to intermix instructions with the saves of the call-saved registers. In
2368 some cases, it might be necessary to emit a barrier instruction as the last
2369 insn to prevent such scheduling.
2371 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2372 so that the debug info generation code can handle them properly.
2374 The register save area is laid out like so:
2375 cfa+16
2376 [ varargs spill area ]
2377 [ fr register spill area ]
2378 [ br register spill area ]
2379 [ ar register spill area ]
2380 [ pr register spill area ]
2381 [ gr register spill area ] */
2383 /* ??? Get inefficient code when the frame size is larger than can fit in an
2384 adds instruction. */
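/* An "adds" immediate is only 14 bits, so once the frame grows past roughly
   8KB the frame size must first be loaded into a scratch register; see the
   CONST_OK_FOR_I checks below. */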
2386 void
2387 ia64_expand_prologue (void)
2389 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2390 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2391 rtx reg, alt_reg;
2393 ia64_compute_frame_size (get_frame_size ());
2394 last_scratch_gr_reg = 15;
2396 /* If there is no epilogue, then we don't need some prologue insns.
2397 We need to avoid emitting the dead prologue insns, because flow
2398 will complain about them. */
2399 if (optimize)
2401 edge e;
2402 edge_iterator ei;
2404 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2405 if ((e->flags & EDGE_FAKE) == 0
2406 && (e->flags & EDGE_FALLTHRU) != 0)
2407 break;
2408 epilogue_p = (e != NULL);
2410 else
2411 epilogue_p = 1;
2413 /* Set the local, input, and output register names. We need to do this
2414 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2415 half. If we use in/loc/out register names, then we get assembler errors
2416 in crtn.S because there is no alloc insn or regstk directive in there. */
2417 if (! TARGET_REG_NAMES)
2419 int inputs = current_frame_info.n_input_regs;
2420 int locals = current_frame_info.n_local_regs;
2421 int outputs = current_frame_info.n_output_regs;
2423 for (i = 0; i < inputs; i++)
2424 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2425 for (i = 0; i < locals; i++)
2426 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2427 for (i = 0; i < outputs; i++)
2428 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2431 /* Set the frame pointer register name. The regnum is logically loc79,
2432 but of course we'll not have allocated that many locals. Rather than
2433 worrying about renumbering the existing rtxs, we adjust the name. */
2434 /* ??? This code means that we can never use one local register when
2435 there is a frame pointer. loc79 gets wasted in this case, as it is
2436 renamed to a register that will never be used. See also the try_locals
2437 code in find_gr_spill. */
2438 if (current_frame_info.reg_fp)
2440 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2441 reg_names[HARD_FRAME_POINTER_REGNUM]
2442 = reg_names[current_frame_info.reg_fp];
2443 reg_names[current_frame_info.reg_fp] = tmp;
2446 /* We don't need an alloc instruction if we've used no outputs or locals. */
2447 if (current_frame_info.n_local_regs == 0
2448 && current_frame_info.n_output_regs == 0
2449 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2450 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2452 /* If there is no alloc, but there are input registers used, then we
2453 need a .regstk directive. */
2454 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2455 ar_pfs_save_reg = NULL_RTX;
2457 else
2459 current_frame_info.need_regstk = 0;
2461 if (current_frame_info.reg_save_ar_pfs)
2462 regno = current_frame_info.reg_save_ar_pfs;
2463 else
2464 regno = next_scratch_gr_reg ();
2465 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2467 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2468 GEN_INT (current_frame_info.n_input_regs),
2469 GEN_INT (current_frame_info.n_local_regs),
2470 GEN_INT (current_frame_info.n_output_regs),
2471 GEN_INT (current_frame_info.n_rotate_regs)));
2472 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2475 /* Set up frame pointer, stack pointer, and spill iterators. */
2477 n_varargs = cfun->machine->n_varargs;
2478 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2479 stack_pointer_rtx, 0);
2481 if (frame_pointer_needed)
2483 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2484 RTX_FRAME_RELATED_P (insn) = 1;
2487 if (current_frame_info.total_size != 0)
2489 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2490 rtx offset;
2492 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2493 offset = frame_size_rtx;
2494 else
2496 regno = next_scratch_gr_reg ();
2497 offset = gen_rtx_REG (DImode, regno);
2498 emit_move_insn (offset, frame_size_rtx);
2501 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2502 stack_pointer_rtx, offset));
2504 if (! frame_pointer_needed)
2506 RTX_FRAME_RELATED_P (insn) = 1;
2507 if (GET_CODE (offset) != CONST_INT)
2509 REG_NOTES (insn)
2510 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2511 gen_rtx_SET (VOIDmode,
2512 stack_pointer_rtx,
2513 gen_rtx_PLUS (DImode,
2514 stack_pointer_rtx,
2515 frame_size_rtx)),
2516 REG_NOTES (insn));
2520 /* ??? At this point we must generate a magic insn that appears to
2521 modify the stack pointer, the frame pointer, and all spill
2522 iterators. This would allow the most scheduling freedom. For
2523 now, just hard stop. */
2524 emit_insn (gen_blockage ());
2527 /* Must copy out ar.unat before doing any integer spills. */
2528 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2530 if (current_frame_info.reg_save_ar_unat)
2531 ar_unat_save_reg
2532 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2533 else
2535 alt_regno = next_scratch_gr_reg ();
2536 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2537 current_frame_info.gr_used_mask |= 1 << alt_regno;
2540 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2541 insn = emit_move_insn (ar_unat_save_reg, reg);
2542 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2544 /* Even if we're not going to generate an epilogue, we still
2545 need to save the register so that EH works. */
2546 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2547 emit_insn (gen_prologue_use (ar_unat_save_reg));
2549 else
2550 ar_unat_save_reg = NULL_RTX;
2552 /* Spill all varargs registers. Do this before spilling any GR registers,
2553 since we want the UNAT bits for the GR registers to override the UNAT
2554 bits from varargs, which we don't care about. */
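/* (st8.spill records each NaT bit in ar.unat indexed by bits 8:3 of the
   store address, so a varargs slot and a GR save slot whose addresses differ
   by a multiple of 512 share a UNAT bit; spilling the GRs last lets their
   NaT bits win.) */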
2556 cfa_off = -16;
2557 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2559 reg = gen_rtx_REG (DImode, regno);
2560 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2563 /* Locate the bottom of the register save area. */
2564 cfa_off = (current_frame_info.spill_cfa_off
2565 + current_frame_info.spill_size
2566 + current_frame_info.extra_spill_size);
2568 /* Save the predicate register block either in a register or in memory. */
2569 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2571 reg = gen_rtx_REG (DImode, PR_REG (0));
2572 if (current_frame_info.reg_save_pr != 0)
2574 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2575 insn = emit_move_insn (alt_reg, reg);
2577 /* ??? Denote pr spill/fill by a DImode move that modifies all
2578 64 hard registers. */
2579 RTX_FRAME_RELATED_P (insn) = 1;
2580 REG_NOTES (insn)
2581 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2582 gen_rtx_SET (VOIDmode, alt_reg, reg),
2583 REG_NOTES (insn));
2585 /* Even if we're not going to generate an epilogue, we still
2586 need to save the register so that EH works. */
2587 if (! epilogue_p)
2588 emit_insn (gen_prologue_use (alt_reg));
2590 else
2592 alt_regno = next_scratch_gr_reg ();
2593 alt_reg = gen_rtx_REG (DImode, alt_regno);
2594 insn = emit_move_insn (alt_reg, reg);
2595 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2596 cfa_off -= 8;
2600 /* Handle AR regs in numerical order. All of them get special handling. */
2601 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2602 && current_frame_info.reg_save_ar_unat == 0)
2604 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2605 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2606 cfa_off -= 8;
2609 /* The alloc insn already copied ar.pfs into a general register. The
2610 only thing we have to do now is copy that register to a stack slot
2611 if we'd not allocated a local register for the job. */
2612 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2613 && current_frame_info.reg_save_ar_pfs == 0)
2615 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2616 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2617 cfa_off -= 8;
2620 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2622 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2623 if (current_frame_info.reg_save_ar_lc != 0)
2625 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2626 insn = emit_move_insn (alt_reg, reg);
2627 RTX_FRAME_RELATED_P (insn) = 1;
2629 /* Even if we're not going to generate an epilogue, we still
2630 need to save the register so that EH works. */
2631 if (! epilogue_p)
2632 emit_insn (gen_prologue_use (alt_reg));
2634 else
2636 alt_regno = next_scratch_gr_reg ();
2637 alt_reg = gen_rtx_REG (DImode, alt_regno);
2638 emit_move_insn (alt_reg, reg);
2639 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2640 cfa_off -= 8;
2644 if (current_frame_info.reg_save_gp)
2646 insn = emit_move_insn (gen_rtx_REG (DImode,
2647 current_frame_info.reg_save_gp),
2648 pic_offset_table_rtx);
2649 /* We don't know for sure yet if this is actually needed, since
2650 we've not split the PIC call patterns. If all of the calls
2651 are indirect, and not followed by any uses of the gp, then
2652 this save is dead. Allow it to go away. */
2653 REG_NOTES (insn)
2654 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2657 /* We should now be at the base of the gr/br/fr spill area. */
2658 if (cfa_off != (current_frame_info.spill_cfa_off
2659 + current_frame_info.spill_size))
2660 abort ();
2662 /* Spill all general registers. */
2663 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2664 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2666 reg = gen_rtx_REG (DImode, regno);
2667 do_spill (gen_gr_spill, reg, cfa_off, reg);
2668 cfa_off -= 8;
2671 /* Handle BR0 specially -- it may be getting stored permanently in
2672 some GR register. */
2673 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2675 reg = gen_rtx_REG (DImode, BR_REG (0));
2676 if (current_frame_info.reg_save_b0 != 0)
2678 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2679 insn = emit_move_insn (alt_reg, reg);
2680 RTX_FRAME_RELATED_P (insn) = 1;
2682 /* Even if we're not going to generate an epilogue, we still
2683 need to save the register so that EH works. */
2684 if (! epilogue_p)
2685 emit_insn (gen_prologue_use (alt_reg));
2687 else
2689 alt_regno = next_scratch_gr_reg ();
2690 alt_reg = gen_rtx_REG (DImode, alt_regno);
2691 emit_move_insn (alt_reg, reg);
2692 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2693 cfa_off -= 8;
2697 /* Spill the rest of the BR registers. */
2698 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2699 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2701 alt_regno = next_scratch_gr_reg ();
2702 alt_reg = gen_rtx_REG (DImode, alt_regno);
2703 reg = gen_rtx_REG (DImode, regno);
2704 emit_move_insn (alt_reg, reg);
2705 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2706 cfa_off -= 8;
2709 /* Align the frame and spill all FR registers. */
2710 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2711 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2713 if (cfa_off & 15)
2714 abort ();
2715 reg = gen_rtx_REG (XFmode, regno);
2716 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2717 cfa_off -= 16;
2720 if (cfa_off != current_frame_info.spill_cfa_off)
2721 abort ();
2723 finish_spill_pointers ();
2726 /* Called after register allocation to add any instructions needed for the
2727 epilogue. Using an epilogue insn is preferred over putting all of the
2728 instructions in output_function_epilogue(), since it allows the scheduler
2729 to intermix instructions with the restores of the call-saved registers. In
2730 some cases, it might be necessary to emit a barrier instruction as the last
2731 insn to prevent such scheduling. */
2733 void
2734 ia64_expand_epilogue (int sibcall_p)
2736 rtx insn, reg, alt_reg, ar_unat_save_reg;
2737 int regno, alt_regno, cfa_off;
2739 ia64_compute_frame_size (get_frame_size ());
2741 /* If there is a frame pointer, then we use it instead of the stack
2742 pointer, so that the stack pointer does not need to be valid when
2743 the epilogue starts. See EXIT_IGNORE_STACK. */
2744 if (frame_pointer_needed)
2745 setup_spill_pointers (current_frame_info.n_spilled,
2746 hard_frame_pointer_rtx, 0);
2747 else
2748 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2749 current_frame_info.total_size);
2751 if (current_frame_info.total_size != 0)
2753 /* ??? At this point we must generate a magic insn that appears to
2754 modify the spill iterators and the frame pointer. This would
2755 allow the most scheduling freedom. For now, just hard stop. */
2756 emit_insn (gen_blockage ());
2759 /* Locate the bottom of the register save area. */
2760 cfa_off = (current_frame_info.spill_cfa_off
2761 + current_frame_info.spill_size
2762 + current_frame_info.extra_spill_size);
2764 /* Restore the predicate registers. */
2765 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2767 if (current_frame_info.reg_save_pr != 0)
2768 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2769 else
2771 alt_regno = next_scratch_gr_reg ();
2772 alt_reg = gen_rtx_REG (DImode, alt_regno);
2773 do_restore (gen_movdi_x, alt_reg, cfa_off);
2774 cfa_off -= 8;
2776 reg = gen_rtx_REG (DImode, PR_REG (0));
2777 emit_move_insn (reg, alt_reg);
2780 /* Restore the application registers. */
2782 /* Load the saved unat from the stack, but do not restore it until
2783 after the GRs have been restored. */
2784 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2786 if (current_frame_info.reg_save_ar_unat != 0)
2787 ar_unat_save_reg
2788 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2789 else
2791 alt_regno = next_scratch_gr_reg ();
2792 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2793 current_frame_info.gr_used_mask |= 1 << alt_regno;
2794 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2795 cfa_off -= 8;
2798 else
2799 ar_unat_save_reg = NULL_RTX;
2801 if (current_frame_info.reg_save_ar_pfs != 0)
2803 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2804 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2805 emit_move_insn (reg, alt_reg);
2807 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2809 alt_regno = next_scratch_gr_reg ();
2810 alt_reg = gen_rtx_REG (DImode, alt_regno);
2811 do_restore (gen_movdi_x, alt_reg, cfa_off);
2812 cfa_off -= 8;
2813 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2814 emit_move_insn (reg, alt_reg);
2817 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2819 if (current_frame_info.reg_save_ar_lc != 0)
2820 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2821 else
2823 alt_regno = next_scratch_gr_reg ();
2824 alt_reg = gen_rtx_REG (DImode, alt_regno);
2825 do_restore (gen_movdi_x, alt_reg, cfa_off);
2826 cfa_off -= 8;
2828 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2829 emit_move_insn (reg, alt_reg);
2832 /* We should now be at the base of the gr/br/fr spill area. */
2833 if (cfa_off != (current_frame_info.spill_cfa_off
2834 + current_frame_info.spill_size))
2835 abort ();
2837 /* The GP may be stored on the stack in the prologue, but it's
2838 never restored in the epilogue. Skip the stack slot. */
2839 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2840 cfa_off -= 8;
2842 /* Restore all general registers. */
2843 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2844 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2846 reg = gen_rtx_REG (DImode, regno);
2847 do_restore (gen_gr_restore, reg, cfa_off);
2848 cfa_off -= 8;
2851 /* Restore the branch registers. Handle B0 specially, as it may
2852 have gotten stored in some GR register. */
2853 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2855 if (current_frame_info.reg_save_b0 != 0)
2856 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2857 else
2859 alt_regno = next_scratch_gr_reg ();
2860 alt_reg = gen_rtx_REG (DImode, alt_regno);
2861 do_restore (gen_movdi_x, alt_reg, cfa_off);
2862 cfa_off -= 8;
2864 reg = gen_rtx_REG (DImode, BR_REG (0));
2865 emit_move_insn (reg, alt_reg);
2868 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2869 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2871 alt_regno = next_scratch_gr_reg ();
2872 alt_reg = gen_rtx_REG (DImode, alt_regno);
2873 do_restore (gen_movdi_x, alt_reg, cfa_off);
2874 cfa_off -= 8;
2875 reg = gen_rtx_REG (DImode, regno);
2876 emit_move_insn (reg, alt_reg);
2879 /* Restore floating point registers. */
2880 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2881 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2883 if (cfa_off & 15)
2884 abort ();
2885 reg = gen_rtx_REG (XFmode, regno);
2886 do_restore (gen_fr_restore_x, reg, cfa_off);
2887 cfa_off -= 16;
2890 /* Restore ar.unat for real. */
2891 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2893 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2894 emit_move_insn (reg, ar_unat_save_reg);
2897 if (cfa_off != current_frame_info.spill_cfa_off)
2898 abort ();
2900 finish_spill_pointers ();
2902 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2904 /* ??? At this point we must generate a magic insn that appears to
2905 modify the spill iterators, the stack pointer, and the frame
2906 pointer. This would allow the most scheduling freedom. For now,
2907 just hard stop. */
2908 emit_insn (gen_blockage ());
2911 if (cfun->machine->ia64_eh_epilogue_sp)
2912 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2913 else if (frame_pointer_needed)
2915 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2916 RTX_FRAME_RELATED_P (insn) = 1;
2918 else if (current_frame_info.total_size)
2920 rtx offset, frame_size_rtx;
2922 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2923 if (CONST_OK_FOR_I (current_frame_info.total_size))
2924 offset = frame_size_rtx;
2925 else
2927 regno = next_scratch_gr_reg ();
2928 offset = gen_rtx_REG (DImode, regno);
2929 emit_move_insn (offset, frame_size_rtx);
2932 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2933 offset));
2935 RTX_FRAME_RELATED_P (insn) = 1;
2936 if (GET_CODE (offset) != CONST_INT)
2938 REG_NOTES (insn)
2939 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2940 gen_rtx_SET (VOIDmode,
2941 stack_pointer_rtx,
2942 gen_rtx_PLUS (DImode,
2943 stack_pointer_rtx,
2944 frame_size_rtx)),
2945 REG_NOTES (insn));
2949 if (cfun->machine->ia64_eh_epilogue_bsp)
2950 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2952 if (! sibcall_p)
2953 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2954 else
2956 int fp = GR_REG (2);
2957 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
2958 first available call-clobbered register. If there was a frame pointer
2959 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2960 so we have to make sure we're using the string "r2" when emitting
2961 the register name for the assembler. */
2962 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2963 fp = HARD_FRAME_POINTER_REGNUM;
2965 /* We must emit an alloc to force the input registers to become output
2966 registers. Otherwise, if the callee tries to pass its parameters
2967 through to another call without an intervening alloc, then these
2968 values get lost. */
2969 /* ??? We don't need to preserve all input registers. We only need to
2970 preserve those input registers used as arguments to the sibling call.
2971 It is unclear how to compute that number here. */
2972 if (current_frame_info.n_input_regs != 0)
2974 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
2975 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2976 const0_rtx, const0_rtx,
2977 n_inputs, const0_rtx));
2978 RTX_FRAME_RELATED_P (insn) = 1;
2983 /* Return 1 if br.ret can do all the work required to return from a
2984 function. */
2987 ia64_direct_return (void)
2989 if (reload_completed && ! frame_pointer_needed)
2991 ia64_compute_frame_size (get_frame_size ());
2993 return (current_frame_info.total_size == 0
2994 && current_frame_info.n_spilled == 0
2995 && current_frame_info.reg_save_b0 == 0
2996 && current_frame_info.reg_save_pr == 0
2997 && current_frame_info.reg_save_ar_pfs == 0
2998 && current_frame_info.reg_save_ar_unat == 0
2999 && current_frame_info.reg_save_ar_lc == 0);
3001 return 0;
3004 /* Return the magic cookie that we use to hold the return address
3005 during early compilation. */
3008 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3010 if (count != 0)
3011 return NULL;
3012 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3015 /* Split this value after reload, now that we know where the return
3016 address is saved. */
3018 void
3019 ia64_split_return_addr_rtx (rtx dest)
3021 rtx src;
3023 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3025 if (current_frame_info.reg_save_b0 != 0)
3026 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3027 else
3029 HOST_WIDE_INT off;
3030 unsigned int regno;
3032 /* Compute offset from CFA for BR0. */
3033 /* ??? Must be kept in sync with ia64_expand_prologue. */
3034 off = (current_frame_info.spill_cfa_off
3035 + current_frame_info.spill_size);
3036 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3037 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3038 off -= 8;
3040 /* Convert CFA offset to a register based offset. */
3041 if (frame_pointer_needed)
3042 src = hard_frame_pointer_rtx;
3043 else
3045 src = stack_pointer_rtx;
3046 off += current_frame_info.total_size;
3049 /* Load address into scratch register. */
3050 if (CONST_OK_FOR_I (off))
3051 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3052 else
3054 emit_move_insn (dest, GEN_INT (off));
3055 emit_insn (gen_adddi3 (dest, src, dest));
3058 src = gen_rtx_MEM (Pmode, dest);
3061 else
3062 src = gen_rtx_REG (DImode, BR_REG (0));
3064 emit_move_insn (dest, src);
3068 ia64_hard_regno_rename_ok (int from, int to)
3070 /* Don't clobber any of the registers we reserved for the prologue. */
3071 if (to == current_frame_info.reg_fp
3072 || to == current_frame_info.reg_save_b0
3073 || to == current_frame_info.reg_save_pr
3074 || to == current_frame_info.reg_save_ar_pfs
3075 || to == current_frame_info.reg_save_ar_unat
3076 || to == current_frame_info.reg_save_ar_lc)
3077 return 0;
3079 if (from == current_frame_info.reg_fp
3080 || from == current_frame_info.reg_save_b0
3081 || from == current_frame_info.reg_save_pr
3082 || from == current_frame_info.reg_save_ar_pfs
3083 || from == current_frame_info.reg_save_ar_unat
3084 || from == current_frame_info.reg_save_ar_lc)
3085 return 0;
3087 /* Don't use output registers outside the register frame. */
3088 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3089 return 0;
3091 /* Retain even/oddness on predicate register pairs. */
3092 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3093 return (from & 1) == (to & 1);
3095 return 1;
3098 /* Target hook for assembling integer objects. Handle word-sized
3099 aligned objects and detect the cases when @fptr is needed. */
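/* For instance, an aligned 64-bit pointer to a function foo is emitted as
   "data8\t@fptr(foo)", so that the reference resolves to foo's function
   descriptor rather than to its code address. */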
3101 static bool
3102 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3104 if (size == POINTER_SIZE / BITS_PER_UNIT
3105 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3106 && GET_CODE (x) == SYMBOL_REF
3107 && SYMBOL_REF_FUNCTION_P (x))
3109 static const char * const directive[2][2] = {
3110 /* 64-bit pointer */ /* 32-bit pointer */
3111 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3112 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3114 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3115 output_addr_const (asm_out_file, x);
3116 fputs (")\n", asm_out_file);
3117 return true;
3119 return default_assemble_integer (x, size, aligned_p);
3122 /* Emit the function prologue. */
3124 static void
3125 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3127 int mask, grsave, grsave_prev;
3129 if (current_frame_info.need_regstk)
3130 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3131 current_frame_info.n_input_regs,
3132 current_frame_info.n_local_regs,
3133 current_frame_info.n_output_regs,
3134 current_frame_info.n_rotate_regs);
3136 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3137 return;
3139 /* Emit the .prologue directive. */
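/* The mask follows the .prologue GR-save convention: 0x8 = rp (b0),
   0x4 = ar.pfs, 0x2 = psp (the saved sp, i.e. our frame pointer copy),
   0x1 = pr, with GRSAVE naming the first of the consecutive general
   registers used. */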
3141 mask = 0;
3142 grsave = grsave_prev = 0;
3143 if (current_frame_info.reg_save_b0 != 0)
3145 mask |= 8;
3146 grsave = grsave_prev = current_frame_info.reg_save_b0;
3148 if (current_frame_info.reg_save_ar_pfs != 0
3149 && (grsave_prev == 0
3150 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3152 mask |= 4;
3153 if (grsave_prev == 0)
3154 grsave = current_frame_info.reg_save_ar_pfs;
3155 grsave_prev = current_frame_info.reg_save_ar_pfs;
3157 if (current_frame_info.reg_fp != 0
3158 && (grsave_prev == 0
3159 || current_frame_info.reg_fp == grsave_prev + 1))
3161 mask |= 2;
3162 if (grsave_prev == 0)
3163 grsave = HARD_FRAME_POINTER_REGNUM;
3164 grsave_prev = current_frame_info.reg_fp;
3166 if (current_frame_info.reg_save_pr != 0
3167 && (grsave_prev == 0
3168 || current_frame_info.reg_save_pr == grsave_prev + 1))
3170 mask |= 1;
3171 if (grsave_prev == 0)
3172 grsave = current_frame_info.reg_save_pr;
3175 if (mask && TARGET_GNU_AS)
3176 fprintf (file, "\t.prologue %d, %d\n", mask,
3177 ia64_dbx_register_number (grsave));
3178 else
3179 fputs ("\t.prologue\n", file);
3181 /* Emit a .spill directive, if necessary, to relocate the base of
3182 the register spill area. */
3183 if (current_frame_info.spill_cfa_off != -16)
3184 fprintf (file, "\t.spill %ld\n",
3185 (long) (current_frame_info.spill_cfa_off
3186 + current_frame_info.spill_size));
3189 /* Emit the .body directive at the scheduled end of the prologue. */
3191 static void
3192 ia64_output_function_end_prologue (FILE *file)
3194 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3195 return;
3197 fputs ("\t.body\n", file);
3200 /* Emit the function epilogue. */
3202 static void
3203 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3204 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3206 int i;
3208 if (current_frame_info.reg_fp)
3210 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3211 reg_names[HARD_FRAME_POINTER_REGNUM]
3212 = reg_names[current_frame_info.reg_fp];
3213 reg_names[current_frame_info.reg_fp] = tmp;
3215 if (! TARGET_REG_NAMES)
3217 for (i = 0; i < current_frame_info.n_input_regs; i++)
3218 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3219 for (i = 0; i < current_frame_info.n_local_regs; i++)
3220 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3221 for (i = 0; i < current_frame_info.n_output_regs; i++)
3222 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3225 current_frame_info.initialized = 0;
3229 ia64_dbx_register_number (int regno)
3231 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3232 from its home at loc79 to something inside the register frame. We
3233 must perform the same renumbering here for the debug info. */
3234 if (current_frame_info.reg_fp)
3236 if (regno == HARD_FRAME_POINTER_REGNUM)
3237 regno = current_frame_info.reg_fp;
3238 else if (regno == current_frame_info.reg_fp)
3239 regno = HARD_FRAME_POINTER_REGNUM;
3242 if (IN_REGNO_P (regno))
3243 return 32 + regno - IN_REG (0);
3244 else if (LOC_REGNO_P (regno))
3245 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3246 else if (OUT_REGNO_P (regno))
3247 return (32 + current_frame_info.n_input_regs
3248 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3249 else
3250 return regno;
3253 void
3254 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3256 rtx addr_reg, eight = GEN_INT (8);
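/* The trampoline written below is four 8-byte words: a fake descriptor whose
   entry point is __ia64_trampoline and whose gp is ADDR+16, followed by the
   target descriptor and the static chain, as spelled out step by step in the
   comments that follow. */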
3258 /* The Intel assembler requires that the global __ia64_trampoline symbol
3259 be declared explicitly. */
3260 if (!TARGET_GNU_AS)
3262 static bool declared_ia64_trampoline = false;
3264 if (!declared_ia64_trampoline)
3266 declared_ia64_trampoline = true;
3267 (*targetm.asm_out.globalize_label) (asm_out_file,
3268 "__ia64_trampoline");
3272 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3273 addr = convert_memory_address (Pmode, addr);
3274 fnaddr = convert_memory_address (Pmode, fnaddr);
3275 static_chain = convert_memory_address (Pmode, static_chain);
3277 /* Load up our iterator. */
3278 addr_reg = gen_reg_rtx (Pmode);
3279 emit_move_insn (addr_reg, addr);
3281 /* The first two words are the fake descriptor:
3282 __ia64_trampoline, ADDR+16. */
3283 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3284 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3285 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3287 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3288 copy_to_reg (plus_constant (addr, 16)));
3289 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3291 /* The third word is the target descriptor. */
3292 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3293 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3295 /* The fourth word is the static chain. */
3296 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3299 /* Do any needed setup for a variadic function. CUM has not been updated
3300 for the last named argument which has type TYPE and mode MODE.
3302 We generate the actual spill instructions during prologue generation. */
3304 static void
3305 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3306 tree type, int * pretend_size,
3307 int second_time ATTRIBUTE_UNUSED)
3309 CUMULATIVE_ARGS next_cum = *cum;
3311 /* Skip the current argument. */
3312 ia64_function_arg_advance (&next_cum, mode, type, 1);
3314 if (next_cum.words < MAX_ARGUMENT_SLOTS)
3316 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3317 *pretend_size = n * UNITS_PER_WORD;
3318 cfun->machine->n_varargs = n;
3322 /* Check whether TYPE is a homogeneous floating point aggregate. If
3323 it is, return the mode of the floating point type that appears
3324 in all leaves. If it is not, return VOIDmode.
3326 An aggregate is a homogeneous floating point aggregate if all
3327 fields/elements in it have the same floating point type (e.g.,
3328 SFmode). 128-bit quad-precision floats are excluded.
3330 Variable sized aggregates should never arrive here, since we should
3331 have already decided to pass them by reference. Top-level zero-sized
3332 aggregates are excluded because our parallels crash the middle-end. */
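/* For example, struct { float x, y, z; } is an HFA with element mode SFmode,
   while struct { float x; double y; } and struct { float x; int n; } are not
   HFAs at all. */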
3334 static enum machine_mode
3335 hfa_element_mode (tree type, bool nested)
3337 enum machine_mode element_mode = VOIDmode;
3338 enum machine_mode mode;
3339 enum tree_code code = TREE_CODE (type);
3340 int know_element_mode = 0;
3341 tree t;
3343 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3344 return VOIDmode;
3346 switch (code)
3348 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3349 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3350 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3351 case LANG_TYPE: case FUNCTION_TYPE:
3352 return VOIDmode;
3354 /* Fortran complex types are supposed to be HFAs, so we need to handle
3355 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3356 types though. */
3357 case COMPLEX_TYPE:
3358 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3359 && TYPE_MODE (type) != TCmode)
3360 return GET_MODE_INNER (TYPE_MODE (type));
3361 else
3362 return VOIDmode;
3364 case REAL_TYPE:
3365 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3366 mode if this is contained within an aggregate. */
3367 if (nested && TYPE_MODE (type) != TFmode)
3368 return TYPE_MODE (type);
3369 else
3370 return VOIDmode;
3372 case ARRAY_TYPE:
3373 return hfa_element_mode (TREE_TYPE (type), 1);
3375 case RECORD_TYPE:
3376 case UNION_TYPE:
3377 case QUAL_UNION_TYPE:
3378 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3380 if (TREE_CODE (t) != FIELD_DECL)
3381 continue;
3383 mode = hfa_element_mode (TREE_TYPE (t), 1);
3384 if (know_element_mode)
3386 if (mode != element_mode)
3387 return VOIDmode;
3389 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3390 return VOIDmode;
3391 else
3393 know_element_mode = 1;
3394 element_mode = mode;
3397 return element_mode;
3399 default:
3400 /* If we reach here, we probably have some front-end specific type
3401 that the backend doesn't know about. This can happen via the
3402 aggregate_value_p call in init_function_start. All we can do is
3403 ignore unknown tree types. */
3404 return VOIDmode;
3407 return VOIDmode;
3410 /* Return the number of words required to hold a quantity of TYPE and MODE
3411 when passed as an argument. */
3412 static int
3413 ia64_function_arg_words (tree type, enum machine_mode mode)
3415 int words;
3417 if (mode == BLKmode)
3418 words = int_size_in_bytes (type);
3419 else
3420 words = GET_MODE_SIZE (mode);
3422 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
3425 /* Return the number of registers that should be skipped so the current
3426 argument (described by TYPE and WORDS) will be properly aligned.
3428 Integer and float arguments larger than 8 bytes start at the next
3429 even boundary. Aggregates larger than 8 bytes start at the next
3430 even boundary if the aggregate has 16 byte alignment. Note that
3431 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3432 but are still to be aligned in registers.
3434 ??? The ABI does not specify how to handle aggregates with
3435 alignment from 9 to 15 bytes, or greater than 16. We handle them
3436 all as if they had 16 byte alignment. Such aggregates can occur
3437 only if gcc extensions are used. */
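/* For example, a 16-byte-aligned aggregate that would otherwise start in an
   odd argument slot is pushed to the next even slot (offset 1), while a
   single DImode integer in an odd slot is not moved. */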
3438 static int
3439 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3441 if ((cum->words & 1) == 0)
3442 return 0;
3444 if (type
3445 && TREE_CODE (type) != INTEGER_TYPE
3446 && TREE_CODE (type) != REAL_TYPE)
3447 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3448 else
3449 return words > 1;
3452 /* Return rtx for register where argument is passed, or zero if it is passed
3453 on the stack. */
3454 /* ??? 128-bit quad-precision floats are always passed in general
3455 registers. */
3458 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3459 int named, int incoming)
3461 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3462 int words = ia64_function_arg_words (type, mode);
3463 int offset = ia64_function_arg_offset (cum, type, words);
3464 enum machine_mode hfa_mode = VOIDmode;
3466 /* If all argument slots are used, then it must go on the stack. */
3467 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3468 return 0;
3470 /* Check for and handle homogeneous FP aggregates. */
3471 if (type)
3472 hfa_mode = hfa_element_mode (type, 0);
3474 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3475 and unprototyped hfas are passed specially. */
3476 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3478 rtx loc[16];
3479 int i = 0;
3480 int fp_regs = cum->fp_regs;
3481 int int_regs = cum->words + offset;
3482 int hfa_size = GET_MODE_SIZE (hfa_mode);
3483 int byte_size;
3484 int args_byte_size;
3486 /* If prototyped, pass it in FR regs then GR regs.
3487 If not prototyped, pass it in both FR and GR regs.
3489 If this is an SFmode aggregate, then it is possible to run out of
3490 FR regs while GR regs are still left. In that case, we pass the
3491 remaining part in the GR regs. */
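/* For example, assuming a prototyped call with no FP arguments so far, an
   HFA of twelve floats puts its first eight elements in f8-f15, and the
   remaining sixteen bytes go to two general argument registers in the
   second loop below. */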
3493 /* Fill the FP regs. We do this always. We stop if we reach the end
3494 of the argument, the last FP register, or the last argument slot. */
3496 byte_size = ((mode == BLKmode)
3497 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3498 args_byte_size = int_regs * UNITS_PER_WORD;
3499 offset = 0;
3500 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3501 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3503 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3504 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3505 + fp_regs)),
3506 GEN_INT (offset));
3507 offset += hfa_size;
3508 args_byte_size += hfa_size;
3509 fp_regs++;
3512 /* If no prototype, then the whole thing must go in GR regs. */
3513 if (! cum->prototype)
3514 offset = 0;
3515 /* If this is an SFmode aggregate, then we might have some left over
3516 that needs to go in GR regs. */
3517 else if (byte_size != offset)
3518 int_regs += offset / UNITS_PER_WORD;
3520 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3522 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3524 enum machine_mode gr_mode = DImode;
3525 unsigned int gr_size;
3527 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3528 then this goes in a GR reg left adjusted/little endian, right
3529 adjusted/big endian. */
3530 /* ??? Currently this is handled wrong, because 4-byte hunks are
3531 always right adjusted/little endian. */
3532 if (offset & 0x4)
3533 gr_mode = SImode;
3534 /* If we have an even 4 byte hunk because the aggregate is a
3535 multiple of 4 bytes in size, then this goes in a GR reg right
3536 adjusted/little endian. */
3537 else if (byte_size - offset == 4)
3538 gr_mode = SImode;
3540 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3541 gen_rtx_REG (gr_mode, (basereg
3542 + int_regs)),
3543 GEN_INT (offset));
3545 gr_size = GET_MODE_SIZE (gr_mode);
3546 offset += gr_size;
3547 if (gr_size == UNITS_PER_WORD
3548 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3549 int_regs++;
3550 else if (gr_size > UNITS_PER_WORD)
3551 int_regs += gr_size / UNITS_PER_WORD;
3553 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3556 /* Integral types and aggregates go in general registers. If we have run out of
3557 FR registers, then FP values must also go in general registers. This can
3558 happen when we have a SFmode HFA. */
3559 else if (mode == TFmode || mode == TCmode
3560 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3562 int byte_size = ((mode == BLKmode)
3563 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3564 if (BYTES_BIG_ENDIAN
3565 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3566 && byte_size < UNITS_PER_WORD
3567 && byte_size > 0)
3569 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3570 gen_rtx_REG (DImode,
3571 (basereg + cum->words
3572 + offset)),
3573 const0_rtx);
3574 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3576 else
3577 return gen_rtx_REG (mode, basereg + cum->words + offset);
3581 /* If there is a prototype, then FP values go in a FR register when
3582 named, and in a GR register when unnamed. */
3583 else if (cum->prototype)
3585 if (named)
3586 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3587 /* In big-endian mode, an anonymous SFmode value must be represented
3588 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3589 the value into the high half of the general register. */
3590 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3591 return gen_rtx_PARALLEL (mode,
3592 gen_rtvec (1,
3593 gen_rtx_EXPR_LIST (VOIDmode,
3594 gen_rtx_REG (DImode, basereg + cum->words + offset),
3595 const0_rtx)));
3596 else
3597 return gen_rtx_REG (mode, basereg + cum->words + offset);
3599 /* If there is no prototype, then FP values go in both FR and GR
3600 registers. */
3601 else
3603 /* See comment above. */
3604 enum machine_mode inner_mode =
3605 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3607 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3608 gen_rtx_REG (mode, (FR_ARG_FIRST
3609 + cum->fp_regs)),
3610 const0_rtx);
3611 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3612 gen_rtx_REG (inner_mode,
3613 (basereg + cum->words
3614 + offset)),
3615 const0_rtx);
3617 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3621 /* Return the number of bytes, at the beginning of the argument, that must be
3622 put in registers. 0 if the argument is entirely in registers or entirely
3623 in memory. */
3625 static int
3626 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3627 tree type, bool named ATTRIBUTE_UNUSED)
3629 int words = ia64_function_arg_words (type, mode);
3630 int offset = ia64_function_arg_offset (cum, type, words);
3632 /* If all argument slots are used, then it must go on the stack. */
3633 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3634 return 0;
3636 /* It doesn't matter whether the argument goes in FR or GR regs. If
3637 it fits within the 8 argument slots, then it goes entirely in
3638 registers. If it extends past the last argument slot, then the rest
3639 goes on the stack. */
3641 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3642 return 0;
3644 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
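/* Worked example (reader's note, assuming UNITS_PER_WORD == 8 and
   MAX_ARGUMENT_SLOTS == 8): a 24-byte aggregate starting in slot 7
   (cum->words == 7, offset == 0) has words == 3, so it spills past the
   last slot and (8 - 7) * 8 == 8 bytes are passed in registers, with the
   remaining 16 bytes going on the stack.  */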
3647 /* Update CUM to point after this argument. This is patterned after
3648 ia64_function_arg. */
3650 void
3651 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3652 tree type, int named)
3654 int words = ia64_function_arg_words (type, mode);
3655 int offset = ia64_function_arg_offset (cum, type, words);
3656 enum machine_mode hfa_mode = VOIDmode;
3658 /* If all arg slots are already full, then there is nothing to do. */
3659 if (cum->words >= MAX_ARGUMENT_SLOTS)
3660 return;
3662 cum->words += words + offset;
3664 /* Check for and handle homogeneous FP aggregates. */
3665 if (type)
3666 hfa_mode = hfa_element_mode (type, 0);
3668 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3669 and unprototyped hfas are passed specially. */
3670 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3672 int fp_regs = cum->fp_regs;
3673 /* This is the original value of cum->words + offset. */
3674 int int_regs = cum->words - words;
3675 int hfa_size = GET_MODE_SIZE (hfa_mode);
3676 int byte_size;
3677 int args_byte_size;
3679 /* If prototyped, pass it in FR regs then GR regs.
3680 If not prototyped, pass it in both FR and GR regs.
3682 If this is an SFmode aggregate, then it is possible to run out of
3683 FR regs while GR regs are still left. In that case, we pass the
3684 remaining part in the GR regs. */
3686 /* Fill the FP regs. We always do this. We stop if we reach the end
3687 of the argument, the last FP register, or the last argument slot. */
3689 byte_size = ((mode == BLKmode)
3690 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3691 args_byte_size = int_regs * UNITS_PER_WORD;
3692 offset = 0;
3693 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3694 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3696 offset += hfa_size;
3697 args_byte_size += hfa_size;
3698 fp_regs++;
3701 cum->fp_regs = fp_regs;
3704 /* Integral and aggregates go in general registers. So do TFmode FP values.
3705 If we have run out of FR registers, then other FP values must also go in
3706 general registers. This can happen when we have an SFmode HFA. */
3707 else if (mode == TFmode || mode == TCmode
3708 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3709 cum->int_regs = cum->words;
3711 /* If there is a prototype, then FP values go in a FR register when
3712 named, and in a GR register when unnamed. */
3713 else if (cum->prototype)
3715 if (! named)
3716 cum->int_regs = cum->words;
3717 else
3718 /* ??? Complex types should not reach here. */
3719 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3721 /* If there is no prototype, then FP values go in both FR and GR
3722 registers. */
3723 else
3725 /* ??? Complex types should not reach here. */
3726 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3727 cum->int_regs = cum->words;
3731 /* Arguments with alignment larger than 8 bytes start at the next even
3732 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
3733 even though their normal alignment is 8 bytes. See ia64_function_arg. */
3736 ia64_function_arg_boundary (enum machine_mode mode, tree type)
3739 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
3740 return PARM_BOUNDARY * 2;
3742 if (type)
3744 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
3745 return PARM_BOUNDARY * 2;
3746 else
3747 return PARM_BOUNDARY;
3750 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
3751 return PARM_BOUNDARY * 2;
3752 else
3753 return PARM_BOUNDARY;
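/* Reader's note (assuming the usual ia64 PARM_BOUNDARY of 64 bits): TFmode
   arguments on ILP32 HP-UX, and any type aligned to more than 64 bits, get
   a 128-bit argument boundary; everything else stays on a 64-bit boundary.  */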
3756 /* Variable sized types are passed by reference. */
3757 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3759 static bool
3760 ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3761 enum machine_mode mode ATTRIBUTE_UNUSED,
3762 tree type, bool named ATTRIBUTE_UNUSED)
3764 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3767 /* True if it is OK to do sibling call optimization for the specified
3768 call expression EXP. DECL will be the called function, or NULL if
3769 this is an indirect call. */
3770 static bool
3771 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3773 /* We can't perform a sibcall if the current function has the syscall_linkage
3774 attribute. */
3775 if (lookup_attribute ("syscall_linkage",
3776 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
3777 return false;
3779 /* We must always return with our current GP. This means we can
3780 only sibcall to functions defined in the current module. */
3781 return decl && (*targetm.binds_local_p) (decl);
3785 /* Implement va_arg. */
3787 static tree
3788 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3790 /* Variable sized types are passed by reference. */
3791 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
3793 tree ptrtype = build_pointer_type (type);
3794 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
3795 return build_va_arg_indirect_ref (addr);
3798 /* Aggregate arguments with alignment larger than 8 bytes start at
3799 the next even boundary. Integer and floating point arguments
3800 do so if they are larger than 8 bytes, whether or not they are
3801 also aligned larger than 8 bytes. */
3802 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3803 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3805 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3806 build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
3807 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3808 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
3809 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3810 gimplify_and_add (t, pre_p);
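/* Reader's note: with UNITS_PER_WORD == 8 the statements above compute
   valist = (valist + 15) & -16, rounding the argument pointer up to the
   next 16-byte (even slot) boundary.  */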
3813 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3816 /* Return 1 if the function return value is returned in memory. Return 0 if it is
3817 in a register. */
3819 static bool
3820 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
3822 enum machine_mode mode;
3823 enum machine_mode hfa_mode;
3824 HOST_WIDE_INT byte_size;
3826 mode = TYPE_MODE (valtype);
3827 byte_size = GET_MODE_SIZE (mode);
3828 if (mode == BLKmode)
3830 byte_size = int_size_in_bytes (valtype);
3831 if (byte_size < 0)
3832 return true;
3835 /* HFAs with up to 8 elements are returned in the FP argument registers. */
3837 hfa_mode = hfa_element_mode (valtype, 0);
3838 if (hfa_mode != VOIDmode)
3840 int hfa_size = GET_MODE_SIZE (hfa_mode);
3842 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3843 return true;
3844 else
3845 return false;
3847 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3848 return true;
3849 else
3850 return false;
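/* Reader's note (assuming MAX_ARGUMENT_SLOTS == 8): an HFA is returned in
   FP registers only when it has at most 8 elements.  A struct of ten
   doubles (byte_size / hfa_size == 10) therefore goes in memory, while a
   struct of four floats is returned starting at FR_ARG_FIRST (f8-f11 under
   the ia64 software conventions).  */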
3853 /* Return rtx for register that holds the function return value. */
3856 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
3858 enum machine_mode mode;
3859 enum machine_mode hfa_mode;
3861 mode = TYPE_MODE (valtype);
3862 hfa_mode = hfa_element_mode (valtype, 0);
3864 if (hfa_mode != VOIDmode)
3866 rtx loc[8];
3867 int i;
3868 int hfa_size;
3869 int byte_size;
3870 int offset;
3872 hfa_size = GET_MODE_SIZE (hfa_mode);
3873 byte_size = ((mode == BLKmode)
3874 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3875 offset = 0;
3876 for (i = 0; offset < byte_size; i++)
3878 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3879 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3880 GEN_INT (offset));
3881 offset += hfa_size;
3883 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3885 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
3886 return gen_rtx_REG (mode, FR_ARG_FIRST);
3887 else
3889 bool need_parallel = false;
3891 /* In big-endian mode, we need to manage the layout of aggregates
3892 in the registers so that we get the bits properly aligned in
3893 the highpart of the registers. */
3894 if (BYTES_BIG_ENDIAN
3895 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3896 need_parallel = true;
3898 /* Something like struct S { long double x; char a[0] } is not an
3899 HFA structure, and therefore doesn't go in fp registers. But
3900 the middle-end will give it XFmode anyway, and XFmode values
3901 don't normally fit in integer registers. So we need to smuggle
3902 the value inside a parallel. */
3903 else if (mode == XFmode || mode == XCmode)
3904 need_parallel = true;
3906 if (need_parallel)
3908 rtx loc[8];
3909 int offset;
3910 int bytesize;
3911 int i;
3913 offset = 0;
3914 bytesize = int_size_in_bytes (valtype);
3915 /* An empty PARALLEL is invalid here, but the return value
3916 doesn't matter for empty structs. */
3917 if (bytesize == 0)
3918 return gen_rtx_REG (mode, GR_RET_FIRST);
3919 for (i = 0; offset < bytesize; i++)
3921 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3922 gen_rtx_REG (DImode,
3923 GR_RET_FIRST + i),
3924 GEN_INT (offset));
3925 offset += UNITS_PER_WORD;
3927 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3930 return gen_rtx_REG (mode, GR_RET_FIRST);
3934 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3935 We need to emit DTP-relative relocations. */
3937 void
3938 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
3940 if (size != 8)
3941 abort ();
3942 fputs ("\tdata8.ua\t@dtprel(", file);
3943 output_addr_const (file, x);
3944 fputs (")", file);
3947 /* Print a memory address as an operand to reference that memory location. */
3949 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3950 also call this from ia64_print_operand for memory addresses. */
3952 void
3953 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
3954 rtx address ATTRIBUTE_UNUSED)
3958 /* Print an operand to an assembler instruction.
3959 C Swap and print a comparison operator.
3960 D Print an FP comparison operator.
3961 E Print 32 - constant, for SImode shifts as extract.
3962 e Print 64 - constant, for DImode rotates.
3963 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3964 a floating point register emitted normally.
3965 I Invert a predicate register by adding 1.
3966 J Select the proper predicate register for a condition.
3967 j Select the inverse predicate register for a condition.
3968 O Append .acq for volatile load.
3969 P Postincrement of a MEM.
3970 Q Append .rel for volatile store.
3971 S Shift amount for shladd instruction.
3972 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3973 for Intel assembler.
3974 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3975 for Intel assembler.
3976 r Print register name, or constant 0 as r0. HP compatibility for
3977 Linux kernel.
3978 v Print vector constant value as an 8-byte integer value. */
3980 void
3981 ia64_print_operand (FILE * file, rtx x, int code)
3983 const char *str;
3985 switch (code)
3987 case 0:
3988 /* Handled below. */
3989 break;
3991 case 'C':
3993 enum rtx_code c = swap_condition (GET_CODE (x));
3994 fputs (GET_RTX_NAME (c), file);
3995 return;
3998 case 'D':
3999 switch (GET_CODE (x))
4001 case NE:
4002 str = "neq";
4003 break;
4004 case UNORDERED:
4005 str = "unord";
4006 break;
4007 case ORDERED:
4008 str = "ord";
4009 break;
4010 default:
4011 str = GET_RTX_NAME (GET_CODE (x));
4012 break;
4014 fputs (str, file);
4015 return;
4017 case 'E':
4018 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4019 return;
4021 case 'e':
4022 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4023 return;
4025 case 'F':
4026 if (x == CONST0_RTX (GET_MODE (x)))
4027 str = reg_names [FR_REG (0)];
4028 else if (x == CONST1_RTX (GET_MODE (x)))
4029 str = reg_names [FR_REG (1)];
4030 else if (GET_CODE (x) == REG)
4031 str = reg_names [REGNO (x)];
4032 else
4033 abort ();
4034 fputs (str, file);
4035 return;
4037 case 'I':
4038 fputs (reg_names [REGNO (x) + 1], file);
4039 return;
4041 case 'J':
4042 case 'j':
4044 unsigned int regno = REGNO (XEXP (x, 0));
4045 if (GET_CODE (x) == EQ)
4046 regno += 1;
4047 if (code == 'j')
4048 regno ^= 1;
4049 fputs (reg_names [regno], file);
4051 return;
4053 case 'O':
4054 if (MEM_VOLATILE_P (x))
4055 fputs(".acq", file);
4056 return;
4058 case 'P':
4060 HOST_WIDE_INT value;
4062 switch (GET_CODE (XEXP (x, 0)))
4064 default:
4065 return;
4067 case POST_MODIFY:
4068 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4069 if (GET_CODE (x) == CONST_INT)
4070 value = INTVAL (x);
4071 else if (GET_CODE (x) == REG)
4073 fprintf (file, ", %s", reg_names[REGNO (x)]);
4074 return;
4076 else
4077 abort ();
4078 break;
4080 case POST_INC:
4081 value = GET_MODE_SIZE (GET_MODE (x));
4082 break;
4084 case POST_DEC:
4085 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4086 break;
4089 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4090 return;
4093 case 'Q':
4094 if (MEM_VOLATILE_P (x))
4095 fputs(".rel", file);
4096 return;
4098 case 'S':
4099 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4100 return;
4102 case 'T':
4103 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4105 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4106 return;
4108 break;
4110 case 'U':
4111 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4113 const char *prefix = "0x";
4114 if (INTVAL (x) & 0x80000000)
4116 fprintf (file, "0xffffffff");
4117 prefix = "";
4119 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4120 return;
4122 break;
4124 case 'r':
4125 /* If this operand is the constant zero, write it as register zero.
4126 Any register, zero, or CONST_INT value is OK here. */
4127 if (GET_CODE (x) == REG)
4128 fputs (reg_names[REGNO (x)], file);
4129 else if (x == CONST0_RTX (GET_MODE (x)))
4130 fputs ("r0", file);
4131 else if (GET_CODE (x) == CONST_INT)
4132 output_addr_const (file, x);
4133 else
4134 output_operand_lossage ("invalid %%r value");
4135 return;
4137 case 'v':
4138 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4139 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4140 break;
4142 case '+':
4144 const char *which;
4146 /* For conditional branches, returns or calls, substitute
4147 sptk, dptk, dpnt, or spnt for %s. */
4148 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4149 if (x)
4151 int pred_val = INTVAL (XEXP (x, 0));
4153 /* Guess the top and bottom 2% statically predicted. */
4154 if (pred_val < REG_BR_PROB_BASE / 50)
4155 which = ".spnt";
4156 else if (pred_val < REG_BR_PROB_BASE / 2)
4157 which = ".dpnt";
4158 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4159 which = ".dptk";
4160 else
4161 which = ".sptk";
4163 else if (GET_CODE (current_output_insn) == CALL_INSN)
4164 which = ".sptk";
4165 else
4166 which = ".dptk";
4168 fputs (which, file);
4169 return;
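/* Reader's note (assuming GCC's REG_BR_PROB_BASE of 10000): the thresholds
   above work out to 200, 5000 and 9800, i.e. below 2% probability emits
   .spnt, 2%-50% emits .dpnt, 50%-98% emits .dptk, and above 98% emits
   .sptk.  */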
4172 case ',':
4173 x = current_insn_predicate;
4174 if (x)
4176 unsigned int regno = REGNO (XEXP (x, 0));
4177 if (GET_CODE (x) == EQ)
4178 regno += 1;
4179 fprintf (file, "(%s) ", reg_names [regno]);
4181 return;
4183 default:
4184 output_operand_lossage ("ia64_print_operand: unknown code");
4185 return;
4188 switch (GET_CODE (x))
4190 /* This happens for the spill/restore instructions. */
4191 case POST_INC:
4192 case POST_DEC:
4193 case POST_MODIFY:
4194 x = XEXP (x, 0);
4195 /* ... fall through ... */
4197 case REG:
4198 fputs (reg_names [REGNO (x)], file);
4199 break;
4201 case MEM:
4203 rtx addr = XEXP (x, 0);
4204 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4205 addr = XEXP (addr, 0);
4206 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4207 break;
4210 default:
4211 output_addr_const (file, x);
4212 break;
4215 return;
4218 /* Compute a (partial) cost for rtx X. Return true if the complete
4219 cost has been computed, and false if subexpressions should be
4220 scanned. In either case, *TOTAL contains the cost result. */
4221 /* ??? This is incomplete. */
4223 static bool
4224 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4226 switch (code)
4228 case CONST_INT:
4229 switch (outer_code)
4231 case SET:
4232 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4233 return true;
4234 case PLUS:
4235 if (CONST_OK_FOR_I (INTVAL (x)))
4236 *total = 0;
4237 else if (CONST_OK_FOR_J (INTVAL (x)))
4238 *total = 1;
4239 else
4240 *total = COSTS_N_INSNS (1);
4241 return true;
4242 default:
4243 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4244 *total = 0;
4245 else
4246 *total = COSTS_N_INSNS (1);
4247 return true;
4250 case CONST_DOUBLE:
4251 *total = COSTS_N_INSNS (1);
4252 return true;
4254 case CONST:
4255 case SYMBOL_REF:
4256 case LABEL_REF:
4257 *total = COSTS_N_INSNS (3);
4258 return true;
4260 case MULT:
4261 /* For multiplies wider than HImode, we have to go to the FPU,
4262 which normally involves copies. Plus there's the latency
4263 of the multiply itself, and the latency of the instructions to
4264 transfer integer regs to FP regs. */
4265 /* ??? Check for FP mode. */
4266 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4267 *total = COSTS_N_INSNS (10);
4268 else
4269 *total = COSTS_N_INSNS (2);
4270 return true;
4272 case PLUS:
4273 case MINUS:
4274 case ASHIFT:
4275 case ASHIFTRT:
4276 case LSHIFTRT:
4277 *total = COSTS_N_INSNS (1);
4278 return true;
4280 case DIV:
4281 case UDIV:
4282 case MOD:
4283 case UMOD:
4284 /* We make divide expensive, so that divide-by-constant will be
4285 optimized to a multiply. */
4286 *total = COSTS_N_INSNS (60);
4287 return true;
4289 default:
4290 return false;
4294 /* Calculate the cost of moving data from a register in class FROM to
4295 one in class TO, using MODE. */
4298 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4299 enum reg_class to)
4301 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4302 if (to == ADDL_REGS)
4303 to = GR_REGS;
4304 if (from == ADDL_REGS)
4305 from = GR_REGS;
4307 /* All costs are symmetric, so reduce cases by putting the
4308 lower number class as the destination. */
4309 if (from < to)
4311 enum reg_class tmp = to;
4312 to = from, from = tmp;
4315 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4316 so that we get secondary memory reloads. Between FR_REGS,
4317 we have to make this at least as expensive as MEMORY_MOVE_COST
4318 to avoid spectacularly poor register class preferencing. */
4319 if (mode == XFmode)
4321 if (to != GR_REGS || from != GR_REGS)
4322 return MEMORY_MOVE_COST (mode, to, 0);
4323 else
4324 return 3;
4327 switch (to)
4329 case PR_REGS:
4330 /* Moving between PR registers takes two insns. */
4331 if (from == PR_REGS)
4332 return 3;
4333 /* Moving between PR and anything but GR is impossible. */
4334 if (from != GR_REGS)
4335 return MEMORY_MOVE_COST (mode, to, 0);
4336 break;
4338 case BR_REGS:
4339 /* Moving between BR and anything but GR is impossible. */
4340 if (from != GR_REGS && from != GR_AND_BR_REGS)
4341 return MEMORY_MOVE_COST (mode, to, 0);
4342 break;
4344 case AR_I_REGS:
4345 case AR_M_REGS:
4346 /* Moving between AR and anything but GR is impossible. */
4347 if (from != GR_REGS)
4348 return MEMORY_MOVE_COST (mode, to, 0);
4349 break;
4351 case GR_REGS:
4352 case FR_REGS:
4353 case GR_AND_FR_REGS:
4354 case GR_AND_BR_REGS:
4355 case ALL_REGS:
4356 break;
4358 default:
4359 abort ();
4362 return 2;
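/* Reader's note: the net effect is that ordinary register-register copies
   cost 2, PR<->PR and GR<->GR XFmode copies cost 3, and moves the hardware
   cannot do directly (PR, BR or AR paired with a non-GR class, or XFmode
   involving a non-GR class) are priced at MEMORY_MOVE_COST so that reload
   goes through memory instead.  */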
4365 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS
4366 to use when copying X into that class. */
4368 enum reg_class
4369 ia64_preferred_reload_class (rtx x, enum reg_class class)
4371 switch (class)
4373 case FR_REGS:
4374 /* Don't allow volatile mem reloads into floating point registers.
4375 This is defined to force reload to choose the r/m case instead
4376 of the f/f case when reloading (set (reg fX) (mem/v)). */
4377 if (MEM_P (x) && MEM_VOLATILE_P (x))
4378 return NO_REGS;
4380 /* Force all unrecognized constants into the constant pool. */
4381 if (CONSTANT_P (x))
4382 return NO_REGS;
4383 break;
4385 case AR_M_REGS:
4386 case AR_I_REGS:
4387 if (!OBJECT_P (x))
4388 return NO_REGS;
4389 break;
4391 default:
4392 break;
4395 return class;
4398 /* This function returns the register class required for a secondary
4399 register when copying between one of the registers in CLASS, and X,
4400 using MODE. A return value of NO_REGS means that no secondary register
4401 is required. */
4403 enum reg_class
4404 ia64_secondary_reload_class (enum reg_class class,
4405 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4407 int regno = -1;
4409 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4410 regno = true_regnum (x);
4412 switch (class)
4414 case BR_REGS:
4415 case AR_M_REGS:
4416 case AR_I_REGS:
4417 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4418 interaction. We end up with two pseudos with overlapping lifetimes
4419 both of which are equiv to the same constant, and both of which need
4420 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4421 changes depending on the path length, which means the qty_first_reg
4422 check in make_regs_eqv can give different answers at different times.
4423 At some point I'll probably need a reload_indi pattern to handle
4424 this.
4426 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4427 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4428 non-general registers for good measure. */
4429 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4430 return GR_REGS;
4432 /* This is needed if a pseudo used as a call_operand gets spilled to a
4433 stack slot. */
4434 if (GET_CODE (x) == MEM)
4435 return GR_REGS;
4436 break;
4438 case FR_REGS:
4439 /* Need to go through general registers to get to other class regs. */
4440 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4441 return GR_REGS;
4443 /* This can happen when a paradoxical subreg is an operand to the
4444 muldi3 pattern. */
4445 /* ??? This shouldn't be necessary after instruction scheduling is
4446 enabled, because paradoxical subregs are not accepted by
4447 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4448 stop the paradoxical subreg stupidity in the *_operand functions
4449 in recog.c. */
4450 if (GET_CODE (x) == MEM
4451 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4452 || GET_MODE (x) == QImode))
4453 return GR_REGS;
4455 /* This can happen because of the ior/and/etc patterns that accept FP
4456 registers as operands. If the third operand is a constant, then it
4457 needs to be reloaded into a FP register. */
4458 if (GET_CODE (x) == CONST_INT)
4459 return GR_REGS;
4461 /* This can happen because of register elimination in a muldi3 insn.
4462 E.g. `26107 * (unsigned long)&u'. */
4463 if (GET_CODE (x) == PLUS)
4464 return GR_REGS;
4465 break;
4467 case PR_REGS:
4468 /* ??? This happens if we cse/gcse a BImode value across a call,
4469 and the function has a nonlocal goto. This is because global
4470 does not allocate call crossing pseudos to hard registers when
4471 current_function_has_nonlocal_goto is true. This is relatively
4472 common for C++ programs that use exceptions. To reproduce,
4473 return NO_REGS and compile libstdc++. */
4474 if (GET_CODE (x) == MEM)
4475 return GR_REGS;
4477 /* This can happen when we take a BImode subreg of a DImode value,
4478 and that DImode value winds up in some non-GR register. */
4479 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4480 return GR_REGS;
4481 break;
4483 default:
4484 break;
4487 return NO_REGS;
4491 /* Emit text to declare externally defined variables and functions, because
4492 the Intel assembler does not support undefined externals. */
4494 void
4495 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4497 int save_referenced;
4499 /* GNU as does not need anything here, but the HP linker does need
4500 something for external functions. */
4502 if (TARGET_GNU_AS
4503 && (!TARGET_HPUX_LD
4504 || TREE_CODE (decl) != FUNCTION_DECL
4505 || strstr (name, "__builtin_") == name))
4506 return;
4508 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4509 the linker when we do this, so we need to be careful not to do this for
4510 builtin functions which have no library equivalent. Unfortunately, we
4511 can't tell here whether or not a function will actually be called by
4512 expand_expr, so we pull in library functions even if we may not need
4513 them later. */
4514 if (! strcmp (name, "__builtin_next_arg")
4515 || ! strcmp (name, "alloca")
4516 || ! strcmp (name, "__builtin_constant_p")
4517 || ! strcmp (name, "__builtin_args_info"))
4518 return;
4520 if (TARGET_HPUX_LD)
4521 ia64_hpux_add_extern_decl (decl);
4522 else
4524 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4525 restore it. */
4526 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4527 if (TREE_CODE (decl) == FUNCTION_DECL)
4528 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4529 (*targetm.asm_out.globalize_label) (file, name);
4530 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4534 /* Parse the -mfixed-range= option string. */
4536 static void
4537 fix_range (const char *const_str)
4539 int i, first, last;
4540 char *str, *dash, *comma;
4542 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4543 REG2 are either register names or register numbers. The effect
4544 of this option is to mark the registers in the range from REG1 to
4545 REG2 as ``fixed'' so they won't be used by the compiler. This is
4546 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
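/* For example (reader's note; the second range is hypothetical),
   -mfixed-range=f32-f127 reserves the rotating FP registers, and several
   ranges may be combined as in -mfixed-range=f12-f15,f32-f127; the loop
   below parses one REG1-REG2 pair per comma-separated entry.  */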
4548 i = strlen (const_str);
4549 str = (char *) alloca (i + 1);
4550 memcpy (str, const_str, i + 1);
4552 while (1)
4554 dash = strchr (str, '-');
4555 if (!dash)
4557 warning ("value of -mfixed-range must have form REG1-REG2");
4558 return;
4560 *dash = '\0';
4562 comma = strchr (dash + 1, ',');
4563 if (comma)
4564 *comma = '\0';
4566 first = decode_reg_name (str);
4567 if (first < 0)
4569 warning ("unknown register name: %s", str);
4570 return;
4573 last = decode_reg_name (dash + 1);
4574 if (last < 0)
4576 warning ("unknown register name: %s", dash + 1);
4577 return;
4580 *dash = '-';
4582 if (first > last)
4584 warning ("%s-%s is an empty range", str, dash + 1);
4585 return;
4588 for (i = first; i <= last; ++i)
4589 fixed_regs[i] = call_used_regs[i] = 1;
4591 if (!comma)
4592 break;
4594 *comma = ',';
4595 str = comma + 1;
4599 /* Implement TARGET_HANDLE_OPTION. */
4601 static bool
4602 ia64_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
4604 switch (code)
4606 case OPT_mfixed_range_:
4607 fix_range (arg);
4608 return true;
4610 case OPT_mtls_size_:
4612 char *end;
4613 unsigned long tmp = strtoul (arg, &end, 10);
4614 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4615 error ("bad value %<%s%> for -mtls-size= switch", arg);
4616 else
4617 ia64_tls_size = tmp;
4618 return true;
4621 case OPT_mtune_:
4623 static struct pta
4625 const char *name; /* processor name or nickname. */
4626 enum processor_type processor;
4628 const processor_alias_table[] =
4630 {"itanium", PROCESSOR_ITANIUM},
4631 {"itanium1", PROCESSOR_ITANIUM},
4632 {"merced", PROCESSOR_ITANIUM},
4633 {"itanium2", PROCESSOR_ITANIUM2},
4634 {"mckinley", PROCESSOR_ITANIUM2},
4636 int const pta_size = ARRAY_SIZE (processor_alias_table);
4637 int i;
4639 for (i = 0; i < pta_size; i++)
4640 if (!strcmp (arg, processor_alias_table[i].name))
4642 ia64_tune = processor_alias_table[i].processor;
4643 break;
4645 if (i == pta_size)
4646 error ("bad value %<%s%> for -mtune= switch", arg);
4647 return true;
4650 default:
4651 return true;
4655 /* Handle TARGET_OPTIONS switches. */
4657 void
4658 ia64_override_options (void)
4660 if (TARGET_AUTO_PIC)
4661 target_flags |= MASK_CONST_GP;
4663 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
4665 warning ("not yet implemented: latency-optimized inline square root");
4666 TARGET_INLINE_SQRT = INL_MAX_THR;
4669 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4670 flag_schedule_insns_after_reload = 0;
4672 /* Variable tracking should be run after all optimizations which change order
4673 of insns. It also needs a valid CFG. */
4674 ia64_flag_var_tracking = flag_var_tracking;
4675 flag_var_tracking = 0;
4677 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4679 init_machine_status = ia64_init_machine_status;
4682 static struct machine_function *
4683 ia64_init_machine_status (void)
4685 return ggc_alloc_cleared (sizeof (struct machine_function));
4688 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4689 static enum attr_type ia64_safe_type (rtx);
4691 static enum attr_itanium_class
4692 ia64_safe_itanium_class (rtx insn)
4694 if (recog_memoized (insn) >= 0)
4695 return get_attr_itanium_class (insn);
4696 else
4697 return ITANIUM_CLASS_UNKNOWN;
4700 static enum attr_type
4701 ia64_safe_type (rtx insn)
4703 if (recog_memoized (insn) >= 0)
4704 return get_attr_type (insn);
4705 else
4706 return TYPE_UNKNOWN;
4709 /* The following collection of routines emits instruction group stop bits as
4710 necessary to avoid dependencies. */
4712 /* Need to track some additional registers as far as serialization is
4713 concerned so we can properly handle br.call and br.ret. We could
4714 make these registers visible to gcc, but since these registers are
4715 never explicitly used in gcc generated code, it seems wasteful to
4716 do so (plus it would make the call and return patterns needlessly
4717 complex). */
4718 #define REG_RP (BR_REG (0))
4719 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4720 /* This is used for volatile asms which may require a stop bit immediately
4721 before and after them. */
4722 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4723 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4724 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4726 /* For each register, we keep track of how it has been written in the
4727 current instruction group.
4729 If a register is written unconditionally (no qualifying predicate),
4730 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4732 If a register is written if its qualifying predicate P is true, we
4733 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4734 may be written again by the complement of P (P^1) and when this happens,
4735 WRITE_COUNT gets set to 2.
4737 The result of this is that whenever an insn attempts to write a register
4738 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4740 If a predicate register is written by a floating-point insn, we set
4741 WRITTEN_BY_FP to true.
4743 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4744 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
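/* Worked example (reader's note): after "(p6) mov r14 = r15" the entry for
   r14 has WRITE_COUNT == 1 and FIRST_PRED == p6.  A second write to r14 in
   the same group is allowed without a stop bit only under p6's complement
   (p6 ^ 1), leaving aside the AND.ORCM/OR.ANDCM cases; any other qualifying
   predicate, or an unpredicated write, requires an insn group barrier
   first.  */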
4746 struct reg_write_state
4748 unsigned int write_count : 2;
4749 unsigned int first_pred : 16;
4750 unsigned int written_by_fp : 1;
4751 unsigned int written_by_and : 1;
4752 unsigned int written_by_or : 1;
4755 /* Cumulative info for the current instruction group. */
4756 struct reg_write_state rws_sum[NUM_REGS];
4757 /* Info for the current instruction. This gets copied to rws_sum after a
4758 stop bit is emitted. */
4759 struct reg_write_state rws_insn[NUM_REGS];
4761 /* Indicates whether this is the first instruction after a stop bit,
4762 in which case we don't need another stop bit. Without this, we hit
4763 the abort in ia64_variable_issue when scheduling an alloc. */
4764 static int first_instruction;
4766 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4767 RTL for one instruction. */
4768 struct reg_flags
4770 unsigned int is_write : 1; /* Is register being written? */
4771 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4772 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4773 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4774 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4775 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4778 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4779 static int rws_access_regno (int, struct reg_flags, int);
4780 static int rws_access_reg (rtx, struct reg_flags, int);
4781 static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4782 static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4783 static int rtx_needs_barrier (rtx, struct reg_flags, int);
4784 static void init_insn_group_barriers (void);
4785 static int group_barrier_needed_p (rtx);
4786 static int safe_group_barrier_needed_p (rtx);
4788 /* Update *RWS for REGNO, which is being written by the current instruction,
4789 with predicate PRED, and associated register flags in FLAGS. */
4791 static void
4792 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
4794 if (pred)
4795 rws[regno].write_count++;
4796 else
4797 rws[regno].write_count = 2;
4798 rws[regno].written_by_fp |= flags.is_fp;
4799 /* ??? Not tracking and/or across differing predicates. */
4800 rws[regno].written_by_and = flags.is_and;
4801 rws[regno].written_by_or = flags.is_or;
4802 rws[regno].first_pred = pred;
4805 /* Handle an access to register REGNO of type FLAGS using predicate register
4806 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4807 a dependency with an earlier instruction in the same group. */
4809 static int
4810 rws_access_regno (int regno, struct reg_flags flags, int pred)
4812 int need_barrier = 0;
4814 if (regno >= NUM_REGS)
4815 abort ();
4817 if (! PR_REGNO_P (regno))
4818 flags.is_and = flags.is_or = 0;
4820 if (flags.is_write)
4822 int write_count;
4824 /* One insn writes same reg multiple times? */
4825 if (rws_insn[regno].write_count > 0)
4826 abort ();
4828 /* Update info for current instruction. */
4829 rws_update (rws_insn, regno, flags, pred);
4830 write_count = rws_sum[regno].write_count;
4832 switch (write_count)
4834 case 0:
4835 /* The register has not been written yet. */
4836 rws_update (rws_sum, regno, flags, pred);
4837 break;
4839 case 1:
4840 /* The register has been written via a predicate. If this is
4841 not a complementary predicate, then we need a barrier. */
4842 /* ??? This assumes that P and P+1 are always complementary
4843 predicates for P even. */
4844 if (flags.is_and && rws_sum[regno].written_by_and)
4846 else if (flags.is_or && rws_sum[regno].written_by_or)
4848 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4849 need_barrier = 1;
4850 rws_update (rws_sum, regno, flags, pred);
4851 break;
4853 case 2:
4854 /* The register has been unconditionally written already. We
4855 need a barrier. */
4856 if (flags.is_and && rws_sum[regno].written_by_and)
4858 else if (flags.is_or && rws_sum[regno].written_by_or)
4860 else
4861 need_barrier = 1;
4862 rws_sum[regno].written_by_and = flags.is_and;
4863 rws_sum[regno].written_by_or = flags.is_or;
4864 break;
4866 default:
4867 abort ();
4870 else
4872 if (flags.is_branch)
4874 /* Branches have several RAW exceptions that allow us to avoid
4875 barriers. */
4877 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4878 /* RAW dependencies on branch regs are permissible as long
4879 as the writer is a non-branch instruction. Since we
4880 never generate code that uses a branch register written
4881 by a branch instruction, handling this case is
4882 easy. */
4883 return 0;
4885 if (REGNO_REG_CLASS (regno) == PR_REGS
4886 && ! rws_sum[regno].written_by_fp)
4887 /* The predicates of a branch are available within the
4888 same insn group as long as the predicate was written by
4889 something other than a floating-point instruction. */
4890 return 0;
4893 if (flags.is_and && rws_sum[regno].written_by_and)
4894 return 0;
4895 if (flags.is_or && rws_sum[regno].written_by_or)
4896 return 0;
4898 switch (rws_sum[regno].write_count)
4900 case 0:
4901 /* The register has not been written yet. */
4902 break;
4904 case 1:
4905 /* The register has been written via a predicate. If this is
4906 not a complementary predicate, then we need a barrier. */
4907 /* ??? This assumes that P and P+1 are always complementary
4908 predicates for P even. */
4909 if ((rws_sum[regno].first_pred ^ 1) != pred)
4910 need_barrier = 1;
4911 break;
4913 case 2:
4914 /* The register has been unconditionally written already. We
4915 need a barrier. */
4916 need_barrier = 1;
4917 break;
4919 default:
4920 abort ();
4924 return need_barrier;
4927 static int
4928 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
4930 int regno = REGNO (reg);
4931 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4933 if (n == 1)
4934 return rws_access_regno (regno, flags, pred);
4935 else
4937 int need_barrier = 0;
4938 while (--n >= 0)
4939 need_barrier |= rws_access_regno (regno + n, flags, pred);
4940 return need_barrier;
4944 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4945 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4947 static void
4948 update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
4950 rtx src = SET_SRC (x);
4952 *pcond = 0;
4954 switch (GET_CODE (src))
4956 case CALL:
4957 return;
4959 case IF_THEN_ELSE:
4960 if (SET_DEST (x) == pc_rtx)
4961 /* X is a conditional branch. */
4962 return;
4963 else
4965 int is_complemented = 0;
4967 /* X is a conditional move. */
4968 rtx cond = XEXP (src, 0);
4969 if (GET_CODE (cond) == EQ)
4970 is_complemented = 1;
4971 cond = XEXP (cond, 0);
4972 if (GET_CODE (cond) != REG
4973 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4974 abort ();
4975 *pcond = cond;
4976 if (XEXP (src, 1) == SET_DEST (x)
4977 || XEXP (src, 2) == SET_DEST (x))
4979 /* X is a conditional move that conditionally writes the
4980 destination. */
4982 /* We need another complement in this case. */
4983 if (XEXP (src, 1) == SET_DEST (x))
4984 is_complemented = ! is_complemented;
4986 *ppred = REGNO (cond);
4987 if (is_complemented)
4988 ++*ppred;
4991 /* ??? If this is a conditional write to the dest, then this
4992 instruction does not actually read one source. This probably
4993 doesn't matter, because that source is also the dest. */
4994 /* ??? Multiple writes to predicate registers are allowed
4995 if they are all AND type compares, or if they are all OR
4996 type compares. We do not generate such instructions
4997 currently. */
4999 /* ... fall through ... */
5001 default:
5002 if (COMPARISON_P (src)
5003 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5004 /* Set pflags->is_fp to 1 so that we know we're dealing
5005 with a floating point comparison when processing the
5006 destination of the SET. */
5007 pflags->is_fp = 1;
5009 /* Discover if this is a parallel comparison. We only handle
5010 and.orcm and or.andcm at present, since we must retain a
5011 strict inverse on the predicate pair. */
5012 else if (GET_CODE (src) == AND)
5013 pflags->is_and = 1;
5014 else if (GET_CODE (src) == IOR)
5015 pflags->is_or = 1;
5017 break;
5021 /* Subroutine of rtx_needs_barrier; this function determines whether the
5022 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5023 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5024 for this insn. */
5026 static int
5027 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
5029 int need_barrier = 0;
5030 rtx dst;
5031 rtx src = SET_SRC (x);
5033 if (GET_CODE (src) == CALL)
5034 /* We don't need to worry about the result registers that
5035 get written by subroutine call. */
5036 return rtx_needs_barrier (src, flags, pred);
5037 else if (SET_DEST (x) == pc_rtx)
5039 /* X is a conditional branch. */
5040 /* ??? This seems redundant, as the caller sets this bit for
5041 all JUMP_INSNs. */
5042 flags.is_branch = 1;
5043 return rtx_needs_barrier (src, flags, pred);
5046 need_barrier = rtx_needs_barrier (src, flags, pred);
5048 /* This instruction unconditionally uses a predicate register. */
5049 if (cond)
5050 need_barrier |= rws_access_reg (cond, flags, 0);
5052 dst = SET_DEST (x);
5053 if (GET_CODE (dst) == ZERO_EXTRACT)
5055 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5056 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5057 dst = XEXP (dst, 0);
5059 return need_barrier;
5062 /* Handle an access to rtx X of type FLAGS using predicate register
5063 PRED. Return 1 if this access creates a dependency with an earlier
5064 instruction in the same group. */
5066 static int
5067 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5069 int i, j;
5070 int is_complemented = 0;
5071 int need_barrier = 0;
5072 const char *format_ptr;
5073 struct reg_flags new_flags;
5074 rtx cond = 0;
5076 if (! x)
5077 return 0;
5079 new_flags = flags;
5081 switch (GET_CODE (x))
5083 case SET:
5084 update_set_flags (x, &new_flags, &pred, &cond);
5085 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
5086 if (GET_CODE (SET_SRC (x)) != CALL)
5088 new_flags.is_write = 1;
5089 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5091 break;
5093 case CALL:
5094 new_flags.is_write = 0;
5095 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5097 /* Avoid multiple register writes, in case this is a pattern with
5098 multiple CALL rtx. This avoids an abort in rws_access_reg. */
5099 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5101 new_flags.is_write = 1;
5102 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5103 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5104 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5106 break;
5108 case COND_EXEC:
5109 /* X is a predicated instruction. */
5111 cond = COND_EXEC_TEST (x);
5112 if (pred)
5113 abort ();
5114 need_barrier = rtx_needs_barrier (cond, flags, 0);
5116 if (GET_CODE (cond) == EQ)
5117 is_complemented = 1;
5118 cond = XEXP (cond, 0);
5119 if (GET_CODE (cond) != REG
5120 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5121 abort ();
5122 pred = REGNO (cond);
5123 if (is_complemented)
5124 ++pred;
5126 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5127 return need_barrier;
5129 case CLOBBER:
5130 case USE:
5131 /* Clobber & use are for earlier compiler-phases only. */
5132 break;
5134 case ASM_OPERANDS:
5135 case ASM_INPUT:
5136 /* We always emit stop bits for traditional asms. We emit stop bits
5137 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5138 if (GET_CODE (x) != ASM_OPERANDS
5139 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5141 /* Avoid writing the register multiple times if we have multiple
5142 asm outputs. This avoids an abort in rws_access_reg. */
5143 if (! rws_insn[REG_VOLATILE].write_count)
5145 new_flags.is_write = 1;
5146 rws_access_regno (REG_VOLATILE, new_flags, pred);
5148 return 1;
5151 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5152 We cannot just fall through here since then we would be confused
5153 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5154 a traditional asm, unlike its normal usage. */
5156 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5157 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5158 need_barrier = 1;
5159 break;
5161 case PARALLEL:
5162 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5164 rtx pat = XVECEXP (x, 0, i);
5165 switch (GET_CODE (pat))
5167 case SET:
5168 update_set_flags (pat, &new_flags, &pred, &cond);
5169 need_barrier |= set_src_needs_barrier (pat, new_flags,
5170 pred, cond);
5171 break;
5173 case USE:
5174 case CALL:
5175 case ASM_OPERANDS:
5176 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5177 break;
5179 case CLOBBER:
5180 case RETURN:
5181 break;
5183 default:
5184 gcc_unreachable ();
5187 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5189 rtx pat = XVECEXP (x, 0, i);
5190 if (GET_CODE (pat) == SET)
5192 if (GET_CODE (SET_SRC (pat)) != CALL)
5194 new_flags.is_write = 1;
5195 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5196 pred);
5199 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5200 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5202 break;
5204 case SUBREG:
5205 x = SUBREG_REG (x);
5206 /* FALLTHRU */
5207 case REG:
5208 if (REGNO (x) == AR_UNAT_REGNUM)
5210 for (i = 0; i < 64; ++i)
5211 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5213 else
5214 need_barrier = rws_access_reg (x, flags, pred);
5215 break;
5217 case MEM:
5218 /* Find the regs used in memory address computation. */
5219 new_flags.is_write = 0;
5220 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5221 break;
5223 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
5224 case SYMBOL_REF: case LABEL_REF: case CONST:
5225 break;
5227 /* Operators with side-effects. */
5228 case POST_INC: case POST_DEC:
5229 if (GET_CODE (XEXP (x, 0)) != REG)
5230 abort ();
5232 new_flags.is_write = 0;
5233 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5234 new_flags.is_write = 1;
5235 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5236 break;
5238 case POST_MODIFY:
5239 if (GET_CODE (XEXP (x, 0)) != REG)
5240 abort ();
5242 new_flags.is_write = 0;
5243 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5244 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5245 new_flags.is_write = 1;
5246 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5247 break;
5249 /* Handle common unary and binary ops for efficiency. */
5250 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5251 case MOD: case UDIV: case UMOD: case AND: case IOR:
5252 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5253 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5254 case NE: case EQ: case GE: case GT: case LE:
5255 case LT: case GEU: case GTU: case LEU: case LTU:
5256 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5257 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5258 break;
5260 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5261 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5262 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5263 case SQRT: case FFS: case POPCOUNT:
5264 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5265 break;
5267 case VEC_SELECT:
5268 /* VEC_SELECT's second argument is a PARALLEL with integers that
5269 describe the elements selected. On ia64, those integers are
5270 always constants. Avoid walking the PARALLEL so that we don't
5271 get confused with "normal" parallels and abort. */
5272 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5273 break;
5275 case UNSPEC:
5276 switch (XINT (x, 1))
5278 case UNSPEC_LTOFF_DTPMOD:
5279 case UNSPEC_LTOFF_DTPREL:
5280 case UNSPEC_DTPREL:
5281 case UNSPEC_LTOFF_TPREL:
5282 case UNSPEC_TPREL:
5283 case UNSPEC_PRED_REL_MUTEX:
5284 case UNSPEC_PIC_CALL:
5285 case UNSPEC_MF:
5286 case UNSPEC_FETCHADD_ACQ:
5287 case UNSPEC_BSP_VALUE:
5288 case UNSPEC_FLUSHRS:
5289 case UNSPEC_BUNDLE_SELECTOR:
5290 break;
5292 case UNSPEC_GR_SPILL:
5293 case UNSPEC_GR_RESTORE:
5295 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5296 HOST_WIDE_INT bit = (offset >> 3) & 63;
5298 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5299 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5300 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5301 new_flags, pred);
5302 break;
5305 case UNSPEC_FR_SPILL:
5306 case UNSPEC_FR_RESTORE:
5307 case UNSPEC_GETF_EXP:
5308 case UNSPEC_SETF_EXP:
5309 case UNSPEC_ADDP4:
5310 case UNSPEC_FR_SQRT_RECIP_APPROX:
5311 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5312 break;
5314 case UNSPEC_FR_RECIP_APPROX:
5315 case UNSPEC_SHRP:
5316 case UNSPEC_COPYSIGN:
5317 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5318 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5319 break;
5321 case UNSPEC_CMPXCHG_ACQ:
5322 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5323 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5324 break;
5326 default:
5327 abort ();
5329 break;
5331 case UNSPEC_VOLATILE:
5332 switch (XINT (x, 1))
5334 case UNSPECV_ALLOC:
5335 /* Alloc must always be the first instruction of a group.
5336 We force this by always returning true. */
5337 /* ??? We might get better scheduling if we explicitly check for
5338 input/local/output register dependencies, and modify the
5339 scheduler so that alloc is always reordered to the start of
5340 the current group. We could then eliminate all of the
5341 first_instruction code. */
5342 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5344 new_flags.is_write = 1;
5345 rws_access_regno (REG_AR_CFM, new_flags, pred);
5346 return 1;
5348 case UNSPECV_SET_BSP:
5349 need_barrier = 1;
5350 break;
5352 case UNSPECV_BLOCKAGE:
5353 case UNSPECV_INSN_GROUP_BARRIER:
5354 case UNSPECV_BREAK:
5355 case UNSPECV_PSAC_ALL:
5356 case UNSPECV_PSAC_NORMAL:
5357 return 0;
5359 default:
5360 abort ();
5362 break;
5364 case RETURN:
5365 new_flags.is_write = 0;
5366 need_barrier = rws_access_regno (REG_RP, flags, pred);
5367 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5369 new_flags.is_write = 1;
5370 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5371 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5372 break;
5374 default:
5375 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5376 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5377 switch (format_ptr[i])
5379 case '0': /* unused field */
5380 case 'i': /* integer */
5381 case 'n': /* note */
5382 case 'w': /* wide integer */
5383 case 's': /* pointer to string */
5384 case 'S': /* optional pointer to string */
5385 break;
5387 case 'e':
5388 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5389 need_barrier = 1;
5390 break;
5392 case 'E':
5393 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5394 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5395 need_barrier = 1;
5396 break;
5398 default:
5399 abort ();
5401 break;
5403 return need_barrier;
5406 /* Clear out the state for group_barrier_needed_p at the start of a
5407 sequence of insns. */
5409 static void
5410 init_insn_group_barriers (void)
5412 memset (rws_sum, 0, sizeof (rws_sum));
5413 first_instruction = 1;
5416 /* Given the current state, recorded by previous calls to this function,
5417 determine whether a group barrier (a stop bit) is necessary before INSN.
5418 Return nonzero if so. */
5420 static int
5421 group_barrier_needed_p (rtx insn)
5423 rtx pat;
5424 int need_barrier = 0;
5425 struct reg_flags flags;
5427 memset (&flags, 0, sizeof (flags));
5428 switch (GET_CODE (insn))
5430 case NOTE:
5431 break;
5433 case BARRIER:
5434 /* A barrier doesn't imply an instruction group boundary. */
5435 break;
5437 case CODE_LABEL:
5438 memset (rws_insn, 0, sizeof (rws_insn));
5439 return 1;
5441 case CALL_INSN:
5442 flags.is_branch = 1;
5443 flags.is_sibcall = SIBLING_CALL_P (insn);
5444 memset (rws_insn, 0, sizeof (rws_insn));
5446 /* Don't bundle a call following another call. */
5447 if ((pat = prev_active_insn (insn))
5448 && GET_CODE (pat) == CALL_INSN)
5450 need_barrier = 1;
5451 break;
5454 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5455 break;
5457 case JUMP_INSN:
5458 flags.is_branch = 1;
5460 /* Don't bundle a jump following a call. */
5461 if ((pat = prev_active_insn (insn))
5462 && GET_CODE (pat) == CALL_INSN)
5464 need_barrier = 1;
5465 break;
5467 /* FALLTHRU */
5469 case INSN:
5470 if (GET_CODE (PATTERN (insn)) == USE
5471 || GET_CODE (PATTERN (insn)) == CLOBBER)
5472 /* Don't care about USE and CLOBBER "insns"---those are used to
5473 indicate to the optimizer that it shouldn't get rid of
5474 certain operations. */
5475 break;
5477 pat = PATTERN (insn);
5479 /* Ug. Hack hacks hacked elsewhere. */
5480 switch (recog_memoized (insn))
5482 /* We play dependency tricks with the epilogue in order
5483 to get proper schedules. Undo this for dv analysis. */
5484 case CODE_FOR_epilogue_deallocate_stack:
5485 case CODE_FOR_prologue_allocate_stack:
5486 pat = XVECEXP (pat, 0, 0);
5487 break;
5489 /* The pattern we use for br.cloop confuses the code above.
5490 The second element of the vector is representative. */
5491 case CODE_FOR_doloop_end_internal:
5492 pat = XVECEXP (pat, 0, 1);
5493 break;
5495 /* Doesn't generate code. */
5496 case CODE_FOR_pred_rel_mutex:
5497 case CODE_FOR_prologue_use:
5498 return 0;
5500 default:
5501 break;
5504 memset (rws_insn, 0, sizeof (rws_insn));
5505 need_barrier = rtx_needs_barrier (pat, flags, 0);
5507 /* Check to see if the previous instruction was a volatile
5508 asm. */
5509 if (! need_barrier)
5510 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5511 break;
5513 default:
5514 abort ();
5517 if (first_instruction && INSN_P (insn)
5518 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5519 && GET_CODE (PATTERN (insn)) != USE
5520 && GET_CODE (PATTERN (insn)) != CLOBBER)
5522 need_barrier = 0;
5523 first_instruction = 0;
5526 return need_barrier;
5529 /* Like group_barrier_needed_p, but do not clobber the current state. */
5531 static int
5532 safe_group_barrier_needed_p (rtx insn)
5534 struct reg_write_state rws_saved[NUM_REGS];
5535 int saved_first_instruction;
5536 int t;
5538 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5539 saved_first_instruction = first_instruction;
5541 t = group_barrier_needed_p (insn);
5543 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5544 first_instruction = saved_first_instruction;
5546 return t;
5549 /* Scan the current function and insert stop bits as necessary to
5550 eliminate dependencies. This function assumes that a final
5551 instruction scheduling pass has been run which has already
5552 inserted most of the necessary stop bits. This function only
5553 inserts new ones at basic block boundaries, since these are
5554 invisible to the scheduler. */
5556 static void
5557 emit_insn_group_barriers (FILE *dump)
5559 rtx insn;
5560 rtx last_label = 0;
5561 int insns_since_last_label = 0;
5563 init_insn_group_barriers ();
5565 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5567 if (GET_CODE (insn) == CODE_LABEL)
5569 if (insns_since_last_label)
5570 last_label = insn;
5571 insns_since_last_label = 0;
5573 else if (GET_CODE (insn) == NOTE
5574 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5576 if (insns_since_last_label)
5577 last_label = insn;
5578 insns_since_last_label = 0;
5580 else if (GET_CODE (insn) == INSN
5581 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5582 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5584 init_insn_group_barriers ();
5585 last_label = 0;
5587 else if (INSN_P (insn))
5589 insns_since_last_label = 1;
5591 if (group_barrier_needed_p (insn))
5593 if (last_label)
5595 if (dump)
5596 fprintf (dump, "Emitting stop before label %d\n",
5597 INSN_UID (last_label));
5598 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5599 insn = last_label;
5601 init_insn_group_barriers ();
5602 last_label = 0;
5609 /* Like emit_insn_group_barriers, but used when no final scheduling pass was run.
5610 This function has to emit all necessary group barriers. */
5612 static void
5613 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5615 rtx insn;
5617 init_insn_group_barriers ();
5619 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5621 if (GET_CODE (insn) == BARRIER)
5623 rtx last = prev_active_insn (insn);
5625 if (! last)
5626 continue;
5627 if (GET_CODE (last) == JUMP_INSN
5628 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5629 last = prev_active_insn (last);
5630 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5631 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5633 init_insn_group_barriers ();
5635 else if (INSN_P (insn))
5637 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5638 init_insn_group_barriers ();
5639 else if (group_barrier_needed_p (insn))
5641 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5642 init_insn_group_barriers ();
5643 group_barrier_needed_p (insn);
5651 /* Instruction scheduling support. */
5653 #define NR_BUNDLES 10
5655 /* A list of names of all available bundles. */
5657 static const char *bundle_name [NR_BUNDLES] =
5659 ".mii",
5660 ".mmi",
5661 ".mfi",
5662 ".mmf",
5663 #if NR_BUNDLES == 10
5664 ".bbb",
5665 ".mbb",
5666 #endif
5667 ".mib",
5668 ".mmb",
5669 ".mfb",
5670 ".mlx"
5673 /* Nonzero if we should insert stop bits into the schedule. */
5675 int ia64_final_schedule = 0;
5677 /* Codes of the corresponding queried units: */
5679 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5680 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5682 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5683 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5685 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5687 /* The following variable value is an insn group barrier. */
5689 static rtx dfa_stop_insn;
5691 /* The following variable value is the last issued insn. */
5693 static rtx last_scheduled_insn;
5695 /* The following variable value is the size of the DFA state. */
5697 static size_t dfa_state_size;
5699 /* The following variable value is a pointer to a DFA state used as
5700 a temporary variable. */
5702 static state_t temp_dfa_state = NULL;
5704 /* The following variable value is the DFA state after issuing the last
5705 insn. */
5707 static state_t prev_cycle_state = NULL;
5709 /* The following array element values are TRUE if the corresponding
5710 insn requires stop bits to be added before it. */
5712 static char *stops_p;
5714 /* The following variable is used to set up the array mentioned above. */
5716 static int stop_before_p = 0;
5718 /* The following variable value is the length of the arrays `clocks' and
5719 `add_cycles'. */
5721 static int clocks_length;
5723 /* The following array element values are cycles on which the
5724 corresponding insn will be issued. The array is used only for
5725 Itanium1. */
5727 static int *clocks;
5729 /* The following array element values are numbers of cycles that should
5730 be added to improve insn scheduling for MM-insns for Itanium1. */
5732 static int *add_cycles;
5734 static rtx ia64_single_set (rtx);
5735 static void ia64_emit_insn_before (rtx, rtx);
5737 /* Map a bundle number to its pseudo-op. */
5739 const char *
5740 get_bundle_name (int b)
5742 return bundle_name[b];
5746 /* Return the maximum number of instructions a cpu can issue. */
5748 static int
5749 ia64_issue_rate (void)
5751 return 6;
5754 /* Helper function - like single_set, but look inside COND_EXEC. */
5756 static rtx
5757 ia64_single_set (rtx insn)
5759 rtx x = PATTERN (insn), ret;
5760 if (GET_CODE (x) == COND_EXEC)
5761 x = COND_EXEC_CODE (x);
5762 if (GET_CODE (x) == SET)
5763 return x;
5765 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
5766 Although they are not classical single sets, the second set is there just
5767 to protect the insn from moving past FP-relative stack accesses. */
5768 switch (recog_memoized (insn))
5770 case CODE_FOR_prologue_allocate_stack:
5771 case CODE_FOR_epilogue_deallocate_stack:
5772 ret = XVECEXP (x, 0, 0);
5773 break;
5775 default:
5776 ret = single_set_2 (insn, x);
5777 break;
5780 return ret;
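/* For illustration, a schematic example of what ia64_single_set strips:
   given a predicated move whose pattern is roughly

     (cond_exec (ne (reg:BI p6) (const_int 0))
                (set (reg:DI r8) (reg:DI r9)))

   the COND_EXEC wrapper is removed and the inner
   (set (reg:DI r8) (reg:DI r9)) is returned.  */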
5783 /* Adjust the cost of a scheduling dependency. Return the new cost of
5784 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5786 static int
5787 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5789 enum attr_itanium_class dep_class;
5790 enum attr_itanium_class insn_class;
5792 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5793 return cost;
5795 insn_class = ia64_safe_itanium_class (insn);
5796 dep_class = ia64_safe_itanium_class (dep_insn);
5797 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5798 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5799 return 0;
5801 return cost;
5804 /* Like emit_insn_before, but skip cycle_display notes.
5805 ??? When cycle display notes are implemented, update this. */
5807 static void
5808 ia64_emit_insn_before (rtx insn, rtx before)
5810 emit_insn_before (insn, before);
5813 /* The following function marks insns that produce addresses for load
5814 and store insns. Such insns will be placed into M slots because this
5815 decreases latency time for Itanium1 (see function
5816 `ia64_produce_address_p' and the DFA descriptions). */
5818 static void
5819 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
5821 rtx insn, link, next, next_tail;
5823 next_tail = NEXT_INSN (tail);
5824 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5825 if (INSN_P (insn))
5826 insn->call = 0;
5827 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5828 if (INSN_P (insn)
5829 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5831 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
5833 next = XEXP (link, 0);
5834 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
5835 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
5836 && ia64_st_address_bypass_p (insn, next))
5837 break;
5838 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
5839 || ia64_safe_itanium_class (next)
5840 == ITANIUM_CLASS_FLD)
5841 && ia64_ld_address_bypass_p (insn, next))
5842 break;
5844 insn->call = link != 0;
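/* Note that the `call' bit of each insn is reused above as a scratch flag:
   it is set when the IALU insn feeds the address of a later load or store
   (via the bypass predicates), and it is read back by
   ia64_produce_address_p below, which the DFA descriptions consult when
   preferring M slots for such address producers.  */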
5848 /* We're beginning a new block. Initialize data structures as necessary. */
5850 static void
5851 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
5852 int sched_verbose ATTRIBUTE_UNUSED,
5853 int max_ready ATTRIBUTE_UNUSED)
5855 #ifdef ENABLE_CHECKING
5856 rtx insn;
5858 if (reload_completed)
5859 for (insn = NEXT_INSN (current_sched_info->prev_head);
5860 insn != current_sched_info->next_tail;
5861 insn = NEXT_INSN (insn))
5862 if (SCHED_GROUP_P (insn))
5863 abort ();
5864 #endif
5865 last_scheduled_insn = NULL_RTX;
5866 init_insn_group_barriers ();
5869 /* We are about to begin issuing insns for this clock cycle.
5870 Override the default sort algorithm to better slot instructions. */
5872 static int
5873 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
5874 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
5875 int reorder_type)
5877 int n_asms;
5878 int n_ready = *pn_ready;
5879 rtx *e_ready = ready + n_ready;
5880 rtx *insnp;
5882 if (sched_verbose)
5883 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
5885 if (reorder_type == 0)
5887 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5888 n_asms = 0;
5889 for (insnp = ready; insnp < e_ready; insnp++)
5890 if (insnp < e_ready)
5892 rtx insn = *insnp;
5893 enum attr_type t = ia64_safe_type (insn);
5894 if (t == TYPE_UNKNOWN)
5896 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5897 || asm_noperands (PATTERN (insn)) >= 0)
5899 rtx lowest = ready[n_asms];
5900 ready[n_asms] = insn;
5901 *insnp = lowest;
5902 n_asms++;
5904 else
5906 rtx highest = ready[n_ready - 1];
5907 ready[n_ready - 1] = insn;
5908 *insnp = highest;
5909 return 1;
5914 if (n_asms < n_ready)
5916 /* Some normal insns to process. Skip the asms. */
5917 ready += n_asms;
5918 n_ready -= n_asms;
5920 else if (n_ready > 0)
5921 return 1;
5924 if (ia64_final_schedule)
5926 int deleted = 0;
5927 int nr_need_stop = 0;
5929 for (insnp = ready; insnp < e_ready; insnp++)
5930 if (safe_group_barrier_needed_p (*insnp))
5931 nr_need_stop++;
5933 if (reorder_type == 1 && n_ready == nr_need_stop)
5934 return 0;
5935 if (reorder_type == 0)
5936 return 1;
5937 insnp = e_ready;
5938 /* Move down everything that needs a stop bit, preserving
5939 relative order. */
5940 while (insnp-- > ready + deleted)
5941 while (insnp >= ready + deleted)
5943 rtx insn = *insnp;
5944 if (! safe_group_barrier_needed_p (insn))
5945 break;
5946 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
5947 *ready = insn;
5948 deleted++;
5950 n_ready -= deleted;
5951 ready += deleted;
5954 return 1;
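/* A small example of the reorder_type == 1 case above: if READY holds
   { A, B, C } (lowest index first) and only B needs a stop bit, B is
   pulled down to ready[0] while A and C keep their relative order above
   it, so the stop-free insns are the ones left at the high-index end of
   READY, which is the end the scheduler issues from first.  */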
5957 /* We are about to begin issuing insns for this clock cycle. Override
5958 the default sort algorithm to better slot instructions. */
5960 static int
5961 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
5962 int clock_var)
5964 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
5965 pn_ready, clock_var, 0);
5968 /* Like ia64_sched_reorder, but called after issuing each insn.
5969 Override the default sort algorithm to better slot instructions. */
5971 static int
5972 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
5973 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
5974 int *pn_ready, int clock_var)
5976 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
5977 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
5978 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
5979 clock_var, 1);
5982 /* We are about to issue INSN. Return the number of insns left on the
5983 ready queue that can be issued this cycle. */
5985 static int
5986 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
5987 int sched_verbose ATTRIBUTE_UNUSED,
5988 rtx insn ATTRIBUTE_UNUSED,
5989 int can_issue_more ATTRIBUTE_UNUSED)
5991 last_scheduled_insn = insn;
5992 memcpy (prev_cycle_state, curr_state, dfa_state_size);
5993 if (reload_completed)
5995 if (group_barrier_needed_p (insn))
5996 abort ();
5997 if (GET_CODE (insn) == CALL_INSN)
5998 init_insn_group_barriers ();
5999 stops_p [INSN_UID (insn)] = stop_before_p;
6000 stop_before_p = 0;
6002 return 1;
6005 /* We are choosing insn from the ready queue. Return nonzero if INSN
6006 can be chosen. */
6008 static int
6009 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6011 if (insn == NULL_RTX || !INSN_P (insn))
6012 abort ();
6013 return (!reload_completed
6014 || !safe_group_barrier_needed_p (insn));
6017 /* The following variable value is a pseudo-insn used by the DFA insn
6018 scheduler to change the DFA state when the simulated clock is
6019 increased. */
6021 static rtx dfa_pre_cycle_insn;
6023 /* We are about to begin issuing INSN. Return nonzero if we cannot
6024 issue it on the given cycle CLOCK and return zero if we should not sort
6025 the ready queue on the next clock start. */
6027 static int
6028 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6029 int clock, int *sort_p)
6031 int setup_clocks_p = FALSE;
6033 if (insn == NULL_RTX || !INSN_P (insn))
6034 abort ();
6035 if ((reload_completed && safe_group_barrier_needed_p (insn))
6036 || (last_scheduled_insn
6037 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6038 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6039 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6041 init_insn_group_barriers ();
6042 if (verbose && dump)
6043 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6044 last_clock == clock ? " + cycle advance" : "");
6045 stop_before_p = 1;
6046 if (last_clock == clock)
6048 state_transition (curr_state, dfa_stop_insn);
6049 if (TARGET_EARLY_STOP_BITS)
6050 *sort_p = (last_scheduled_insn == NULL_RTX
6051 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6052 else
6053 *sort_p = 0;
6054 return 1;
6056 else if (reload_completed)
6057 setup_clocks_p = TRUE;
6058 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6059 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6060 state_reset (curr_state);
6061 else
6063 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6064 state_transition (curr_state, dfa_stop_insn);
6065 state_transition (curr_state, dfa_pre_cycle_insn);
6066 state_transition (curr_state, NULL);
6069 else if (reload_completed)
6070 setup_clocks_p = TRUE;
6071 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6072 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6073 && asm_noperands (PATTERN (insn)) < 0)
6075 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6077 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6079 rtx link;
6080 int d = -1;
6082 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6083 if (REG_NOTE_KIND (link) == 0)
6085 enum attr_itanium_class dep_class;
6086 rtx dep_insn = XEXP (link, 0);
6088 dep_class = ia64_safe_itanium_class (dep_insn);
6089 if ((dep_class == ITANIUM_CLASS_MMMUL
6090 || dep_class == ITANIUM_CLASS_MMSHF)
6091 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6092 && (d < 0
6093 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6094 d = last_clock - clocks [INSN_UID (dep_insn)];
6096 if (d >= 0)
6097 add_cycles [INSN_UID (insn)] = 3 - d;
6100 return 0;
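/* A worked example of the clocks/add_cycles bookkeeping above: if the
   closest MMMUL/MMSHF producer of INSN satisfies
   d == last_clock - clocks[INSN_UID (dep_insn)] == 1 (so d < 4), then
   add_cycles[INSN_UID (insn)] is set to 3 - 1 == 2; the Itanium1-specific
   loop near the end of bundling () later uses that value to pad the
   schedule with extra nop bundles and stop bits.  */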
6105 /* The following page contains abstract data `bundle states' which are
6106 used for bundling insns (inserting nops and template generation). */
6108 /* The following describes state of insn bundling. */
6110 struct bundle_state
6112 /* Unique bundle state number to identify them in the debugging
6113 output */
6114 int unique_num;
6115 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6116 /* number of nops before and after the insn */
6117 short before_nops_num, after_nops_num;
6118 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
6119 insn) */
6120 int cost; /* cost of the state in cycles */
6121 int accumulated_insns_num; /* number of all previous insns including
6122 nops. L is considered as 2 insns */
6123 int branch_deviation; /* deviation of previous branches from 3rd slots */
6124 struct bundle_state *next; /* next state with the same insn_num */
6125 struct bundle_state *originator; /* originator (previous insn state) */
6126 /* All bundle states are in the following chain. */
6127 struct bundle_state *allocated_states_chain;
6128 /* The DFA State after issuing the insn and the nops. */
6129 state_t dfa_state;
6132 /* The following maps an insn number to the corresponding bundle state. */
6134 static struct bundle_state **index_to_bundle_states;
6136 /* The unique number of next bundle state. */
6138 static int bundle_states_num;
6140 /* All allocated bundle states are in the following chain. */
6142 static struct bundle_state *allocated_bundle_states_chain;
6144 /* All allocated but not used bundle states are in the following
6145 chain. */
6147 static struct bundle_state *free_bundle_state_chain;
6150 /* The following function returns a free bundle state. */
6152 static struct bundle_state *
6153 get_free_bundle_state (void)
6155 struct bundle_state *result;
6157 if (free_bundle_state_chain != NULL)
6159 result = free_bundle_state_chain;
6160 free_bundle_state_chain = result->next;
6162 else
6164 result = xmalloc (sizeof (struct bundle_state));
6165 result->dfa_state = xmalloc (dfa_state_size);
6166 result->allocated_states_chain = allocated_bundle_states_chain;
6167 allocated_bundle_states_chain = result;
6169 result->unique_num = bundle_states_num++;
6170 return result;
6174 /* The following function frees given bundle state. */
6176 static void
6177 free_bundle_state (struct bundle_state *state)
6179 state->next = free_bundle_state_chain;
6180 free_bundle_state_chain = state;
6183 /* Start work with abstract data `bundle states'. */
6185 static void
6186 initiate_bundle_states (void)
6188 bundle_states_num = 0;
6189 free_bundle_state_chain = NULL;
6190 allocated_bundle_states_chain = NULL;
6193 /* Finish work with abstract data `bundle states'. */
6195 static void
6196 finish_bundle_states (void)
6198 struct bundle_state *curr_state, *next_state;
6200 for (curr_state = allocated_bundle_states_chain;
6201 curr_state != NULL;
6202 curr_state = next_state)
6204 next_state = curr_state->allocated_states_chain;
6205 free (curr_state->dfa_state);
6206 free (curr_state);
6210 /* Hash table of the bundle states. The key is dfa_state and insn_num
6211 of the bundle states. */
6213 static htab_t bundle_state_table;
6215 /* The function returns hash of BUNDLE_STATE. */
6217 static unsigned
6218 bundle_state_hash (const void *bundle_state)
6220 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6221 unsigned result, i;
6223 for (result = i = 0; i < dfa_state_size; i++)
6224 result += (((unsigned char *) state->dfa_state) [i]
6225 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6226 return result + state->insn_num;
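/* Spelled out, the mixing above (assuming CHAR_BIT == 8) shifts byte I of
   dfa_state left by (I % 8) * 3 + 8 bits -- i.e. by 8, 11, 14, ..., 29 for
   I == 0..7, repeating for I == 8..15 and so on -- sums the shifted
   values, and finally adds insn_num.  */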
6229 /* The function returns nonzero if the bundle state keys are equal. */
6231 static int
6232 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6234 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6235 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6237 return (state1->insn_num == state2->insn_num
6238 && memcmp (state1->dfa_state, state2->dfa_state,
6239 dfa_state_size) == 0);
6242 /* The function inserts the BUNDLE_STATE into the hash table. The
6243 function returns nonzero if the bundle has been inserted into the
6244 table. The table contains the best bundle state with the given key. */
6246 static int
6247 insert_bundle_state (struct bundle_state *bundle_state)
6249 void **entry_ptr;
6251 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6252 if (*entry_ptr == NULL)
6254 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6255 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6256 *entry_ptr = (void *) bundle_state;
6257 return TRUE;
6259 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6260 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6261 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6262 > bundle_state->accumulated_insns_num
6263 || (((struct bundle_state *)
6264 *entry_ptr)->accumulated_insns_num
6265 == bundle_state->accumulated_insns_num
6266 && ((struct bundle_state *)
6267 *entry_ptr)->branch_deviation
6268 > bundle_state->branch_deviation))))
6271 struct bundle_state temp;
6273 temp = *(struct bundle_state *) *entry_ptr;
6274 *(struct bundle_state *) *entry_ptr = *bundle_state;
6275 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6276 *bundle_state = temp;
6278 return FALSE;
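/* In other words, two states with the same (dfa_state, insn_num) key are
   compared lexicographically on (cost, accumulated_insns_num,
   branch_deviation) and the smaller triple is kept: e.g. a state with
   cost 2 and 7 accumulated insns replaces a stored state with cost 2 and
   8 accumulated insns.  */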
6281 /* Start work with the hash table. */
6283 static void
6284 initiate_bundle_state_table (void)
6286 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6287 (htab_del) 0);
6290 /* Finish work with the hash table. */
6292 static void
6293 finish_bundle_state_table (void)
6295 htab_delete (bundle_state_table);
6300 /* The following variable is an insn `nop' used to check bundle states
6301 with different numbers of inserted nops. */
6303 static rtx ia64_nop;
6305 /* The following function tries to issue NOPS_NUM nops for the current
6306 state without advancing the processor cycle. If it fails, the
6307 function returns FALSE and frees the current state. */
6309 static int
6310 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6312 int i;
6314 for (i = 0; i < nops_num; i++)
6315 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6317 free_bundle_state (curr_state);
6318 return FALSE;
6320 return TRUE;
6323 /* The following function tries to issue INSN for the current
6324 state without advancing the processor cycle. If it fails, the
6325 function returns FALSE and frees the current state. */
6327 static int
6328 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6330 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6332 free_bundle_state (curr_state);
6333 return FALSE;
6335 return TRUE;
6338 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6339 starting with ORIGINATOR without advancing the processor cycle. If
6340 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6341 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6342 If successful, the function creates a new bundle state and inserts
6343 it into the hash table and into `index_to_bundle_states'. */
6345 static void
6346 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6347 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6349 struct bundle_state *curr_state;
6351 curr_state = get_free_bundle_state ();
6352 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6353 curr_state->insn = insn;
6354 curr_state->insn_num = originator->insn_num + 1;
6355 curr_state->cost = originator->cost;
6356 curr_state->originator = originator;
6357 curr_state->before_nops_num = before_nops_num;
6358 curr_state->after_nops_num = 0;
6359 curr_state->accumulated_insns_num
6360 = originator->accumulated_insns_num + before_nops_num;
6361 curr_state->branch_deviation = originator->branch_deviation;
6362 if (insn == NULL_RTX)
6363 abort ();
6364 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6366 if (GET_MODE (insn) == TImode)
6367 abort ();
6368 if (!try_issue_nops (curr_state, before_nops_num))
6369 return;
6370 if (!try_issue_insn (curr_state, insn))
6371 return;
6372 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6373 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6374 && curr_state->accumulated_insns_num % 3 != 0)
6376 free_bundle_state (curr_state);
6377 return;
6380 else if (GET_MODE (insn) != TImode)
6382 if (!try_issue_nops (curr_state, before_nops_num))
6383 return;
6384 if (!try_issue_insn (curr_state, insn))
6385 return;
6386 curr_state->accumulated_insns_num++;
6387 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6388 || asm_noperands (PATTERN (insn)) >= 0)
6389 abort ();
6390 if (ia64_safe_type (insn) == TYPE_L)
6391 curr_state->accumulated_insns_num++;
6393 else
6395 /* If this is an insn that must be first in a group, then don't allow
6396 nops to be emitted before it. Currently, alloc is the only such
6397 supported instruction. */
6398 /* ??? The bundling automatons should handle this for us, but they do
6399 not yet have support for the first_insn attribute. */
6400 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
6402 free_bundle_state (curr_state);
6403 return;
6406 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6407 state_transition (curr_state->dfa_state, NULL);
6408 curr_state->cost++;
6409 if (!try_issue_nops (curr_state, before_nops_num))
6410 return;
6411 if (!try_issue_insn (curr_state, insn))
6412 return;
6413 curr_state->accumulated_insns_num++;
6414 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6415 || asm_noperands (PATTERN (insn)) >= 0)
6417 /* Finish bundle containing asm insn. */
6418 curr_state->after_nops_num
6419 = 3 - curr_state->accumulated_insns_num % 3;
6420 curr_state->accumulated_insns_num
6421 += 3 - curr_state->accumulated_insns_num % 3;
6423 else if (ia64_safe_type (insn) == TYPE_L)
6424 curr_state->accumulated_insns_num++;
6426 if (ia64_safe_type (insn) == TYPE_B)
6427 curr_state->branch_deviation
6428 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6429 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6431 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6433 state_t dfa_state;
6434 struct bundle_state *curr_state1;
6435 struct bundle_state *allocated_states_chain;
6437 curr_state1 = get_free_bundle_state ();
6438 dfa_state = curr_state1->dfa_state;
6439 allocated_states_chain = curr_state1->allocated_states_chain;
6440 *curr_state1 = *curr_state;
6441 curr_state1->dfa_state = dfa_state;
6442 curr_state1->allocated_states_chain = allocated_states_chain;
6443 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6444 dfa_state_size);
6445 curr_state = curr_state1;
6447 if (!try_issue_nops (curr_state,
6448 3 - curr_state->accumulated_insns_num % 3))
6449 return;
6450 curr_state->after_nops_num
6451 = 3 - curr_state->accumulated_insns_num % 3;
6452 curr_state->accumulated_insns_num
6453 += 3 - curr_state->accumulated_insns_num % 3;
6455 if (!insert_bundle_state (curr_state))
6456 free_bundle_state (curr_state);
6457 return;
6460 /* The following function returns the position in the two-bundle window
6461 for the given STATE. */
6463 static int
6464 get_max_pos (state_t state)
6466 if (cpu_unit_reservation_p (state, pos_6))
6467 return 6;
6468 else if (cpu_unit_reservation_p (state, pos_5))
6469 return 5;
6470 else if (cpu_unit_reservation_p (state, pos_4))
6471 return 4;
6472 else if (cpu_unit_reservation_p (state, pos_3))
6473 return 3;
6474 else if (cpu_unit_reservation_p (state, pos_2))
6475 return 2;
6476 else if (cpu_unit_reservation_p (state, pos_1))
6477 return 1;
6478 else
6479 return 0;
6482 /* The function returns the code of a possible template for the given
6483 position and state. The function should be called only with the two
6484 position values 3 or 6. We avoid generating F NOPs by putting
6485 templates containing F insns at the end of the template search,
6486 because of an undocumented anomaly in McKinley-derived cores which can
6487 cause stalls if an F-unit insn (including a NOP) is issued within a
6488 six-cycle window after reading certain application registers (such
6489 as ar.bsp). Furthermore, power considerations also argue against
6490 the use of F-unit instructions unless they're really needed. */
6492 static int
6493 get_template (state_t state, int pos)
6495 switch (pos)
6497 case 3:
6498 if (cpu_unit_reservation_p (state, _0mmi_))
6499 return 1;
6500 else if (cpu_unit_reservation_p (state, _0mii_))
6501 return 0;
6502 else if (cpu_unit_reservation_p (state, _0mmb_))
6503 return 7;
6504 else if (cpu_unit_reservation_p (state, _0mib_))
6505 return 6;
6506 else if (cpu_unit_reservation_p (state, _0mbb_))
6507 return 5;
6508 else if (cpu_unit_reservation_p (state, _0bbb_))
6509 return 4;
6510 else if (cpu_unit_reservation_p (state, _0mmf_))
6511 return 3;
6512 else if (cpu_unit_reservation_p (state, _0mfi_))
6513 return 2;
6514 else if (cpu_unit_reservation_p (state, _0mfb_))
6515 return 8;
6516 else if (cpu_unit_reservation_p (state, _0mlx_))
6517 return 9;
6518 else
6519 abort ();
6520 case 6:
6521 if (cpu_unit_reservation_p (state, _1mmi_))
6522 return 1;
6523 else if (cpu_unit_reservation_p (state, _1mii_))
6524 return 0;
6525 else if (cpu_unit_reservation_p (state, _1mmb_))
6526 return 7;
6527 else if (cpu_unit_reservation_p (state, _1mib_))
6528 return 6;
6529 else if (cpu_unit_reservation_p (state, _1mbb_))
6530 return 5;
6531 else if (cpu_unit_reservation_p (state, _1bbb_))
6532 return 4;
6533 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6534 return 3;
6535 else if (cpu_unit_reservation_p (state, _1mfi_))
6536 return 2;
6537 else if (cpu_unit_reservation_p (state, _1mfb_))
6538 return 8;
6539 else if (cpu_unit_reservation_p (state, _1mlx_))
6540 return 9;
6541 else
6542 abort ();
6543 default:
6544 abort ();
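/* The codes returned above correspond to the entries of bundle_name[]
   (see get_bundle_name earlier in this file): 0 .mii, 1 .mmi, 2 .mfi,
   3 .mmf, 4 .bbb, 5 .mbb, 6 .mib, 7 .mmb, 8 .mfb, 9 .mlx.  For example,
   the MLX -> MFI rewrite in bundling () below emits a bundle_selector
   with const2_rtx, i.e. template 2 (.mfi).  */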
6548 /* The following function returns the first insn important for insn
6549 bundling, starting at INSN and before TAIL. */
6551 static rtx
6552 get_next_important_insn (rtx insn, rtx tail)
6554 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6555 if (INSN_P (insn)
6556 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6557 && GET_CODE (PATTERN (insn)) != USE
6558 && GET_CODE (PATTERN (insn)) != CLOBBER)
6559 return insn;
6560 return NULL_RTX;
6563 /* The following function does insn bundling. Bundling means
6564 inserting templates and nop insns to fit insn groups into permitted
6565 templates. Instruction scheduling uses an NDFA (non-deterministic
6566 finite automaton) encoding information about the templates and the
6567 inserted nops. The nondeterminism of the automaton permits following
6568 all possible insn sequences very quickly.
6570 Unfortunately it is not possible to get information about inserted
6571 nop insns and used templates from the automaton states. The
6572 automaton only says that we can issue an insn, possibly inserting
6573 some nops before it and using some template. Therefore insn
6574 bundling in this function is implemented by using a DFA
6575 (deterministic finite automaton). We follow all possible insn
6576 sequences by inserting 0-2 nops (that is what the NDFA describes for
6577 insn scheduling) before/after each insn being bundled. We know the
6578 start of a simulated processor cycle from insn scheduling (an insn
6579 starting a new cycle has TImode).
6581 A simple implementation of insn bundling would create an enormous
6582 number of possible insn sequences satisfying the information about
6583 new cycle ticks taken from the insn scheduling. To make the algorithm
6584 practical we use dynamic programming. Each decision (about
6585 inserting nops and implicitly about previous decisions) is described
6586 by the structure bundle_state (see above). If we generate the same
6587 bundle state (the key is the automaton state after issuing the insns
6588 and nops for it), we reuse the already generated one. As a consequence
6589 we reject some decisions which cannot improve the solution and
6590 reduce the memory used by the algorithm.
6592 When we reach the end of the EBB (extended basic block), we choose
6593 the best sequence and then, moving back through the EBB, insert
6594 templates for the best alternative. The templates are taken by
6595 querying the automaton state for each insn in the chosen bundle states.
6597 So the algorithm makes two (forward and backward) passes through the
6598 EBB. There is an additional forward pass through the EBB for the
6599 Itanium1 processor. This pass inserts more nops to make the dependency
6600 between a producer insn and MMMUL/MMSHF at least 4 cycles long. */
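/* A small worked example of the dynamic programming (schematic): suppose
   the forward pass reaches insn number K with two surviving bundle states,
   S1 (cost 1, 5 insns issued counting nops) and S2 (cost 1, 6 insns).
   For the next insn, issue_nops_and_insn is tried from each state with 0,
   1 and (for some insn types) 2 preceding nops; any two candidates ending
   in the same automaton state for the same insn number are merged by
   insert_bundle_state, which keeps the one with the smaller
   (cost, accumulated_insns_num, branch_deviation) triple.  The backward
   pass then walks the `originator' chain of the chosen final state and
   emits the recorded nops and bundle templates.  */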
6602 static void
6603 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6605 struct bundle_state *curr_state, *next_state, *best_state;
6606 rtx insn, next_insn;
6607 int insn_num;
6608 int i, bundle_end_p, only_bundle_end_p, asm_p;
6609 int pos = 0, max_pos, template0, template1;
6610 rtx b;
6611 rtx nop;
6612 enum attr_type type;
6614 insn_num = 0;
6615 /* Count insns in the EBB. */
6616 for (insn = NEXT_INSN (prev_head_insn);
6617 insn && insn != tail;
6618 insn = NEXT_INSN (insn))
6619 if (INSN_P (insn))
6620 insn_num++;
6621 if (insn_num == 0)
6622 return;
6623 bundling_p = 1;
6624 dfa_clean_insn_cache ();
6625 initiate_bundle_state_table ();
6626 index_to_bundle_states = xmalloc ((insn_num + 2)
6627 * sizeof (struct bundle_state *));
6628 /* First (forward) pass -- generation of bundle states. */
6629 curr_state = get_free_bundle_state ();
6630 curr_state->insn = NULL;
6631 curr_state->before_nops_num = 0;
6632 curr_state->after_nops_num = 0;
6633 curr_state->insn_num = 0;
6634 curr_state->cost = 0;
6635 curr_state->accumulated_insns_num = 0;
6636 curr_state->branch_deviation = 0;
6637 curr_state->next = NULL;
6638 curr_state->originator = NULL;
6639 state_reset (curr_state->dfa_state);
6640 index_to_bundle_states [0] = curr_state;
6641 insn_num = 0;
6642 /* Shift the cycle mark if it is put on an insn which could be ignored. */
6643 for (insn = NEXT_INSN (prev_head_insn);
6644 insn != tail;
6645 insn = NEXT_INSN (insn))
6646 if (INSN_P (insn)
6647 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6648 || GET_CODE (PATTERN (insn)) == USE
6649 || GET_CODE (PATTERN (insn)) == CLOBBER)
6650 && GET_MODE (insn) == TImode)
6652 PUT_MODE (insn, VOIDmode);
6653 for (next_insn = NEXT_INSN (insn);
6654 next_insn != tail;
6655 next_insn = NEXT_INSN (next_insn))
6656 if (INSN_P (next_insn)
6657 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6658 && GET_CODE (PATTERN (next_insn)) != USE
6659 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6661 PUT_MODE (next_insn, TImode);
6662 break;
6665 /* Forward pass: generation of bundle states. */
6666 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6667 insn != NULL_RTX;
6668 insn = next_insn)
6670 if (!INSN_P (insn)
6671 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6672 || GET_CODE (PATTERN (insn)) == USE
6673 || GET_CODE (PATTERN (insn)) == CLOBBER)
6674 abort ();
6675 type = ia64_safe_type (insn);
6676 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6677 insn_num++;
6678 index_to_bundle_states [insn_num] = NULL;
6679 for (curr_state = index_to_bundle_states [insn_num - 1];
6680 curr_state != NULL;
6681 curr_state = next_state)
6683 pos = curr_state->accumulated_insns_num % 3;
6684 next_state = curr_state->next;
6685 /* We must fill up the current bundle in order to start a
6686 subsequent asm insn in a new bundle. Asm insn is always
6687 placed in a separate bundle. */
6688 only_bundle_end_p
6689 = (next_insn != NULL_RTX
6690 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6691 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6692 /* We may fill up the current bundle if it is the cycle end
6693 without a group barrier. */
6694 bundle_end_p
6695 = (only_bundle_end_p || next_insn == NULL_RTX
6696 || (GET_MODE (next_insn) == TImode
6697 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6698 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6699 || type == TYPE_S
6700 /* We need to insert 2 nops for cases like M_MII. To
6701 guarantee issuing all insns on the same cycle for
6702 Itanium 1, we need to issue 2 nops after the first M
6703 insn (MnnMII where n is a nop insn). */
6704 || ((type == TYPE_M || type == TYPE_A)
6705 && ia64_tune == PROCESSOR_ITANIUM
6706 && !bundle_end_p && pos == 1))
6707 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6708 only_bundle_end_p);
6709 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6710 only_bundle_end_p);
6711 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6712 only_bundle_end_p);
6714 if (index_to_bundle_states [insn_num] == NULL)
6715 abort ();
6716 for (curr_state = index_to_bundle_states [insn_num];
6717 curr_state != NULL;
6718 curr_state = curr_state->next)
6719 if (verbose >= 2 && dump)
6721 /* This structure is taken from the generated code of the
6722 pipeline hazard recognizer (see file insn-attrtab.c).
6723 Please don't forget to change the structure if a new
6724 automaton is added to the .md file. */
6725 struct DFA_chip
6727 unsigned short one_automaton_state;
6728 unsigned short oneb_automaton_state;
6729 unsigned short two_automaton_state;
6730 unsigned short twob_automaton_state;
6733 fprintf
6734 (dump,
6735 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6736 curr_state->unique_num,
6737 (curr_state->originator == NULL
6738 ? -1 : curr_state->originator->unique_num),
6739 curr_state->cost,
6740 curr_state->before_nops_num, curr_state->after_nops_num,
6741 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6742 (ia64_tune == PROCESSOR_ITANIUM
6743 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6744 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6745 INSN_UID (insn));
6748 if (index_to_bundle_states [insn_num] == NULL)
6749 /* We should find a solution because the 2nd insn scheduling has
6750 found one. */
6751 abort ();
6752 /* Find a state corresponding to the best insn sequence. */
6753 best_state = NULL;
6754 for (curr_state = index_to_bundle_states [insn_num];
6755 curr_state != NULL;
6756 curr_state = curr_state->next)
6757 /* We are only looking at states whose last bundle is fully filled
6758 up. First we prefer insn sequences with minimal cost, then
6759 those with minimal inserted nops, and finally those with branch
6760 insns placed in the 3rd slots. */
6761 if (curr_state->accumulated_insns_num % 3 == 0
6762 && (best_state == NULL || best_state->cost > curr_state->cost
6763 || (best_state->cost == curr_state->cost
6764 && (curr_state->accumulated_insns_num
6765 < best_state->accumulated_insns_num
6766 || (curr_state->accumulated_insns_num
6767 == best_state->accumulated_insns_num
6768 && curr_state->branch_deviation
6769 < best_state->branch_deviation)))))
6770 best_state = curr_state;
6771 /* Second (backward) pass: adding nops and templates. */
6772 insn_num = best_state->before_nops_num;
6773 template0 = template1 = -1;
6774 for (curr_state = best_state;
6775 curr_state->originator != NULL;
6776 curr_state = curr_state->originator)
6778 insn = curr_state->insn;
6779 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6780 || asm_noperands (PATTERN (insn)) >= 0);
6781 insn_num++;
6782 if (verbose >= 2 && dump)
6784 struct DFA_chip
6786 unsigned short one_automaton_state;
6787 unsigned short oneb_automaton_state;
6788 unsigned short two_automaton_state;
6789 unsigned short twob_automaton_state;
6792 fprintf
6793 (dump,
6794 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6795 curr_state->unique_num,
6796 (curr_state->originator == NULL
6797 ? -1 : curr_state->originator->unique_num),
6798 curr_state->cost,
6799 curr_state->before_nops_num, curr_state->after_nops_num,
6800 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6801 (ia64_tune == PROCESSOR_ITANIUM
6802 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6803 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6804 INSN_UID (insn));
6806 /* Find the position in the current bundle window. The window can
6807 contain at most two bundles. A two-bundle window means that
6808 the processor will make two bundle rotations. */
6809 max_pos = get_max_pos (curr_state->dfa_state);
6810 if (max_pos == 6
6811 /* The following (negative template number) means that the
6812 processor did one bundle rotation. */
6813 || (max_pos == 3 && template0 < 0))
6815 /* We are at the end of the window -- find template(s) for
6816 its bundle(s). */
6817 pos = max_pos;
6818 if (max_pos == 3)
6819 template0 = get_template (curr_state->dfa_state, 3);
6820 else
6822 template1 = get_template (curr_state->dfa_state, 3);
6823 template0 = get_template (curr_state->dfa_state, 6);
6826 if (max_pos > 3 && template1 < 0)
6827 /* This may happen when we have a stop inside a bundle. */
6829 if (pos > 3)
6830 abort ();
6831 template1 = get_template (curr_state->dfa_state, 3);
6832 pos += 3;
6834 if (!asm_p)
6835 /* Emit nops after the current insn. */
6836 for (i = 0; i < curr_state->after_nops_num; i++)
6838 nop = gen_nop ();
6839 emit_insn_after (nop, insn);
6840 pos--;
6841 if (pos < 0)
6842 abort ();
6843 if (pos % 3 == 0)
6845 /* We are at the start of a bundle: emit the template
6846 (it should be defined). */
6847 if (template0 < 0)
6848 abort ();
6849 b = gen_bundle_selector (GEN_INT (template0));
6850 ia64_emit_insn_before (b, nop);
6851 /* If we have a two-bundle window, we make one bundle
6852 rotation. Otherwise template0 will be undefined
6853 (negative value). */
6854 template0 = template1;
6855 template1 = -1;
6858 /* Move the position backward in the window. A group barrier has
6859 no slot. An asm insn takes a whole bundle. */
6860 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6861 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6862 && asm_noperands (PATTERN (insn)) < 0)
6863 pos--;
6864 /* Long insn takes 2 slots. */
6865 if (ia64_safe_type (insn) == TYPE_L)
6866 pos--;
6867 if (pos < 0)
6868 abort ();
6869 if (pos % 3 == 0
6870 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6871 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6872 && asm_noperands (PATTERN (insn)) < 0)
6874 /* The current insn is at the bundle start: emit the
6875 template. */
6876 if (template0 < 0)
6877 abort ();
6878 b = gen_bundle_selector (GEN_INT (template0));
6879 ia64_emit_insn_before (b, insn);
6880 b = PREV_INSN (insn);
6881 insn = b;
6882 /* See comment above in analogous place for emitting nops
6883 after the insn. */
6884 template0 = template1;
6885 template1 = -1;
6887 /* Emit nops before the current insn. */
6888 for (i = 0; i < curr_state->before_nops_num; i++)
6890 nop = gen_nop ();
6891 ia64_emit_insn_before (nop, insn);
6892 nop = PREV_INSN (insn);
6893 insn = nop;
6894 pos--;
6895 if (pos < 0)
6896 abort ();
6897 if (pos % 3 == 0)
6899 /* See comment above in analogous place for emitting nops
6900 after the insn. */
6901 if (template0 < 0)
6902 abort ();
6903 b = gen_bundle_selector (GEN_INT (template0));
6904 ia64_emit_insn_before (b, insn);
6905 b = PREV_INSN (insn);
6906 insn = b;
6907 template0 = template1;
6908 template1 = -1;
6912 if (ia64_tune == PROCESSOR_ITANIUM)
6913 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
6914 Itanium1 has a strange design: if the distance between an insn
6915 and a dependent MM-insn is less than 4 cycles, we get an
6916 additional 6-cycle stall. So we make the distance equal to 4
6917 cycles if it is less. */
6918 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6919 insn != NULL_RTX;
6920 insn = next_insn)
6922 if (!INSN_P (insn)
6923 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6924 || GET_CODE (PATTERN (insn)) == USE
6925 || GET_CODE (PATTERN (insn)) == CLOBBER)
6926 abort ();
6927 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6928 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
6929 /* We found an MM-insn which needs additional cycles. */
6931 rtx last;
6932 int i, j, n;
6933 int pred_stop_p;
6935 /* Now we are searching for the template of the bundle in
6936 which the MM-insn is placed and the position of the
6937 insn in the bundle (0, 1, 2). We also check whether
6938 there is a stop before the insn. */
6939 last = prev_active_insn (insn);
6940 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
6941 if (pred_stop_p)
6942 last = prev_active_insn (last);
6943 n = 0;
6944 for (;; last = prev_active_insn (last))
6945 if (recog_memoized (last) == CODE_FOR_bundle_selector)
6947 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
6948 if (template0 == 9)
6949 /* The insn is in an MLX bundle. Change the template
6950 to MFI because we will add nops before the
6951 insn. This simplifies the subsequent code a lot. */
6952 PATTERN (last)
6953 = gen_bundle_selector (const2_rtx); /* -> MFI */
6954 break;
6956 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
6957 && (ia64_safe_itanium_class (last)
6958 != ITANIUM_CLASS_IGNORE))
6959 n++;
6960 /* Some correctness checks: the stop is not at the
6961 bundle start, there are no more than 3 insns in the bundle,
6962 and the MM-insn is not at the start of a bundle with
6963 template MLX. */
6964 if ((pred_stop_p && n == 0) || n > 2
6965 || (template0 == 9 && n != 0))
6966 abort ();
6967 /* Fill the remaining slots of the original bundle with nops. */
6968 for (j = 3 - n; j > 0; j --)
6969 ia64_emit_insn_before (gen_nop (), insn);
6970 /* This takes into account that we will add N more nops
6971 before the insn later -- please see the code below. */
6972 add_cycles [INSN_UID (insn)]--;
6973 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
6974 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6975 insn);
6976 if (pred_stop_p)
6977 add_cycles [INSN_UID (insn)]--;
6978 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
6980 /* Insert "MII;" template. */
6981 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
6982 insn);
6983 ia64_emit_insn_before (gen_nop (), insn);
6984 ia64_emit_insn_before (gen_nop (), insn);
6985 if (i > 1)
6987 /* To decrease code size, we use "MI;I;"
6988 template. */
6989 ia64_emit_insn_before
6990 (gen_insn_group_barrier (GEN_INT (3)), insn);
6991 i--;
6993 ia64_emit_insn_before (gen_nop (), insn);
6994 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6995 insn);
6997 /* Put the MM-insn in the same slot of a bundle with the
6998 same template as the original one. */
6999 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7000 insn);
7001 /* To put the insn in the same slot, add the necessary number
7002 of nops. */
7003 for (j = n; j > 0; j --)
7004 ia64_emit_insn_before (gen_nop (), insn);
7005 /* Put the stop if the original bundle had it. */
7006 if (pred_stop_p)
7007 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7008 insn);
7011 free (index_to_bundle_states);
7012 finish_bundle_state_table ();
7013 bundling_p = 0;
7014 dfa_clean_insn_cache ();
7017 /* The following function is called at the end of scheduling BB or
7018 EBB. After reload, it inserts stop bits and does insn bundling. */
7020 static void
7021 ia64_sched_finish (FILE *dump, int sched_verbose)
7023 if (sched_verbose)
7024 fprintf (dump, "// Finishing schedule.\n");
7025 if (!reload_completed)
7026 return;
7027 if (reload_completed)
7029 final_emit_insn_group_barriers (dump);
7030 bundling (dump, sched_verbose, current_sched_info->prev_head,
7031 current_sched_info->next_tail);
7032 if (sched_verbose && dump)
7033 fprintf (dump, "// finishing %d-%d\n",
7034 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7035 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7037 return;
7041 /* The following function inserts stop bits in scheduled BB or EBB. */
7043 static void
7044 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7046 rtx insn;
7047 int need_barrier_p = 0;
7048 rtx prev_insn = NULL_RTX;
7050 init_insn_group_barriers ();
7052 for (insn = NEXT_INSN (current_sched_info->prev_head);
7053 insn != current_sched_info->next_tail;
7054 insn = NEXT_INSN (insn))
7056 if (GET_CODE (insn) == BARRIER)
7058 rtx last = prev_active_insn (insn);
7060 if (! last)
7061 continue;
7062 if (GET_CODE (last) == JUMP_INSN
7063 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7064 last = prev_active_insn (last);
7065 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7066 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7068 init_insn_group_barriers ();
7069 need_barrier_p = 0;
7070 prev_insn = NULL_RTX;
7072 else if (INSN_P (insn))
7074 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7076 init_insn_group_barriers ();
7077 need_barrier_p = 0;
7078 prev_insn = NULL_RTX;
7080 else if (need_barrier_p || group_barrier_needed_p (insn))
7082 if (TARGET_EARLY_STOP_BITS)
7084 rtx last;
7086 for (last = insn;
7087 last != current_sched_info->prev_head;
7088 last = PREV_INSN (last))
7089 if (INSN_P (last) && GET_MODE (last) == TImode
7090 && stops_p [INSN_UID (last)])
7091 break;
7092 if (last == current_sched_info->prev_head)
7093 last = insn;
7094 last = prev_active_insn (last);
7095 if (last
7096 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7097 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7098 last);
7099 init_insn_group_barriers ();
7100 for (last = NEXT_INSN (last);
7101 last != insn;
7102 last = NEXT_INSN (last))
7103 if (INSN_P (last))
7104 group_barrier_needed_p (last);
7106 else
7108 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7109 insn);
7110 init_insn_group_barriers ();
7112 group_barrier_needed_p (insn);
7113 prev_insn = NULL_RTX;
7115 else if (recog_memoized (insn) >= 0)
7116 prev_insn = insn;
7117 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7118 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7119 || asm_noperands (PATTERN (insn)) >= 0);
7126 /* If the following function returns TRUE, we will use the DFA
7127 insn scheduler. */
7129 static int
7130 ia64_first_cycle_multipass_dfa_lookahead (void)
7132 return (reload_completed ? 6 : 4);
7135 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
7137 static void
7138 ia64_init_dfa_pre_cycle_insn (void)
7140 if (temp_dfa_state == NULL)
7142 dfa_state_size = state_size ();
7143 temp_dfa_state = xmalloc (dfa_state_size);
7144 prev_cycle_state = xmalloc (dfa_state_size);
7146 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7147 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7148 recog_memoized (dfa_pre_cycle_insn);
7149 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7150 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7151 recog_memoized (dfa_stop_insn);
7154 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7155 used by the DFA insn scheduler. */
7157 static rtx
7158 ia64_dfa_pre_cycle_insn (void)
7160 return dfa_pre_cycle_insn;
7163 /* The following function returns TRUE if PRODUCER (of type ilog or
7164 ld) produces an address for CONSUMER (of type st or stf). */
7167 ia64_st_address_bypass_p (rtx producer, rtx consumer)
7169 rtx dest, reg, mem;
7171 if (producer == NULL_RTX || consumer == NULL_RTX)
7172 abort ();
7173 dest = ia64_single_set (producer);
7174 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7175 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7176 abort ();
7177 if (GET_CODE (reg) == SUBREG)
7178 reg = SUBREG_REG (reg);
7179 dest = ia64_single_set (consumer);
7180 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7181 || GET_CODE (mem) != MEM)
7182 abort ();
7183 return reg_mentioned_p (reg, mem);
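/* A schematic example (register names chosen for illustration): with

     producer:  (set (reg r14) (plus (reg r32) (const_int 16)))
     consumer:  (set (mem (reg r14)) (reg r9))

   the destination register r14 of the producer is mentioned in the address
   of the consumer's MEM, so the function returns TRUE and the machine
   description can apply the store-address bypass.  */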
7186 /* The following function returns TRUE if PRODUCER (of type ilog or
7187 ld) produces an address for CONSUMER (of type ld or fld). */
7190 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7192 rtx dest, src, reg, mem;
7194 if (producer == NULL_RTX || consumer == NULL_RTX)
7195 abort ();
7196 dest = ia64_single_set (producer);
7197 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7198 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7199 abort ();
7200 if (GET_CODE (reg) == SUBREG)
7201 reg = SUBREG_REG (reg);
7202 src = ia64_single_set (consumer);
7203 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7204 abort ();
7205 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7206 mem = XVECEXP (mem, 0, 0);
7207 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7208 mem = XEXP (mem, 0);
7210 /* Note that LO_SUM is used for GOT loads. */
7211 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
7212 abort ();
7214 return reg_mentioned_p (reg, mem);
7217 /* The following function returns TRUE if INSN produces an address for a
7218 load/store insn. We will place such insns into an M slot because this
7219 decreases their latency time. */
7222 ia64_produce_address_p (rtx insn)
7224 return insn->call;
7228 /* Emit pseudo-ops for the assembler to describe predicate relations.
7229 At present this assumes that we only consider predicate pairs to
7230 be mutex, and that the assembler can deduce proper values from
7231 straight-line code. */
7233 static void
7234 emit_predicate_relation_info (void)
7236 basic_block bb;
7238 FOR_EACH_BB_REVERSE (bb)
7240 int r;
7241 rtx head = BB_HEAD (bb);
7243 /* We only need such notes at code labels. */
7244 if (GET_CODE (head) != CODE_LABEL)
7245 continue;
7246 if (GET_CODE (NEXT_INSN (head)) == NOTE
7247 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7248 head = NEXT_INSN (head);
7250 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7251 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7253 rtx p = gen_rtx_REG (BImode, r);
7254 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7255 if (head == BB_END (bb))
7256 BB_END (bb) = n;
7257 head = n;
7261 /* Look for conditional calls that do not return, and protect predicate
7262 relations around them. Otherwise the assembler will assume the call
7263 returns, and complain about uses of call-clobbered predicates after
7264 the call. */
7265 FOR_EACH_BB_REVERSE (bb)
7267 rtx insn = BB_HEAD (bb);
7269 while (1)
7271 if (GET_CODE (insn) == CALL_INSN
7272 && GET_CODE (PATTERN (insn)) == COND_EXEC
7273 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7275 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7276 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7277 if (BB_HEAD (bb) == insn)
7278 BB_HEAD (bb) = b;
7279 if (BB_END (bb) == insn)
7280 BB_END (bb) = a;
7283 if (insn == BB_END (bb))
7284 break;
7285 insn = NEXT_INSN (insn);
7290 /* Perform machine dependent operations on the rtl chain INSNS. */
7292 static void
7293 ia64_reorg (void)
7295 /* We are freeing block_for_insn in the toplev to keep compatibility
7296 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7297 compute_bb_for_insn ();
7299 /* If optimizing, we'll have split before scheduling. */
7300 if (optimize == 0)
7301 split_all_insns (0);
7303 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7304 non-optimizing bootstrap. */
7305 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7307 if (ia64_flag_schedule_insns2)
7309 timevar_push (TV_SCHED2);
7310 ia64_final_schedule = 1;
7312 initiate_bundle_states ();
7313 ia64_nop = make_insn_raw (gen_nop ());
7314 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7315 recog_memoized (ia64_nop);
7316 clocks_length = get_max_uid () + 1;
7317 stops_p = xcalloc (1, clocks_length);
7318 if (ia64_tune == PROCESSOR_ITANIUM)
7320 clocks = xcalloc (clocks_length, sizeof (int));
7321 add_cycles = xcalloc (clocks_length, sizeof (int));
7323 if (ia64_tune == PROCESSOR_ITANIUM2)
7325 pos_1 = get_cpu_unit_code ("2_1");
7326 pos_2 = get_cpu_unit_code ("2_2");
7327 pos_3 = get_cpu_unit_code ("2_3");
7328 pos_4 = get_cpu_unit_code ("2_4");
7329 pos_5 = get_cpu_unit_code ("2_5");
7330 pos_6 = get_cpu_unit_code ("2_6");
7331 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7332 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7333 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7334 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7335 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7336 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7337 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7338 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7339 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7340 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7341 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7342 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7343 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7344 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7345 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7346 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7347 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7348 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7349 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7350 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7352 else
7354 pos_1 = get_cpu_unit_code ("1_1");
7355 pos_2 = get_cpu_unit_code ("1_2");
7356 pos_3 = get_cpu_unit_code ("1_3");
7357 pos_4 = get_cpu_unit_code ("1_4");
7358 pos_5 = get_cpu_unit_code ("1_5");
7359 pos_6 = get_cpu_unit_code ("1_6");
7360 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7361 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7362 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7363 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7364 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7365 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7366 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7367 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7368 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7369 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7370 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7371 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7372 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7373 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7374 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7375 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7376 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7377 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7378 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7379 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7381 schedule_ebbs (dump_file);
7382 finish_bundle_states ();
7383 if (ia64_tune == PROCESSOR_ITANIUM)
7385 free (add_cycles);
7386 free (clocks);
7388 free (stops_p);
7389 emit_insn_group_barriers (dump_file);
7391 ia64_final_schedule = 0;
7392 timevar_pop (TV_SCHED2);
7394 else
7395 emit_all_insn_group_barriers (dump_file);
7397 /* A call must not be the last instruction in a function, so that the
7398 return address stays within the function and unwinding works
7399 properly. Note that IA-64 differs from dwarf2 on this point. */
7400 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7402 rtx insn;
7403 int saw_stop = 0;
7405 insn = get_last_insn ();
7406 if (! INSN_P (insn))
7407 insn = prev_active_insn (insn);
7408 /* Skip over insns that expand to nothing. */
7409 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7411 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7412 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7413 saw_stop = 1;
7414 insn = prev_active_insn (insn);
7416 if (GET_CODE (insn) == CALL_INSN)
7418 if (! saw_stop)
7419 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7420 emit_insn (gen_break_f ());
7421 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7425 emit_predicate_relation_info ();
7427 if (ia64_flag_var_tracking)
7429 timevar_push (TV_VAR_TRACKING);
7430 variable_tracking_main ();
7431 timevar_pop (TV_VAR_TRACKING);
7435 /* Return true if REGNO is used by the epilogue. */
7438 ia64_epilogue_uses (int regno)
7440 switch (regno)
7442 case R_GR (1):
7443 /* With a call to a function in another module, we will write a new
7444 value to "gp". After returning from such a call, we need to make
7445 sure the function restores the original gp-value, even if the
7446 function itself does not use the gp anymore. */
7447 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7449 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7450 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7451 /* For functions defined with the syscall_linkage attribute, all
7452 input registers are marked as live at all function exits. This
7453 prevents the register allocator from using the input registers,
7454 which in turn makes it possible to restart a system call after
7455 an interrupt without having to save/restore the input registers.
7456 This also prevents kernel data from leaking to application code. */
7457 return lookup_attribute ("syscall_linkage",
7458 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7460 case R_BR (0):
7461 /* Conditional return patterns can't represent the use of `b0' as
7462 the return address, so we force the value live this way. */
7463 return 1;
7465 case AR_PFS_REGNUM:
7466 /* Likewise for ar.pfs, which is used by br.ret. */
7467 return 1;
7469 default:
7470 return 0;
7474 /* Return true if REGNO is used by the frame unwinder. */
7476 int
7477 ia64_eh_uses (int regno)
7479 if (! reload_completed)
7480 return 0;
7482 if (current_frame_info.reg_save_b0
7483 && regno == current_frame_info.reg_save_b0)
7484 return 1;
7485 if (current_frame_info.reg_save_pr
7486 && regno == current_frame_info.reg_save_pr)
7487 return 1;
7488 if (current_frame_info.reg_save_ar_pfs
7489 && regno == current_frame_info.reg_save_ar_pfs)
7490 return 1;
7491 if (current_frame_info.reg_save_ar_unat
7492 && regno == current_frame_info.reg_save_ar_unat)
7493 return 1;
7494 if (current_frame_info.reg_save_ar_lc
7495 && regno == current_frame_info.reg_save_ar_lc)
7496 return 1;
7498 return 0;
7501 /* Return true if this goes in small data/bss. */
7503 /* ??? We could also support our own long data here, generating movl/add/ld8
7504 instead of addl,ld8/ld8. That makes the code bigger, but should make
7505 the code faster because there is one less load. It would also cover
7506 incomplete types, which can't go in sdata/sbss. */
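/* Editorial sketch of the two sequences mentioned above (approximate IA-64
   assembly; "var" is a hypothetical symbol).  Current GOT-based access
   (addl,ld8/ld8):

	addl r2 = @ltoff(var), gp ;;
	ld8  r2 = [r2] ;;
	ld8  r3 = [r2]

   Proposed long-data access (movl/add/ld8):

	movl r2 = @gprel(var) ;;
	add  r2 = r2, gp ;;
	ld8  r3 = [r2]

   i.e. one load instead of two, at the cost of a larger movl bundle.  */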
7508 static bool
7509 ia64_in_small_data_p (tree exp)
7511 if (TARGET_NO_SDATA)
7512 return false;
7514 /* We want to merge strings, so we never consider them small data. */
7515 if (TREE_CODE (exp) == STRING_CST)
7516 return false;
7518 /* Functions are never small data. */
7519 if (TREE_CODE (exp) == FUNCTION_DECL)
7520 return false;
7522 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7524 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7526 if (strcmp (section, ".sdata") == 0
7527 || strncmp (section, ".sdata.", 7) == 0
7528 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
7529 || strcmp (section, ".sbss") == 0
7530 || strncmp (section, ".sbss.", 6) == 0
7531 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
7532 return true;
7534 else
7536 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7538 /* If this is an incomplete type with size 0, then we can't put it
7539 in sdata because it might be too big when completed. */
7540 if (size > 0 && size <= ia64_section_threshold)
7541 return true;
7544 return false;
7547 /* Output assembly directives for prologue regions. */
7549 /* True if the block we are processing is the last block of the function. */
7551 static bool last_block;
7553 /* True if we need a copy_state command at the start of the next block. */
7555 static bool need_copy_state;
7557 /* The function emits unwind directives for the start of an epilogue. */
7559 static void
7560 process_epilogue (void)
7562 /* If this isn't the last block of the function, then we need to label the
7563 current state, and copy it back in at the start of the next block. */
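/* Editorial illustration (assembler output sketch, not from this file): for
   an epilogue that is followed by more code this produces something like

	.label_state 1
	.restore sp
	...			// epilogue and return
	.body
	.copy_state 1

   where the .body/.copy_state pair is emitted later from
   process_for_unwind_directive when the next basic block starts.  */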
7565 if (!last_block)
7567 fprintf (asm_out_file, "\t.label_state %d\n",
7568 ++cfun->machine->state_num);
7569 need_copy_state = true;
7572 fprintf (asm_out_file, "\t.restore sp\n");
7575 /* This function processes a SET pattern looking for specific patterns
7576 which result in emitting an assembly directive required for unwinding. */
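/* Editorial examples (hypothetical register numbers and offsets) of the
   mapping performed below:

	(set sp (plus sp -32))		->  .fframe 32
	(set sp hard-frame-pointer)	->  .restore sp	(via process_epilogue)
	(set r35 b0)			->  .save rp, r35   (assuming r35 is the
						     chosen save register)
	(set (mem (plus r12 16)) pr)	->  .savesp pr, 16
 */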
7578 static int
7579 process_set (FILE *asm_out_file, rtx pat)
7581 rtx src = SET_SRC (pat);
7582 rtx dest = SET_DEST (pat);
7583 int src_regno, dest_regno;
7585 /* Look for the ALLOC insn. */
7586 if (GET_CODE (src) == UNSPEC_VOLATILE
7587 && XINT (src, 1) == UNSPECV_ALLOC
7588 && GET_CODE (dest) == REG)
7590 dest_regno = REGNO (dest);
7592 /* If this is the final destination for ar.pfs, then this must
7593 be the alloc in the prologue. */
7594 if (dest_regno == current_frame_info.reg_save_ar_pfs)
7595 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7596 ia64_dbx_register_number (dest_regno));
7597 else
7599 /* This must be an alloc before a sibcall. We must drop the
7600 old frame info; the easiest way to do that is to ensure
7601 we had a ".restore sp" directive followed by a new
7602 prologue. If the procedure doesn't have a memory-stack
7603 frame, we'll issue a dummy ".restore sp" now. */
7605 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
7606 /* If we haven't done process_epilogue () yet, do it now. */
7607 process_epilogue ();
7608 fprintf (asm_out_file, "\t.prologue\n");
7610 return 1;
7613 /* Look for SP = .... */
7614 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7616 if (GET_CODE (src) == PLUS)
7618 rtx op0 = XEXP (src, 0);
7619 rtx op1 = XEXP (src, 1);
7620 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7622 if (INTVAL (op1) < 0)
7623 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7624 -INTVAL (op1));
7625 else
7626 process_epilogue ();
7628 else
7629 abort ();
7631 else if (GET_CODE (src) == REG
7632 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7633 process_epilogue ();
7634 else
7635 abort ();
7637 return 1;
7640 /* Register move we need to look at. */
7641 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7643 src_regno = REGNO (src);
7644 dest_regno = REGNO (dest);
7646 switch (src_regno)
7648 case BR_REG (0):
7649 /* Saving return address pointer. */
7650 if (dest_regno != current_frame_info.reg_save_b0)
7651 abort ();
7652 fprintf (asm_out_file, "\t.save rp, r%d\n",
7653 ia64_dbx_register_number (dest_regno));
7654 return 1;
7656 case PR_REG (0):
7657 if (dest_regno != current_frame_info.reg_save_pr)
7658 abort ();
7659 fprintf (asm_out_file, "\t.save pr, r%d\n",
7660 ia64_dbx_register_number (dest_regno));
7661 return 1;
7663 case AR_UNAT_REGNUM:
7664 if (dest_regno != current_frame_info.reg_save_ar_unat)
7665 abort ();
7666 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7667 ia64_dbx_register_number (dest_regno));
7668 return 1;
7670 case AR_LC_REGNUM:
7671 if (dest_regno != current_frame_info.reg_save_ar_lc)
7672 abort ();
7673 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7674 ia64_dbx_register_number (dest_regno));
7675 return 1;
7677 case STACK_POINTER_REGNUM:
7678 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7679 || ! frame_pointer_needed)
7680 abort ();
7681 fprintf (asm_out_file, "\t.vframe r%d\n",
7682 ia64_dbx_register_number (dest_regno));
7683 return 1;
7685 default:
7686 /* Everything else should indicate being stored to memory. */
7687 abort ();
7691 /* Memory store we need to look at. */
7692 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7694 long off;
7695 rtx base;
7696 const char *saveop;
7698 if (GET_CODE (XEXP (dest, 0)) == REG)
7700 base = XEXP (dest, 0);
7701 off = 0;
7703 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7704 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7706 base = XEXP (XEXP (dest, 0), 0);
7707 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7709 else
7710 abort ();
7712 if (base == hard_frame_pointer_rtx)
7714 saveop = ".savepsp";
7715 off = - off;
7717 else if (base == stack_pointer_rtx)
7718 saveop = ".savesp";
7719 else
7720 abort ();
7722 src_regno = REGNO (src);
7723 switch (src_regno)
7725 case BR_REG (0):
7726 if (current_frame_info.reg_save_b0 != 0)
7727 abort ();
7728 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7729 return 1;
7731 case PR_REG (0):
7732 if (current_frame_info.reg_save_pr != 0)
7733 abort ();
7734 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7735 return 1;
7737 case AR_LC_REGNUM:
7738 if (current_frame_info.reg_save_ar_lc != 0)
7739 abort ();
7740 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7741 return 1;
7743 case AR_PFS_REGNUM:
7744 if (current_frame_info.reg_save_ar_pfs != 0)
7745 abort ();
7746 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7747 return 1;
7749 case AR_UNAT_REGNUM:
7750 if (current_frame_info.reg_save_ar_unat != 0)
7751 abort ();
7752 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7753 return 1;
7755 case GR_REG (4):
7756 case GR_REG (5):
7757 case GR_REG (6):
7758 case GR_REG (7):
7759 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7760 1 << (src_regno - GR_REG (4)));
7761 return 1;
7763 case BR_REG (1):
7764 case BR_REG (2):
7765 case BR_REG (3):
7766 case BR_REG (4):
7767 case BR_REG (5):
7768 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7769 1 << (src_regno - BR_REG (1)));
7770 return 1;
7772 case FR_REG (2):
7773 case FR_REG (3):
7774 case FR_REG (4):
7775 case FR_REG (5):
7776 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7777 1 << (src_regno - FR_REG (2)));
7778 return 1;
7780 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7781 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7782 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7783 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7784 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7785 1 << (src_regno - FR_REG (12)));
7786 return 1;
7788 default:
7789 return 0;
7793 return 0;
7797 /* This function looks at a single insn and emits any directives
7798 required to unwind this insn. */
7799 void
7800 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7802 if (flag_unwind_tables
7803 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7805 rtx pat;
7807 if (GET_CODE (insn) == NOTE
7808 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7810 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7812 /* Restore unwind state from immediately before the epilogue. */
7813 if (need_copy_state)
7815 fprintf (asm_out_file, "\t.body\n");
7816 fprintf (asm_out_file, "\t.copy_state %d\n",
7817 cfun->machine->state_num);
7818 need_copy_state = false;
7822 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7823 return;
7825 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7826 if (pat)
7827 pat = XEXP (pat, 0);
7828 else
7829 pat = PATTERN (insn);
7831 switch (GET_CODE (pat))
7833 case SET:
7834 process_set (asm_out_file, pat);
7835 break;
7837 case PARALLEL:
7839 int par_index;
7840 int limit = XVECLEN (pat, 0);
7841 for (par_index = 0; par_index < limit; par_index++)
7843 rtx x = XVECEXP (pat, 0, par_index);
7844 if (GET_CODE (x) == SET)
7845 process_set (asm_out_file, x);
7847 break;
7850 default:
7851 abort ();
7857 void
7858 ia64_init_builtins (void)
7860 tree psi_type_node = build_pointer_type (integer_type_node);
7861 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7863 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7864 tree si_ftype_psi_si_si
7865 = build_function_type_list (integer_type_node,
7866 psi_type_node, integer_type_node,
7867 integer_type_node, NULL_TREE);
7869 /* __sync_val_compare_and_swap_di */
7870 tree di_ftype_pdi_di_di
7871 = build_function_type_list (long_integer_type_node,
7872 pdi_type_node, long_integer_type_node,
7873 long_integer_type_node, NULL_TREE);
7874 /* __sync_bool_compare_and_swap_di */
7875 tree si_ftype_pdi_di_di
7876 = build_function_type_list (integer_type_node,
7877 pdi_type_node, long_integer_type_node,
7878 long_integer_type_node, NULL_TREE);
7879 /* __sync_synchronize */
7880 tree void_ftype_void
7881 = build_function_type (void_type_node, void_list_node);
7883 /* __sync_lock_test_and_set_si */
7884 tree si_ftype_psi_si
7885 = build_function_type_list (integer_type_node,
7886 psi_type_node, integer_type_node, NULL_TREE);
7888 /* __sync_lock_test_and_set_di */
7889 tree di_ftype_pdi_di
7890 = build_function_type_list (long_integer_type_node,
7891 pdi_type_node, long_integer_type_node,
7892 NULL_TREE);
7894 /* __sync_lock_release_si */
7895 tree void_ftype_psi
7896 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7898 /* __sync_lock_release_di */
7899 tree void_ftype_pdi
7900 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7902 tree fpreg_type;
7903 tree float80_type;
7905 /* The __fpreg type. */
7906 fpreg_type = make_node (REAL_TYPE);
7907 /* ??? The back end should know to load/save __fpreg variables using
7908 the ldf.fill and stf.spill instructions. */
7909 TYPE_PRECISION (fpreg_type) = 80;
7910 layout_type (fpreg_type);
7911 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
7913 /* The __float80 type. */
7914 float80_type = make_node (REAL_TYPE);
7915 TYPE_PRECISION (float80_type) = 80;
7916 layout_type (float80_type);
7917 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
7919 /* The __float128 type. */
7920 if (!TARGET_HPUX)
7922 tree float128_type = make_node (REAL_TYPE);
7923 TYPE_PRECISION (float128_type) = 128;
7924 layout_type (float128_type);
7925 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
7927 else
7928 /* Under HPUX, this is a synonym for "long double". */
7929 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
7930 "__float128");
7932 #define def_builtin(name, type, code) \
7933 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
7934 NULL, NULL_TREE)
7936 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7937 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7938 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7939 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7940 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7941 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7942 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7943 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7945 def_builtin ("__sync_synchronize", void_ftype_void,
7946 IA64_BUILTIN_SYNCHRONIZE);
7948 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7949 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7950 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7951 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7952 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7953 IA64_BUILTIN_LOCK_RELEASE_SI);
7954 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7955 IA64_BUILTIN_LOCK_RELEASE_DI);
7957 def_builtin ("__builtin_ia64_bsp",
7958 build_function_type (ptr_type_node, void_list_node),
7959 IA64_BUILTIN_BSP);
7961 def_builtin ("__builtin_ia64_flushrs",
7962 build_function_type (void_type_node, void_list_node),
7963 IA64_BUILTIN_FLUSHRS);
7965 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7966 IA64_BUILTIN_FETCH_AND_ADD_SI);
7967 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7968 IA64_BUILTIN_FETCH_AND_SUB_SI);
7969 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7970 IA64_BUILTIN_FETCH_AND_OR_SI);
7971 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7972 IA64_BUILTIN_FETCH_AND_AND_SI);
7973 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7974 IA64_BUILTIN_FETCH_AND_XOR_SI);
7975 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7976 IA64_BUILTIN_FETCH_AND_NAND_SI);
7978 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7979 IA64_BUILTIN_ADD_AND_FETCH_SI);
7980 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7981 IA64_BUILTIN_SUB_AND_FETCH_SI);
7982 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7983 IA64_BUILTIN_OR_AND_FETCH_SI);
7984 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7985 IA64_BUILTIN_AND_AND_FETCH_SI);
7986 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7987 IA64_BUILTIN_XOR_AND_FETCH_SI);
7988 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7989 IA64_BUILTIN_NAND_AND_FETCH_SI);
7991 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7992 IA64_BUILTIN_FETCH_AND_ADD_DI);
7993 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7994 IA64_BUILTIN_FETCH_AND_SUB_DI);
7995 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7996 IA64_BUILTIN_FETCH_AND_OR_DI);
7997 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7998 IA64_BUILTIN_FETCH_AND_AND_DI);
7999 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
8000 IA64_BUILTIN_FETCH_AND_XOR_DI);
8001 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
8002 IA64_BUILTIN_FETCH_AND_NAND_DI);
8004 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
8005 IA64_BUILTIN_ADD_AND_FETCH_DI);
8006 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
8007 IA64_BUILTIN_SUB_AND_FETCH_DI);
8008 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
8009 IA64_BUILTIN_OR_AND_FETCH_DI);
8010 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
8011 IA64_BUILTIN_AND_AND_FETCH_DI);
8012 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
8013 IA64_BUILTIN_XOR_AND_FETCH_DI);
8014 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
8015 IA64_BUILTIN_NAND_AND_FETCH_DI);
8017 #undef def_builtin
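/* Editorial usage sketch (hypothetical user code, not part of GCC): the
   builtins registered above are called like ordinary functions, e.g.

	static int lock;

	void acquire (void)
	{
	  while (!__sync_bool_compare_and_swap_si (&lock, 0, 1))
	    continue;
	}

	void release (void)
	{
	  __sync_lock_release_si (&lock);
	}
 */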
8020 /* Expand fetch_and_op intrinsics. The basic code sequence is:
8023 tmp = [ptr];
8024 do {
8025 ret = tmp;
8026 ar.ccv = tmp;
8027 tmp <op>= value;
8028 cmpxchgsz.acq tmp = [ptr], tmp
8029 } while (tmp != ret)
8030 */
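/* Editorial note: a call such as __sync_fetch_and_add_si (&v, 1) (hypothetical
   user code) normally expands to the cmpxchg4.acq loop sketched above; the
   special case below instead emits a single fetchadd4.acq when the addend is
   one of the immediates that instruction accepts (+-1, +-4, +-8, +-16).  */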
8032 static rtx
8033 ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
8034 tree arglist, rtx target)
8036 rtx ret, label, tmp, ccv, insn, mem, value;
8037 tree arg0, arg1;
8039 arg0 = TREE_VALUE (arglist);
8040 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8041 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8042 #ifdef POINTERS_EXTEND_UNSIGNED
8043 if (GET_MODE(mem) != Pmode)
8044 mem = convert_memory_address (Pmode, mem);
8045 #endif
8046 value = expand_expr (arg1, NULL_RTX, mode, 0);
8048 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8049 MEM_VOLATILE_P (mem) = 1;
8051 if (target && register_operand (target, mode))
8052 ret = target;
8053 else
8054 ret = gen_reg_rtx (mode);
8056 emit_insn (gen_mf ());
8058 /* Special case for fetchadd instructions. */
8059 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
8061 if (mode == SImode)
8062 insn = gen_fetchadd_acq_si (ret, mem, value);
8063 else
8064 insn = gen_fetchadd_acq_di (ret, mem, value);
8065 emit_insn (insn);
8066 return ret;
8069 tmp = gen_reg_rtx (mode);
8070 /* ar.ccv must always be loaded with a zero-extended DImode value. */
8071 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8072 emit_move_insn (tmp, mem);
8074 label = gen_label_rtx ();
8075 emit_label (label);
8076 emit_move_insn (ret, tmp);
8077 convert_move (ccv, tmp, /*unsignedp=*/1);
8079 /* Perform the specific operation. Special case NAND by noticing
8080 one_cmpl_optab instead. */
8081 if (binoptab == one_cmpl_optab)
8083 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8084 binoptab = and_optab;
8086 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
8088 if (mode == SImode)
8089 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
8090 else
8091 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
8092 emit_insn (insn);
8094 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
8096 return ret;
8099 /* Expand op_and_fetch intrinsics. The basic code sequence is:
8102 tmp = [ptr];
8103 do {
8104 old = tmp;
8105 ar.ccv = tmp;
8106 ret = tmp <op> value;
8107 cmpxchgsz.acq tmp = [ptr], ret
8108 } while (tmp != old)
8109 */
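/* Editorial note: this mirrors ia64_expand_fetch_and_op but returns the new
   value rather than the old one.  As there, NAND is handled by complementing
   TMP and then using AND, so __sync_nand_and_fetch_si (&v, m) (hypothetical
   user code) yields ~old & m.  */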
8111 static rtx
8112 ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
8113 tree arglist, rtx target)
8115 rtx old, label, tmp, ret, ccv, insn, mem, value;
8116 tree arg0, arg1;
8118 arg0 = TREE_VALUE (arglist);
8119 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8120 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8121 #ifdef POINTERS_EXTEND_UNSIGNED
8122 if (GET_MODE(mem) != Pmode)
8123 mem = convert_memory_address (Pmode, mem);
8124 #endif
8126 value = expand_expr (arg1, NULL_RTX, mode, 0);
8128 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8129 MEM_VOLATILE_P (mem) = 1;
8131 if (target && ! register_operand (target, mode))
8132 target = NULL_RTX;
8134 emit_insn (gen_mf ());
8135 tmp = gen_reg_rtx (mode);
8136 old = gen_reg_rtx (mode);
8137 /* ar.ccv must always be loaded with a zero-extended DImode value. */
8138 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8140 emit_move_insn (tmp, mem);
8142 label = gen_label_rtx ();
8143 emit_label (label);
8144 emit_move_insn (old, tmp);
8145 convert_move (ccv, tmp, /*unsignedp=*/1);
8147 /* Perform the specific operation. Special case NAND by noticing
8148 one_cmpl_optab instead. */
8149 if (binoptab == one_cmpl_optab)
8151 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8152 binoptab = and_optab;
8154 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
8156 if (mode == SImode)
8157 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
8158 else
8159 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8160 emit_insn (insn);
8162 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
8164 return ret;
8167 /* Expand val_ and bool_compare_and_swap. For val_ we want:
8169 ar.ccv = oldval
8171 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
8172 return ret
8174 For bool_ it's the same except return ret == oldval.
8175 */
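/* Editorial usage sketch (hypothetical user code):

	int prev = __sync_val_compare_and_swap_si (&x, expect, repl);
	int done = __sync_bool_compare_and_swap_si (&x, expect, repl);

   PREV receives the previous contents of x; DONE is nonzero iff those
   contents equalled EXPECT (i.e. the store of REPL was performed).  */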
8177 static rtx
8178 ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
8179 int boolp, tree arglist, rtx target)
8181 tree arg0, arg1, arg2;
8182 rtx mem, old, new, ccv, tmp, insn;
8184 arg0 = TREE_VALUE (arglist);
8185 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8186 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8187 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8188 old = expand_expr (arg1, NULL_RTX, mode, 0);
8189 new = expand_expr (arg2, NULL_RTX, mode, 0);
8191 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8192 MEM_VOLATILE_P (mem) = 1;
8194 if (GET_MODE (old) != mode)
8195 old = convert_to_mode (mode, old, /*unsignedp=*/1);
8196 if (GET_MODE (new) != mode)
8197 new = convert_to_mode (mode, new, /*unsignedp=*/1);
8199 if (! register_operand (old, mode))
8200 old = copy_to_mode_reg (mode, old);
8201 if (! register_operand (new, mode))
8202 new = copy_to_mode_reg (mode, new);
8204 if (! boolp && target && register_operand (target, mode))
8205 tmp = target;
8206 else
8207 tmp = gen_reg_rtx (mode);
8209 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8210 convert_move (ccv, old, /*unsignedp=*/1);
8211 emit_insn (gen_mf ());
8212 if (mode == SImode)
8213 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8214 else
8215 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8216 emit_insn (insn);
8218 if (boolp)
8220 if (! target)
8221 target = gen_reg_rtx (rmode);
8222 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8224 else
8225 return tmp;
8228 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
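/* Editorial note: e.g. __sync_lock_test_and_set_si (&x, 1) (hypothetical user
   code) atomically stores 1 into x and returns the previous contents.  */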
8230 static rtx
8231 ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
8232 rtx target)
8234 tree arg0, arg1;
8235 rtx mem, new, ret, insn;
8237 arg0 = TREE_VALUE (arglist);
8238 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8239 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8240 new = expand_expr (arg1, NULL_RTX, mode, 0);
8242 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8243 MEM_VOLATILE_P (mem) = 1;
8244 if (! register_operand (new, mode))
8245 new = copy_to_mode_reg (mode, new);
8247 if (target && register_operand (target, mode))
8248 ret = target;
8249 else
8250 ret = gen_reg_rtx (mode);
8252 if (mode == SImode)
8253 insn = gen_xchgsi (ret, mem, new);
8254 else
8255 insn = gen_xchgdi (ret, mem, new);
8256 emit_insn (insn);
8258 return ret;
8261 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
8263 static rtx
8264 ia64_expand_lock_release (enum machine_mode mode, tree arglist,
8265 rtx target ATTRIBUTE_UNUSED)
8267 tree arg0;
8268 rtx mem;
8270 arg0 = TREE_VALUE (arglist);
8271 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8273 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8274 MEM_VOLATILE_P (mem) = 1;
8276 emit_move_insn (mem, const0_rtx);
8278 return const0_rtx;
8281 rtx
8282 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8283 enum machine_mode mode ATTRIBUTE_UNUSED,
8284 int ignore ATTRIBUTE_UNUSED)
8286 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8287 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8288 tree arglist = TREE_OPERAND (exp, 1);
8289 enum machine_mode rmode = VOIDmode;
8291 switch (fcode)
8293 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8294 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8295 mode = SImode;
8296 rmode = SImode;
8297 break;
8299 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8300 case IA64_BUILTIN_LOCK_RELEASE_SI:
8301 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8302 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8303 case IA64_BUILTIN_FETCH_AND_OR_SI:
8304 case IA64_BUILTIN_FETCH_AND_AND_SI:
8305 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8306 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8307 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8308 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8309 case IA64_BUILTIN_OR_AND_FETCH_SI:
8310 case IA64_BUILTIN_AND_AND_FETCH_SI:
8311 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8312 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8313 mode = SImode;
8314 break;
8316 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8317 mode = DImode;
8318 rmode = SImode;
8319 break;
8321 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8322 mode = DImode;
8323 rmode = DImode;
8324 break;
8326 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8327 case IA64_BUILTIN_LOCK_RELEASE_DI:
8328 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8329 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8330 case IA64_BUILTIN_FETCH_AND_OR_DI:
8331 case IA64_BUILTIN_FETCH_AND_AND_DI:
8332 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8333 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8334 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8335 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8336 case IA64_BUILTIN_OR_AND_FETCH_DI:
8337 case IA64_BUILTIN_AND_AND_FETCH_DI:
8338 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8339 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8340 mode = DImode;
8341 break;
8343 default:
8344 break;
8347 switch (fcode)
8349 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8350 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8351 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8352 target);
8354 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8355 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8356 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8357 target);
8359 case IA64_BUILTIN_SYNCHRONIZE:
8360 emit_insn (gen_mf ());
8361 return const0_rtx;
8363 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8364 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8365 return ia64_expand_lock_test_and_set (mode, arglist, target);
8367 case IA64_BUILTIN_LOCK_RELEASE_SI:
8368 case IA64_BUILTIN_LOCK_RELEASE_DI:
8369 return ia64_expand_lock_release (mode, arglist, target);
8371 case IA64_BUILTIN_BSP:
8372 if (! target || ! register_operand (target, DImode))
8373 target = gen_reg_rtx (DImode);
8374 emit_insn (gen_bsp_value (target));
8375 #ifdef POINTERS_EXTEND_UNSIGNED
8376 target = convert_memory_address (ptr_mode, target);
8377 #endif
8378 return target;
8380 case IA64_BUILTIN_FLUSHRS:
8381 emit_insn (gen_flushrs ());
8382 return const0_rtx;
8384 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8385 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8386 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8388 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8389 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8390 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8392 case IA64_BUILTIN_FETCH_AND_OR_SI:
8393 case IA64_BUILTIN_FETCH_AND_OR_DI:
8394 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8396 case IA64_BUILTIN_FETCH_AND_AND_SI:
8397 case IA64_BUILTIN_FETCH_AND_AND_DI:
8398 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8400 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8401 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8402 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8404 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8405 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8406 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8408 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8409 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8410 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8412 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8413 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8414 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8416 case IA64_BUILTIN_OR_AND_FETCH_SI:
8417 case IA64_BUILTIN_OR_AND_FETCH_DI:
8418 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8420 case IA64_BUILTIN_AND_AND_FETCH_SI:
8421 case IA64_BUILTIN_AND_AND_FETCH_DI:
8422 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8424 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8425 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8426 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8428 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8429 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8430 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8432 default:
8433 break;
8436 return NULL_RTX;
8439 /* On HP-UX IA64, aggregate parameters smaller than a word are passed in
8440 the most significant bits of the stack slot. */
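/* Editorial illustration: a 3-byte aggregate, for instance, occupies the 3
   most significant bytes of its 8-byte slot, which is why aggregates smaller
   than a word get `upward' padding below.  */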
8442 enum direction
8443 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8445 /* Exception to normal case for structures/unions/etc. */
8447 if (type && AGGREGATE_TYPE_P (type)
8448 && int_size_in_bytes (type) < UNITS_PER_WORD)
8449 return upward;
8451 /* Fall back to the default. */
8452 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
8455 /* Linked list of all external functions that are to be emitted by GCC.
8456 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8457 order to avoid putting out names that are never really used. */
8459 struct extern_func_list GTY(())
8461 struct extern_func_list *next;
8462 tree decl;
8465 static GTY(()) struct extern_func_list *extern_func_head;
8467 static void
8468 ia64_hpux_add_extern_decl (tree decl)
8470 struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8472 p->decl = decl;
8473 p->next = extern_func_head;
8474 extern_func_head = p;
8477 /* Print out the list of used global functions. */
8479 static void
8480 ia64_hpux_file_end (void)
8482 struct extern_func_list *p;
8484 for (p = extern_func_head; p; p = p->next)
8486 tree decl = p->decl;
8487 tree id = DECL_ASSEMBLER_NAME (decl);
8489 if (!id)
8490 abort ();
8492 if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8494 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8496 TREE_ASM_WRITTEN (decl) = 1;
8497 (*targetm.asm_out.globalize_label) (asm_out_file, name);
8498 fputs (TYPE_ASM_OP, asm_out_file);
8499 assemble_name (asm_out_file, name);
8500 fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8504 extern_func_head = 0;
8507 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
8508 modes of word_mode and larger. Rename the TFmode libfuncs using the
8509 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
8510 backward compatibility. */
8512 static void
8513 ia64_init_libfuncs (void)
8515 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
8516 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
8517 set_optab_libfunc (smod_optab, SImode, "__modsi3");
8518 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
8520 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8521 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8522 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8523 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8524 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8526 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8527 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8528 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8529 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8530 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8531 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8533 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8534 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8535 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8536 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8538 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8539 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
8542 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8544 static void
8545 ia64_hpux_init_libfuncs (void)
8547 ia64_init_libfuncs ();
8549 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8550 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8551 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8553 /* ia64_expand_compare uses this. */
8554 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8556 /* These should never be used. */
8557 set_optab_libfunc (eq_optab, TFmode, 0);
8558 set_optab_libfunc (ne_optab, TFmode, 0);
8559 set_optab_libfunc (gt_optab, TFmode, 0);
8560 set_optab_libfunc (ge_optab, TFmode, 0);
8561 set_optab_libfunc (lt_optab, TFmode, 0);
8562 set_optab_libfunc (le_optab, TFmode, 0);
8565 /* Rename the division and modulus functions in VMS. */
8567 static void
8568 ia64_vms_init_libfuncs (void)
8570 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8571 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8572 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8573 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8574 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8575 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8576 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8577 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8580 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8581 the HPUX conventions. */
8583 static void
8584 ia64_sysv4_init_libfuncs (void)
8586 ia64_init_libfuncs ();
8588 /* These functions are not part of the HPUX TFmode interface. We
8589 use them instead of _U_Qfcmp, which doesn't work the way we
8590 expect. */
8591 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8592 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8593 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8594 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8595 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8596 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8598 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8599 glibc doesn't have them. */
8602 /* Switch to the section to which we should output X. The only thing
8603 special we do here is to honor small data. */
8605 static void
8606 ia64_select_rtx_section (enum machine_mode mode, rtx x,
8607 unsigned HOST_WIDE_INT align)
8609 if (GET_MODE_SIZE (mode) > 0
8610 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8611 sdata_section ();
8612 else
8613 default_elf_select_rtx_section (mode, x, align);
8616 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8617 Pretend flag_pic is always set. */
8619 static void
8620 ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8622 default_elf_select_section_1 (exp, reloc, align, true);
8625 static void
8626 ia64_rwreloc_unique_section (tree decl, int reloc)
8628 default_unique_section_1 (decl, reloc, true);
8631 static void
8632 ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8633 unsigned HOST_WIDE_INT align)
8635 int save_pic = flag_pic;
8636 flag_pic = 1;
8637 ia64_select_rtx_section (mode, x, align);
8638 flag_pic = save_pic;
8641 #ifndef TARGET_RWRELOC
8642 #define TARGET_RWRELOC flag_pic
8643 #endif
8645 static unsigned int
8646 ia64_section_type_flags (tree decl, const char *name, int reloc)
8648 unsigned int flags = 0;
8650 if (strcmp (name, ".sdata") == 0
8651 || strncmp (name, ".sdata.", 7) == 0
8652 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
8653 || strncmp (name, ".sdata2.", 8) == 0
8654 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
8655 || strcmp (name, ".sbss") == 0
8656 || strncmp (name, ".sbss.", 6) == 0
8657 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
8658 flags = SECTION_SMALL;
8660 flags |= default_section_type_flags_1 (decl, name, reloc, TARGET_RWRELOC);
8661 return flags;
8664 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8665 structure type and that the address of that type should be passed
8666 in out0, rather than in r8. */
8668 static bool
8669 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8671 tree ret_type = TREE_TYPE (fntype);
8673 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8674 as the structure return address parameter, if the return value
8675 type has a non-trivial copy constructor or destructor. It is not
8676 clear if this same convention should be used for other
8677 programming languages. Until G++ 3.4, we incorrectly used r8 for
8678 these return values. */
8679 return (abi_version_at_least (2)
8680 && ret_type
8681 && TYPE_MODE (ret_type) == BLKmode
8682 && TREE_ADDRESSABLE (ret_type)
8683 && strcmp (lang_hooks.name, "GNU C++") == 0);
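/* Editorial example (hypothetical C++, not part of this file):

	struct S { S (const S &); };	// non-trivial copy constructor
	S f ();

   With -fabi-version >= 2, the address for f's return value is passed in
   out0, so this predicate returns true for f's type.  */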
8686 /* Output the assembler code for a thunk function. THUNK_DECL is the
8687 declaration for the thunk function itself, FUNCTION is the decl for
8688 the target function. DELTA is an immediate constant offset to be
8689 added to THIS. If VCALL_OFFSET is nonzero, the word at
8690 *(*this + vcall_offset) should be added to THIS. */
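/* Editorial sketch (C-like pseudocode) of what the emitted thunk does:

	this += delta;
	if (vcall_offset != 0)
	  this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
	return function (this, ...);	// emitted as a sibcall

   The code below open-codes this with adds, loads and a tail call.  */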
8692 static void
8693 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8694 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8695 tree function)
8697 rtx this, insn, funexp;
8698 unsigned int this_parmno;
8699 unsigned int this_regno;
8701 reload_completed = 1;
8702 epilogue_completed = 1;
8703 no_new_pseudos = 1;
8704 reset_block_changes ();
8706 /* Set things up as ia64_expand_prologue might. */
8707 last_scratch_gr_reg = 15;
8709 memset (&current_frame_info, 0, sizeof (current_frame_info));
8710 current_frame_info.spill_cfa_off = -16;
8711 current_frame_info.n_input_regs = 1;
8712 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8714 /* Mark the end of the (empty) prologue. */
8715 emit_note (NOTE_INSN_PROLOGUE_END);
8717 /* Figure out whether "this" will be the first parameter (the
8718 typical case) or the second parameter (as happens when the
8719 virtual function returns certain class objects). */
8720 this_parmno
8721 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8722 ? 1 : 0);
8723 this_regno = IN_REG (this_parmno);
8724 if (!TARGET_REG_NAMES)
8725 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8727 this = gen_rtx_REG (Pmode, this_regno);
8728 if (TARGET_ILP32)
8730 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8731 REG_POINTER (tmp) = 1;
8732 if (delta && CONST_OK_FOR_I (delta))
8734 emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8735 delta = 0;
8737 else
8738 emit_insn (gen_ptr_extend (this, tmp));
8741 /* Apply the constant offset, if required. */
8742 if (delta)
8744 rtx delta_rtx = GEN_INT (delta);
8746 if (!CONST_OK_FOR_I (delta))
8748 rtx tmp = gen_rtx_REG (Pmode, 2);
8749 emit_move_insn (tmp, delta_rtx);
8750 delta_rtx = tmp;
8752 emit_insn (gen_adddi3 (this, this, delta_rtx));
8755 /* Apply the offset from the vtable, if required. */
8756 if (vcall_offset)
8758 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8759 rtx tmp = gen_rtx_REG (Pmode, 2);
8761 if (TARGET_ILP32)
8763 rtx t = gen_rtx_REG (ptr_mode, 2);
8764 REG_POINTER (t) = 1;
8765 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8766 if (CONST_OK_FOR_I (vcall_offset))
8768 emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8769 vcall_offset_rtx));
8770 vcall_offset = 0;
8772 else
8773 emit_insn (gen_ptr_extend (tmp, t));
8775 else
8776 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8778 if (vcall_offset)
8780 if (!CONST_OK_FOR_J (vcall_offset))
8782 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8783 emit_move_insn (tmp2, vcall_offset_rtx);
8784 vcall_offset_rtx = tmp2;
8786 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8789 if (TARGET_ILP32)
8790 emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8791 gen_rtx_MEM (ptr_mode, tmp));
8792 else
8793 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8795 emit_insn (gen_adddi3 (this, this, tmp));
8798 /* Generate a tail call to the target function. */
8799 if (! TREE_USED (function))
8801 assemble_external (function);
8802 TREE_USED (function) = 1;
8804 funexp = XEXP (DECL_RTL (function), 0);
8805 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8806 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8807 insn = get_last_insn ();
8808 SIBLING_CALL_P (insn) = 1;
8810 /* Code generation for calls relies on splitting. */
8811 reload_completed = 1;
8812 epilogue_completed = 1;
8813 try_split (PATTERN (insn), insn, 0);
8815 emit_barrier ();
8817 /* Run just enough of rest_of_compilation to get the insns emitted.
8818 There's not really enough bulk here to make other passes such as
8819 instruction scheduling worthwhile. Note that use_thunk calls
8820 assemble_start_function and assemble_end_function. */
8822 insn_locators_initialize ();
8823 emit_all_insn_group_barriers (NULL);
8824 insn = get_insns ();
8825 shorten_branches (insn);
8826 final_start_function (insn, file, 1);
8827 final (insn, file, 1);
8828 final_end_function ();
8830 reload_completed = 0;
8831 epilogue_completed = 0;
8832 no_new_pseudos = 0;
8835 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
8837 static rtx
8838 ia64_struct_value_rtx (tree fntype,
8839 int incoming ATTRIBUTE_UNUSED)
8841 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
8842 return NULL_RTX;
8843 return gen_rtx_REG (Pmode, GR_REG (8));
8846 static bool
8847 ia64_scalar_mode_supported_p (enum machine_mode mode)
8849 switch (mode)
8851 case QImode:
8852 case HImode:
8853 case SImode:
8854 case DImode:
8855 case TImode:
8856 return true;
8858 case SFmode:
8859 case DFmode:
8860 case XFmode:
8861 return true;
8863 case TFmode:
8864 return TARGET_HPUX;
8866 default:
8867 return false;
8871 static bool
8872 ia64_vector_mode_supported_p (enum machine_mode mode)
8874 switch (mode)
8876 case V8QImode:
8877 case V4HImode:
8878 case V2SImode:
8879 return true;
8881 case V2SFmode:
8882 return true;
8884 default:
8885 return false;
8889 #include "gt-ia64.h"