1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
42 #include "basic-block.h"
44 #include "sched-int.h"
47 #include "target-def.h"
50 /* This is used for communication between ASM_OUTPUT_LABEL and
51 ASM_OUTPUT_LABELREF. */
52 int ia64_asm_output_label
= 0;
54 /* Define the information needed to generate branch and scc insns. This is
55 stored from the compare operation. */
56 struct rtx_def
* ia64_compare_op0
;
57 struct rtx_def
* ia64_compare_op1
;
59 /* Register names for ia64_expand_prologue. */
/* Assembler names for general registers r32..r127: index i yields
   the name of register r(32 + i).  */
60 static const char * const ia64_reg_numbers
[96] =
61 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
62 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
63 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
64 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
65 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
66 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
67 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
68 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
69 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
70 "r104","r105","r106","r107","r108","r109","r110","r111",
71 "r112","r113","r114","r115","r116","r117","r118","r119",
72 "r120","r121","r122","r123","r124","r125","r126","r127"};
74 /* ??? These strings could be shared with REGISTER_NAMES. */
/* Assembler names for the eight input registers: index i yields "in<i>".  */
75 static const char * const ia64_input_reg_names
[8] =
76 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
78 /* ??? These strings could be shared with REGISTER_NAMES. */
/* Assembler names for the eighty local registers: index i yields "loc<i>".  */
79 static const char * const ia64_local_reg_names
[80] =
80 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
81 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
82 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
83 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
84 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
85 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
86 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
87 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
88 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
89 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
91 /* ??? These strings could be shared with REGISTER_NAMES. */
/* Assembler names for the eight output registers: index i yields "out<i>".  */
92 static const char * const ia64_output_reg_names
[8] =
93 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
95 /* String used with the -mfixed-range= option. */
96 const char *ia64_fixed_range_string
;
98 /* Determines whether we run our final scheduling pass or not. We always
99 avoid the normal second scheduling pass. */
100 static int ia64_flag_schedule_insns2
;
102 /* Variables which are this size or smaller are put in the sdata/sbss
105 unsigned int ia64_section_threshold
;
107 static int find_gr_spill
PARAMS ((int));
108 static int next_scratch_gr_reg
PARAMS ((void));
109 static void mark_reg_gr_used_mask
PARAMS ((rtx
, void *));
110 static void ia64_compute_frame_size
PARAMS ((HOST_WIDE_INT
));
111 static void setup_spill_pointers
PARAMS ((int, rtx
, HOST_WIDE_INT
));
112 static void finish_spill_pointers
PARAMS ((void));
113 static rtx spill_restore_mem
PARAMS ((rtx
, HOST_WIDE_INT
));
114 static void do_spill
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
, rtx
));
115 static void do_restore
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
));
116 static rtx gen_movdi_x
PARAMS ((rtx
, rtx
, rtx
));
117 static rtx gen_fr_spill_x
PARAMS ((rtx
, rtx
, rtx
));
118 static rtx gen_fr_restore_x
PARAMS ((rtx
, rtx
, rtx
));
120 static enum machine_mode hfa_element_mode
PARAMS ((tree
, int));
121 static void fix_range
PARAMS ((const char *));
122 static void ia64_add_gc_roots
PARAMS ((void));
123 static void ia64_init_machine_status
PARAMS ((struct function
*));
124 static void ia64_mark_machine_status
PARAMS ((struct function
*));
125 static void ia64_free_machine_status
PARAMS ((struct function
*));
126 static void emit_insn_group_barriers
PARAMS ((FILE *, rtx
));
127 static void emit_all_insn_group_barriers
PARAMS ((FILE *, rtx
));
128 static void emit_predicate_relation_info
PARAMS ((void));
129 static void process_epilogue
PARAMS ((void));
130 static int process_set
PARAMS ((FILE *, rtx
));
132 static rtx ia64_expand_fetch_and_op
PARAMS ((optab
, enum machine_mode
,
134 static rtx ia64_expand_op_and_fetch
PARAMS ((optab
, enum machine_mode
,
136 static rtx ia64_expand_compare_and_swap
PARAMS ((enum machine_mode
, int,
138 static rtx ia64_expand_lock_test_and_set
PARAMS ((enum machine_mode
,
140 static rtx ia64_expand_lock_release
PARAMS ((enum machine_mode
, tree
, rtx
));
141 static bool ia64_assemble_integer
PARAMS ((rtx
, unsigned int, int));
142 static void ia64_output_function_prologue
PARAMS ((FILE *, HOST_WIDE_INT
));
143 static void ia64_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
144 static void ia64_output_function_end_prologue
PARAMS ((FILE *));
146 static int ia64_issue_rate
PARAMS ((void));
147 static int ia64_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
148 static void ia64_sched_init
PARAMS ((FILE *, int, int));
149 static void ia64_sched_finish
PARAMS ((FILE *, int));
150 static int ia64_internal_sched_reorder
PARAMS ((FILE *, int, rtx
*,
152 static int ia64_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
153 static int ia64_sched_reorder2
PARAMS ((FILE *, int, rtx
*, int *, int));
154 static int ia64_variable_issue
PARAMS ((FILE *, int, rtx
, int));
157 /* Table of valid machine attributes. */
158 static const struct attribute_spec ia64_attribute_table
[] =
160 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
161 { "syscall_linkage", 0, 0, false, true, true, NULL
},
162 { NULL
, 0, 0, false, false, false, NULL
}
165 /* Initialize the GCC target structure. */
166 #undef TARGET_ATTRIBUTE_TABLE
167 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
169 #undef TARGET_INIT_BUILTINS
170 #define TARGET_INIT_BUILTINS ia64_init_builtins
172 #undef TARGET_EXPAND_BUILTIN
173 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
175 #undef TARGET_ASM_BYTE_OP
176 #define TARGET_ASM_BYTE_OP "\tdata1\t"
177 #undef TARGET_ASM_ALIGNED_HI_OP
178 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
179 #undef TARGET_ASM_ALIGNED_SI_OP
180 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
181 #undef TARGET_ASM_ALIGNED_DI_OP
182 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
183 #undef TARGET_ASM_UNALIGNED_HI_OP
184 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
185 #undef TARGET_ASM_UNALIGNED_SI_OP
186 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
187 #undef TARGET_ASM_UNALIGNED_DI_OP
188 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
189 #undef TARGET_ASM_INTEGER
190 #define TARGET_ASM_INTEGER ia64_assemble_integer
192 #undef TARGET_ASM_FUNCTION_PROLOGUE
193 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
194 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
195 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
196 #undef TARGET_ASM_FUNCTION_EPILOGUE
197 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
199 #undef TARGET_SCHED_ADJUST_COST
200 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
201 #undef TARGET_SCHED_ISSUE_RATE
202 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
203 #undef TARGET_SCHED_VARIABLE_ISSUE
204 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
205 #undef TARGET_SCHED_INIT
206 #define TARGET_SCHED_INIT ia64_sched_init
207 #undef TARGET_SCHED_FINISH
208 #define TARGET_SCHED_FINISH ia64_sched_finish
209 #undef TARGET_SCHED_REORDER
210 #define TARGET_SCHED_REORDER ia64_sched_reorder
211 #undef TARGET_SCHED_REORDER2
212 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
214 struct gcc_target targetm
= TARGET_INITIALIZER
;
216 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
219 call_operand (op
, mode
)
221 enum machine_mode mode
;
223 if (mode
!= GET_MODE (op
))
226 return (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == REG
227 || (GET_CODE (op
) == SUBREG
&& GET_CODE (XEXP (op
, 0)) == REG
));
230 /* Return 1 if OP refers to a symbol in the sdata section. */
233 sdata_symbolic_operand (op
, mode
)
235 enum machine_mode mode ATTRIBUTE_UNUSED
;
237 switch (GET_CODE (op
))
240 if (GET_CODE (XEXP (op
, 0)) != PLUS
241 || GET_CODE (XEXP (XEXP (op
, 0), 0)) != SYMBOL_REF
)
243 op
= XEXP (XEXP (op
, 0), 0);
247 if (CONSTANT_POOL_ADDRESS_P (op
))
248 return GET_MODE_SIZE (get_pool_mode (op
)) <= ia64_section_threshold
;
250 return XSTR (op
, 0)[0] == SDATA_NAME_FLAG_CHAR
;
259 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
262 got_symbolic_operand (op
, mode
)
264 enum machine_mode mode ATTRIBUTE_UNUSED
;
266 switch (GET_CODE (op
))
270 if (GET_CODE (op
) != PLUS
)
272 if (GET_CODE (XEXP (op
, 0)) != SYMBOL_REF
)
275 if (GET_CODE (op
) != CONST_INT
)
280 /* Ok if we're not using GOT entries at all. */
281 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
284 /* "Ok" while emitting rtl, since otherwise we won't be provided
285 with the entire offset during emission, which makes it very
286 hard to split the offset into high and low parts. */
287 if (rtx_equal_function_value_matters
)
290 /* Force the low 14 bits of the constant to zero so that we do not
291 use up so many GOT entries. */
292 return (INTVAL (op
) & 0x3fff) == 0;
304 /* Return 1 if OP refers to a symbol. */
307 symbolic_operand (op
, mode
)
309 enum machine_mode mode ATTRIBUTE_UNUSED
;
311 switch (GET_CODE (op
))
324 /* Return 1 if OP refers to a function. */
327 function_operand (op
, mode
)
329 enum machine_mode mode ATTRIBUTE_UNUSED
;
331 if (GET_CODE (op
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (op
))
337 /* Return 1 if OP is setjmp or a similar function. */
339 /* ??? This is an unsatisfying solution. Should rethink. */
342 setjmp_operand (op
, mode
)
344 enum machine_mode mode ATTRIBUTE_UNUSED
;
349 if (GET_CODE (op
) != SYMBOL_REF
)
354 /* The following code is borrowed from special_function_p in calls.c. */
356 /* Disregard prefix _, __ or __x. */
359 if (name
[1] == '_' && name
[2] == 'x')
361 else if (name
[1] == '_')
371 && (! strcmp (name
, "setjmp")
372 || ! strcmp (name
, "setjmp_syscall")))
374 && ! strcmp (name
, "sigsetjmp"))
376 && ! strcmp (name
, "savectx")));
378 else if ((name
[0] == 'q' && name
[1] == 's'
379 && ! strcmp (name
, "qsetjmp"))
380 || (name
[0] == 'v' && name
[1] == 'f'
381 && ! strcmp (name
, "vfork")))
387 /* Return 1 if OP is a general operand, but when pic exclude symbolic
390 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
391 from PREDICATE_CODES. */
394 move_operand (op
, mode
)
396 enum machine_mode mode
;
398 if (! TARGET_NO_PIC
&& symbolic_operand (op
, mode
))
401 return general_operand (op
, mode
);
404 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
407 gr_register_operand (op
, mode
)
409 enum machine_mode mode
;
411 if (! register_operand (op
, mode
))
413 if (GET_CODE (op
) == SUBREG
)
414 op
= SUBREG_REG (op
);
415 if (GET_CODE (op
) == REG
)
417 unsigned int regno
= REGNO (op
);
418 if (regno
< FIRST_PSEUDO_REGISTER
)
419 return GENERAL_REGNO_P (regno
);
424 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
427 fr_register_operand (op
, mode
)
429 enum machine_mode mode
;
431 if (! register_operand (op
, mode
))
433 if (GET_CODE (op
) == SUBREG
)
434 op
= SUBREG_REG (op
);
435 if (GET_CODE (op
) == REG
)
437 unsigned int regno
= REGNO (op
);
438 if (regno
< FIRST_PSEUDO_REGISTER
)
439 return FR_REGNO_P (regno
);
444 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
447 grfr_register_operand (op
, mode
)
449 enum machine_mode mode
;
451 if (! register_operand (op
, mode
))
453 if (GET_CODE (op
) == SUBREG
)
454 op
= SUBREG_REG (op
);
455 if (GET_CODE (op
) == REG
)
457 unsigned int regno
= REGNO (op
);
458 if (regno
< FIRST_PSEUDO_REGISTER
)
459 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
464 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
467 gr_nonimmediate_operand (op
, mode
)
469 enum machine_mode mode
;
471 if (! nonimmediate_operand (op
, mode
))
473 if (GET_CODE (op
) == SUBREG
)
474 op
= SUBREG_REG (op
);
475 if (GET_CODE (op
) == REG
)
477 unsigned int regno
= REGNO (op
);
478 if (regno
< FIRST_PSEUDO_REGISTER
)
479 return GENERAL_REGNO_P (regno
);
484 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
487 fr_nonimmediate_operand (op
, mode
)
489 enum machine_mode mode
;
491 if (! nonimmediate_operand (op
, mode
))
493 if (GET_CODE (op
) == SUBREG
)
494 op
= SUBREG_REG (op
);
495 if (GET_CODE (op
) == REG
)
497 unsigned int regno
= REGNO (op
);
498 if (regno
< FIRST_PSEUDO_REGISTER
)
499 return FR_REGNO_P (regno
);
504 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
507 grfr_nonimmediate_operand (op
, mode
)
509 enum machine_mode mode
;
511 if (! nonimmediate_operand (op
, mode
))
513 if (GET_CODE (op
) == SUBREG
)
514 op
= SUBREG_REG (op
);
515 if (GET_CODE (op
) == REG
)
517 unsigned int regno
= REGNO (op
);
518 if (regno
< FIRST_PSEUDO_REGISTER
)
519 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
524 /* Return 1 if OP is a GR register operand, or zero. */
527 gr_reg_or_0_operand (op
, mode
)
529 enum machine_mode mode
;
531 return (op
== const0_rtx
|| gr_register_operand (op
, mode
));
534 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
537 gr_reg_or_5bit_operand (op
, mode
)
539 enum machine_mode mode
;
541 return ((GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 32)
542 || GET_CODE (op
) == CONSTANT_P_RTX
543 || gr_register_operand (op
, mode
));
546 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
549 gr_reg_or_6bit_operand (op
, mode
)
551 enum machine_mode mode
;
553 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
554 || GET_CODE (op
) == CONSTANT_P_RTX
555 || gr_register_operand (op
, mode
));
558 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
561 gr_reg_or_8bit_operand (op
, mode
)
563 enum machine_mode mode
;
565 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
566 || GET_CODE (op
) == CONSTANT_P_RTX
567 || gr_register_operand (op
, mode
));
570 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
573 grfr_reg_or_8bit_operand (op
, mode
)
575 enum machine_mode mode
;
577 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
578 || GET_CODE (op
) == CONSTANT_P_RTX
579 || grfr_register_operand (op
, mode
));
582 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
586 gr_reg_or_8bit_adjusted_operand (op
, mode
)
588 enum machine_mode mode
;
590 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_L (INTVAL (op
)))
591 || GET_CODE (op
) == CONSTANT_P_RTX
592 || gr_register_operand (op
, mode
));
595 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
596 immediate and an 8 bit adjusted immediate operand. This is necessary
597 because when we emit a compare, we don't know what the condition will be,
598 so we need the union of the immediates accepted by GT and LT. */
601 gr_reg_or_8bit_and_adjusted_operand (op
, mode
)
603 enum machine_mode mode
;
605 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
))
606 && CONST_OK_FOR_L (INTVAL (op
)))
607 || GET_CODE (op
) == CONSTANT_P_RTX
608 || gr_register_operand (op
, mode
));
611 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
614 gr_reg_or_14bit_operand (op
, mode
)
616 enum machine_mode mode
;
618 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_I (INTVAL (op
)))
619 || GET_CODE (op
) == CONSTANT_P_RTX
620 || gr_register_operand (op
, mode
));
623 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
626 gr_reg_or_22bit_operand (op
, mode
)
628 enum machine_mode mode
;
630 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_J (INTVAL (op
)))
631 || GET_CODE (op
) == CONSTANT_P_RTX
632 || gr_register_operand (op
, mode
));
635 /* Return 1 if OP is a 6 bit immediate operand. */
638 shift_count_operand (op
, mode
)
640 enum machine_mode mode ATTRIBUTE_UNUSED
;
642 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
643 || GET_CODE (op
) == CONSTANT_P_RTX
);
646 /* Return 1 if OP is a 5 bit immediate operand. */
649 shift_32bit_count_operand (op
, mode
)
651 enum machine_mode mode ATTRIBUTE_UNUSED
;
653 return ((GET_CODE (op
) == CONST_INT
654 && (INTVAL (op
) >= 0 && INTVAL (op
) < 32))
655 || GET_CODE (op
) == CONSTANT_P_RTX
);
658 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
661 shladd_operand (op
, mode
)
663 enum machine_mode mode ATTRIBUTE_UNUSED
;
665 return (GET_CODE (op
) == CONST_INT
666 && (INTVAL (op
) == 2 || INTVAL (op
) == 4
667 || INTVAL (op
) == 8 || INTVAL (op
) == 16));
670 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
673 fetchadd_operand (op
, mode
)
675 enum machine_mode mode ATTRIBUTE_UNUSED
;
677 return (GET_CODE (op
) == CONST_INT
678 && (INTVAL (op
) == -16 || INTVAL (op
) == -8 ||
679 INTVAL (op
) == -4 || INTVAL (op
) == -1 ||
680 INTVAL (op
) == 1 || INTVAL (op
) == 4 ||
681 INTVAL (op
) == 8 || INTVAL (op
) == 16));
684 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
687 fr_reg_or_fp01_operand (op
, mode
)
689 enum machine_mode mode
;
691 return ((GET_CODE (op
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (op
))
692 || fr_register_operand (op
, mode
));
695 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
696 POST_MODIFY with a REG as displacement. */
699 destination_operand (op
, mode
)
701 enum machine_mode mode
;
703 if (! nonimmediate_operand (op
, mode
))
705 if (GET_CODE (op
) == MEM
706 && GET_CODE (XEXP (op
, 0)) == POST_MODIFY
707 && GET_CODE (XEXP (XEXP (XEXP (op
, 0), 1), 1)) == REG
)
712 /* Like memory_operand, but don't allow post-increments. */
715 not_postinc_memory_operand (op
, mode
)
717 enum machine_mode mode
;
719 return (memory_operand (op
, mode
)
720 && GET_RTX_CLASS (GET_CODE (XEXP (op
, 0))) != 'a');
723 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
724 signed immediate operand. */
727 normal_comparison_operator (op
, mode
)
729 enum machine_mode mode
;
731 enum rtx_code code
= GET_CODE (op
);
732 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
733 && (code
== EQ
|| code
== NE
734 || code
== GT
|| code
== LE
|| code
== GTU
|| code
== LEU
));
737 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
738 signed immediate operand. */
741 adjusted_comparison_operator (op
, mode
)
743 enum machine_mode mode
;
745 enum rtx_code code
= GET_CODE (op
);
746 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
747 && (code
== LT
|| code
== GE
|| code
== LTU
|| code
== GEU
));
750 /* Return 1 if this is a signed inequality operator. */
753 signed_inequality_operator (op
, mode
)
755 enum machine_mode mode
;
757 enum rtx_code code
= GET_CODE (op
);
758 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
759 && (code
== GE
|| code
== GT
760 || code
== LE
|| code
== LT
));
763 /* Return 1 if this operator is valid for predication. */
766 predicate_operator (op
, mode
)
768 enum machine_mode mode
;
770 enum rtx_code code
= GET_CODE (op
);
771 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
772 && (code
== EQ
|| code
== NE
));
775 /* Return 1 if this operator can be used in a conditional operation. */
778 condop_operator (op
, mode
)
780 enum machine_mode mode
;
782 enum rtx_code code
= GET_CODE (op
);
783 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
784 && (code
== PLUS
|| code
== MINUS
|| code
== AND
785 || code
== IOR
|| code
== XOR
));
788 /* Return 1 if this is the ar.lc register. */
791 ar_lc_reg_operand (op
, mode
)
793 enum machine_mode mode
;
795 return (GET_MODE (op
) == DImode
796 && (mode
== DImode
|| mode
== VOIDmode
)
797 && GET_CODE (op
) == REG
798 && REGNO (op
) == AR_LC_REGNUM
);
801 /* Return 1 if this is the ar.ccv register. */
804 ar_ccv_reg_operand (op
, mode
)
806 enum machine_mode mode
;
808 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
809 && GET_CODE (op
) == REG
810 && REGNO (op
) == AR_CCV_REGNUM
);
813 /* Return 1 if this is the ar.pfs register. */
816 ar_pfs_reg_operand (op
, mode
)
818 enum machine_mode mode
;
820 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
821 && GET_CODE (op
) == REG
822 && REGNO (op
) == AR_PFS_REGNUM
);
825 /* Like general_operand, but don't allow (mem (addressof)). */
828 general_tfmode_operand (op
, mode
)
830 enum machine_mode mode
;
832 if (! general_operand (op
, mode
))
834 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
842 destination_tfmode_operand (op
, mode
)
844 enum machine_mode mode
;
846 if (! destination_operand (op
, mode
))
848 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
856 tfreg_or_fp01_operand (op
, mode
)
858 enum machine_mode mode
;
860 if (GET_CODE (op
) == SUBREG
)
862 return fr_reg_or_fp01_operand (op
, mode
);
865 /* Return 1 if OP is valid as a base register in a reg + offset address. */
868 basereg_operand (op
, mode
)
870 enum machine_mode mode
;
872 /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
873 checks from pa.c basereg_operand as well? Seems to be OK without them
876 return (register_operand (op
, mode
) &&
877 REG_POINTER ((GET_CODE (op
) == SUBREG
) ? SUBREG_REG (op
) : op
));
880 /* Return 1 if the operands of a move are ok. */
883 ia64_move_ok (dst
, src
)
886 /* If we're under init_recog_no_volatile, we'll not be able to use
887 memory_operand. So check the code directly and don't worry about
888 the validity of the underlying address, which should have been
889 checked elsewhere anyway. */
890 if (GET_CODE (dst
) != MEM
)
892 if (GET_CODE (src
) == MEM
)
894 if (register_operand (src
, VOIDmode
))
897 /* Otherwise, this must be a constant: either 0 or 0.0 or 1.0. */
898 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
899 return src
== const0_rtx
;
901 return GET_CODE (src
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (src
);
904 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
905 Return the length of the field, or <= 0 on failure. */
908 ia64_depz_field_mask (rop
, rshift
)
911 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
912 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
914 /* Get rid of the zero bits we're shifting in. */
917 /* We must now have a solid block of 1's at bit 0. */
918 return exact_log2 (op
+ 1);
921 /* Expand a symbolic constant load. */
922 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
925 ia64_expand_load_address (dest
, src
, scratch
)
926 rtx dest
, src
, scratch
;
930 /* The destination could be a MEM during initial rtl generation,
931 which isn't a valid destination for the PIC load address patterns. */
932 if (! register_operand (dest
, DImode
))
933 temp
= gen_reg_rtx (DImode
);
938 emit_insn (gen_load_gprel64 (temp
, src
));
939 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (src
))
940 emit_insn (gen_load_fptr (temp
, src
));
941 else if (sdata_symbolic_operand (src
, DImode
))
942 emit_insn (gen_load_gprel (temp
, src
));
943 else if (GET_CODE (src
) == CONST
944 && GET_CODE (XEXP (src
, 0)) == PLUS
945 && GET_CODE (XEXP (XEXP (src
, 0), 1)) == CONST_INT
946 && (INTVAL (XEXP (XEXP (src
, 0), 1)) & 0x1fff) != 0)
948 rtx subtarget
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
949 rtx sym
= XEXP (XEXP (src
, 0), 0);
950 HOST_WIDE_INT ofs
, hi
, lo
;
952 /* Split the offset into a sign extended 14-bit low part
953 and a complementary high part. */
954 ofs
= INTVAL (XEXP (XEXP (src
, 0), 1));
955 lo
= ((ofs
& 0x3fff) ^ 0x2000) - 0x2000;
959 scratch
= no_new_pseudos
? subtarget
: gen_reg_rtx (DImode
);
961 emit_insn (gen_load_symptr (subtarget
, plus_constant (sym
, hi
),
963 emit_insn (gen_adddi3 (temp
, subtarget
, GEN_INT (lo
)));
969 scratch
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
971 insn
= emit_insn (gen_load_symptr (temp
, src
, scratch
));
972 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_EQUAL
, src
, REG_NOTES (insn
));
976 emit_move_insn (dest
, temp
);
980 ia64_gp_save_reg (setjmp_p
)
983 rtx save
= cfun
->machine
->ia64_gp_save
;
987 /* We can't save GP in a pseudo if we are calling setjmp, because
988 pseudos won't be restored by longjmp. For now, we save it in r4. */
989 /* ??? It would be more efficient to save this directly into a stack
990 slot. Unfortunately, the stack slot address gets cse'd across
991 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
994 /* ??? Get the barf bag, Virginia. We've got to replace this thing
995 in place, since this rtx is used in exception handling receivers.
996 Moreover, we must get this rtx out of regno_reg_rtx or reload
997 will do the wrong thing. */
998 unsigned int old_regno
= REGNO (save
);
999 if (setjmp_p
&& old_regno
!= GR_REG (4))
1001 REGNO (save
) = GR_REG (4);
1002 regno_reg_rtx
[old_regno
] = gen_rtx_raw_REG (DImode
, old_regno
);
1008 save
= gen_rtx_REG (DImode
, GR_REG (4));
1009 else if (! optimize
)
1010 save
= gen_rtx_REG (DImode
, LOC_REG (0));
1012 save
= gen_reg_rtx (DImode
);
1013 cfun
->machine
->ia64_gp_save
= save
;
1019 /* Split a post-reload TImode reference into two DImode components. */
1022 ia64_split_timode (out
, in
, scratch
)
1026 switch (GET_CODE (in
))
1029 out
[0] = gen_rtx_REG (DImode
, REGNO (in
));
1030 out
[1] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
1035 rtx base
= XEXP (in
, 0);
1037 switch (GET_CODE (base
))
1040 out
[0] = adjust_address (in
, DImode
, 0);
1043 base
= XEXP (base
, 0);
1044 out
[0] = adjust_address (in
, DImode
, 0);
1047 /* Since we're changing the mode, we need to change to POST_MODIFY
1048 as well to preserve the size of the increment. Either that or
1049 do the update in two steps, but we've already got this scratch
1050 register handy so let's use it. */
1052 base
= XEXP (base
, 0);
1054 = change_address (in
, DImode
,
1056 (Pmode
, base
, plus_constant (base
, 16)));
1059 base
= XEXP (base
, 0);
1061 = change_address (in
, DImode
,
1063 (Pmode
, base
, plus_constant (base
, -16)));
1069 if (scratch
== NULL_RTX
)
1071 out
[1] = change_address (in
, DImode
, scratch
);
1072 return gen_adddi3 (scratch
, base
, GEN_INT (8));
1077 split_double (in
, &out
[0], &out
[1]);
1085 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1086 through memory plus an extra GR scratch register. Except that you can
1087 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1088 SECONDARY_RELOAD_CLASS, but not both.
1090 We got into problems in the first place by allowing a construct like
1091 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1092 This solution attempts to prevent this situation from occurring. When
1093 we see something like the above, we spill the inner register to memory. */
1096 spill_tfmode_operand (in
, force
)
1100 if (GET_CODE (in
) == SUBREG
1101 && GET_MODE (SUBREG_REG (in
)) == TImode
1102 && GET_CODE (SUBREG_REG (in
)) == REG
)
1104 rtx mem
= gen_mem_addressof (SUBREG_REG (in
), NULL_TREE
);
1105 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
1107 else if (force
&& GET_CODE (in
) == REG
)
1109 rtx mem
= gen_mem_addressof (in
, NULL_TREE
);
1110 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
1112 else if (GET_CODE (in
) == MEM
1113 && GET_CODE (XEXP (in
, 0)) == ADDRESSOF
)
1114 return change_address (in
, TFmode
, copy_to_reg (XEXP (in
, 0)));
1119 /* Emit comparison instruction if necessary, returning the expression
1120 that holds the compare result in the proper mode. */
1123 ia64_expand_compare (code
, mode
)
1125 enum machine_mode mode
;
1127 rtx op0
= ia64_compare_op0
, op1
= ia64_compare_op1
;
1130 /* If we have a BImode input, then we already have a compare result, and
1131 do not need to emit another comparison. */
1132 if (GET_MODE (op0
) == BImode
)
1134 if ((code
== NE
|| code
== EQ
) && op1
== const0_rtx
)
1141 cmp
= gen_reg_rtx (BImode
);
1142 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1143 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
)));
1147 return gen_rtx_fmt_ee (code
, mode
, cmp
, const0_rtx
);
1150 /* Emit the appropriate sequence for a call. */
1153 ia64_expand_call (retval
, addr
, nextarg
, sibcall_p
)
1159 rtx insn
, b0
, pfs
, gp_save
, narg_rtx
, dest
;
1163 addr
= XEXP (addr
, 0);
1164 b0
= gen_rtx_REG (DImode
, R_BR (0));
1165 pfs
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
1169 else if (IN_REGNO_P (REGNO (nextarg
)))
1170 narg
= REGNO (nextarg
) - IN_REG (0);
1172 narg
= REGNO (nextarg
) - OUT_REG (0);
1173 narg_rtx
= GEN_INT (narg
);
1175 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
1178 insn
= gen_sibcall_nopic (addr
, narg_rtx
, b0
, pfs
);
1180 insn
= gen_call_nopic (addr
, narg_rtx
, b0
);
1182 insn
= gen_call_value_nopic (retval
, addr
, narg_rtx
, b0
);
1183 emit_call_insn (insn
);
1187 indirect_p
= ! symbolic_operand (addr
, VOIDmode
);
1189 if (sibcall_p
|| (TARGET_CONST_GP
&& !indirect_p
))
1192 gp_save
= ia64_gp_save_reg (setjmp_operand (addr
, VOIDmode
));
1195 emit_move_insn (gp_save
, pic_offset_table_rtx
);
1197 /* If this is an indirect call, then we have the address of a descriptor. */
1200 dest
= force_reg (DImode
, gen_rtx_MEM (DImode
, addr
));
1201 emit_move_insn (pic_offset_table_rtx
,
1202 gen_rtx_MEM (DImode
, plus_constant (addr
, 8)));
1208 insn
= gen_sibcall_pic (dest
, narg_rtx
, b0
, pfs
);
1210 insn
= gen_call_pic (dest
, narg_rtx
, b0
);
1212 insn
= gen_call_value_pic (retval
, dest
, narg_rtx
, b0
);
1213 emit_call_insn (insn
);
1216 emit_move_insn (pic_offset_table_rtx
, gp_save
);
1219 /* Begin the assembly file. */
1222 emit_safe_across_calls (f
)
1225 unsigned int rs
, re
;
1232 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
1236 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
1240 fputs ("\t.pred.safe_across_calls ", f
);
1246 fprintf (f
, "p%u", rs
);
1248 fprintf (f
, "p%u-p%u", rs
, re
- 1);
1256 /* Structure to be filled in by ia64_compute_frame_size with register
1257 save masks and offsets for the current function. */
1259 struct ia64_frame_info
1261 HOST_WIDE_INT total_size
; /* size of the stack frame, not including
1262 the caller's scratch area. */
1263 HOST_WIDE_INT spill_cfa_off
; /* top of the reg spill area from the cfa. */
1264 HOST_WIDE_INT spill_size
; /* size of the gr/br/fr spill area. */
1265 HOST_WIDE_INT extra_spill_size
; /* size of spill area for others. */
1266 HARD_REG_SET mask
; /* mask of saved registers. */
1267 unsigned int gr_used_mask
; /* mask of registers in use as gr spill
1268 registers or long-term scratches. */
1269 int n_spilled
; /* number of spilled registers. */
1270 int reg_fp
; /* register for fp. */
1271 int reg_save_b0
; /* save register for b0. */
1272 int reg_save_pr
; /* save register for prs. */
1273 int reg_save_ar_pfs
; /* save register for ar.pfs. */
1274 int reg_save_ar_unat
; /* save register for ar.unat. */
1275 int reg_save_ar_lc
; /* save register for ar.lc. */
1276 int n_input_regs
; /* number of input registers used. */
1277 int n_local_regs
; /* number of local registers used. */
1278 int n_output_regs
; /* number of output registers used. */
1279 int n_rotate_regs
; /* number of rotating registers used. */
1281 char need_regstk
; /* true if a .regstk directive needed. */
1282 char initialized
; /* true if the data is finalized. */
1285 /* Current frame information calculated by ia64_compute_frame_size. */
1286 static struct ia64_frame_info current_frame_info
;
1288 /* Helper function for ia64_compute_frame_size: find an appropriate general
1289 register to spill some special register to. SPECIAL_SPILL_MASK contains
1290 bits in GR0 to GR31 that have already been allocated by this routine.
1291 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1294 find_gr_spill (try_locals
)
1299 /* If this is a leaf function, first try an otherwise unused
1300 call-clobbered register. */
1301 if (current_function_is_leaf
)
1303 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1304 if (! regs_ever_live
[regno
]
1305 && call_used_regs
[regno
]
1306 && ! fixed_regs
[regno
]
1307 && ! global_regs
[regno
]
1308 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1310 current_frame_info
.gr_used_mask
|= 1 << regno
;
1317 regno
= current_frame_info
.n_local_regs
;
1318 /* If there is a frame pointer, then we can't use loc79, because
1319 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1320 reg_name switching code in ia64_expand_prologue. */
1321 if (regno
< (80 - frame_pointer_needed
))
1323 current_frame_info
.n_local_regs
= regno
+ 1;
1324 return LOC_REG (0) + regno
;
1328 /* Failed to find a general register to spill to. Must use stack. */
1332 /* In order to make for nice schedules, we try to allocate every temporary
1333 to a different register. We must of course stay away from call-saved,
1334 fixed, and global registers. We must also stay away from registers
1335 allocated in current_frame_info.gr_used_mask, since those include regs
1336 used all through the prologue.
1338 Any register allocated here must be used immediately. The idea is to
1339 aid scheduling, not to solve data flow problems. */
1341 static int last_scratch_gr_reg
;
1344 next_scratch_gr_reg ()
1348 for (i
= 0; i
< 32; ++i
)
1350 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
1351 if (call_used_regs
[regno
]
1352 && ! fixed_regs
[regno
]
1353 && ! global_regs
[regno
]
1354 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1356 last_scratch_gr_reg
= regno
;
1361 /* There must be _something_ available. */
1365 /* Helper function for ia64_compute_frame_size, called through
1366 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1369 mark_reg_gr_used_mask (reg
, data
)
1371 void *data ATTRIBUTE_UNUSED
;
1373 unsigned int regno
= REGNO (reg
);
1376 unsigned int i
, n
= HARD_REGNO_NREGS (regno
, GET_MODE (reg
));
1377 for (i
= 0; i
< n
; ++i
)
1378 current_frame_info
.gr_used_mask
|= 1 << (regno
+ i
);
1382 /* Returns the number of bytes offset between the frame pointer and the stack
1383 pointer for the current function. SIZE is the number of bytes of space
1384 needed for local variables. */
1387 ia64_compute_frame_size (size
)
1390 HOST_WIDE_INT total_size
;
1391 HOST_WIDE_INT spill_size
= 0;
1392 HOST_WIDE_INT extra_spill_size
= 0;
1393 HOST_WIDE_INT pretend_args_size
;
1396 int spilled_gr_p
= 0;
1397 int spilled_fr_p
= 0;
1401 if (current_frame_info
.initialized
)
1404 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
1405 CLEAR_HARD_REG_SET (mask
);
1407 /* Don't allocate scratches to the return register. */
1408 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
1410 /* Don't allocate scratches to the EH scratch registers. */
1411 if (cfun
->machine
->ia64_eh_epilogue_sp
)
1412 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
1413 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
1414 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
1416 /* Find the size of the register stack frame. We have only 80 local
1417 registers, because we reserve 8 for the inputs and 8 for the
1420 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1421 since we'll be adjusting that down later. */
1422 regno
= LOC_REG (78) + ! frame_pointer_needed
;
1423 for (; regno
>= LOC_REG (0); regno
--)
1424 if (regs_ever_live
[regno
])
1426 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
1428 /* For functions marked with the syscall_linkage attribute, we must mark
1429 all eight input registers as in use, so that locals aren't visible to
1432 if (cfun
->machine
->n_varargs
> 0
1433 || lookup_attribute ("syscall_linkage",
1434 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
1435 current_frame_info
.n_input_regs
= 8;
1438 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
1439 if (regs_ever_live
[regno
])
1441 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
1444 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
1445 if (regs_ever_live
[regno
])
1447 i
= regno
- OUT_REG (0) + 1;
1449 /* When -p profiling, we need one output register for the mcount argument.
1450 Likwise for -a profiling for the bb_init_func argument. For -ax
1451 profiling, we need two output registers for the two bb_init_trace_func
1453 if (current_function_profile
)
1455 current_frame_info
.n_output_regs
= i
;
1457 /* ??? No rotating register support yet. */
1458 current_frame_info
.n_rotate_regs
= 0;
1460 /* Discover which registers need spilling, and how much room that
1461 will take. Begin with floating point and general registers,
1462 which will always wind up on the stack. */
1464 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
1465 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1467 SET_HARD_REG_BIT (mask
, regno
);
1473 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1474 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1476 SET_HARD_REG_BIT (mask
, regno
);
1482 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
1483 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1485 SET_HARD_REG_BIT (mask
, regno
);
1490 /* Now come all special registers that might get saved in other
1491 general registers. */
1493 if (frame_pointer_needed
)
1495 current_frame_info
.reg_fp
= find_gr_spill (1);
1496 /* If we did not get a register, then we take LOC79. This is guaranteed
1497 to be free, even if regs_ever_live is already set, because this is
1498 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1499 as we don't count loc79 above. */
1500 if (current_frame_info
.reg_fp
== 0)
1502 current_frame_info
.reg_fp
= LOC_REG (79);
1503 current_frame_info
.n_local_regs
++;
1507 if (! current_function_is_leaf
)
1509 /* Emit a save of BR0 if we call other functions. Do this even
1510 if this function doesn't return, as EH depends on this to be
1511 able to unwind the stack. */
1512 SET_HARD_REG_BIT (mask
, BR_REG (0));
1514 current_frame_info
.reg_save_b0
= find_gr_spill (1);
1515 if (current_frame_info
.reg_save_b0
== 0)
1521 /* Similarly for ar.pfs. */
1522 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
1523 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
1524 if (current_frame_info
.reg_save_ar_pfs
== 0)
1526 extra_spill_size
+= 8;
1532 if (regs_ever_live
[BR_REG (0)] && ! call_used_regs
[BR_REG (0)])
1534 SET_HARD_REG_BIT (mask
, BR_REG (0));
1540 /* Unwind descriptor hackery: things are most efficient if we allocate
1541 consecutive GR save registers for RP, PFS, FP in that order. However,
1542 it is absolutely critical that FP get the only hard register that's
1543 guaranteed to be free, so we allocated it first. If all three did
1544 happen to be allocated hard regs, and are consecutive, rearrange them
1545 into the preferred order now. */
1546 if (current_frame_info
.reg_fp
!= 0
1547 && current_frame_info
.reg_save_b0
== current_frame_info
.reg_fp
+ 1
1548 && current_frame_info
.reg_save_ar_pfs
== current_frame_info
.reg_fp
+ 2)
1550 current_frame_info
.reg_save_b0
= current_frame_info
.reg_fp
;
1551 current_frame_info
.reg_save_ar_pfs
= current_frame_info
.reg_fp
+ 1;
1552 current_frame_info
.reg_fp
= current_frame_info
.reg_fp
+ 2;
1555 /* See if we need to store the predicate register block. */
1556 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1557 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1559 if (regno
<= PR_REG (63))
1561 SET_HARD_REG_BIT (mask
, PR_REG (0));
1562 current_frame_info
.reg_save_pr
= find_gr_spill (1);
1563 if (current_frame_info
.reg_save_pr
== 0)
1565 extra_spill_size
+= 8;
1569 /* ??? Mark them all as used so that register renaming and such
1570 are free to use them. */
1571 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1572 regs_ever_live
[regno
] = 1;
1575 /* If we're forced to use st8.spill, we're forced to save and restore
1577 if (spilled_gr_p
|| cfun
->machine
->n_varargs
)
1579 regs_ever_live
[AR_UNAT_REGNUM
] = 1;
1580 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
1581 current_frame_info
.reg_save_ar_unat
= find_gr_spill (spill_size
== 0);
1582 if (current_frame_info
.reg_save_ar_unat
== 0)
1584 extra_spill_size
+= 8;
1589 if (regs_ever_live
[AR_LC_REGNUM
])
1591 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
1592 current_frame_info
.reg_save_ar_lc
= find_gr_spill (spill_size
== 0);
1593 if (current_frame_info
.reg_save_ar_lc
== 0)
1595 extra_spill_size
+= 8;
1600 /* If we have an odd number of words of pretend arguments written to
1601 the stack, then the FR save area will be unaligned. We round the
1602 size of this area up to keep things 16 byte aligned. */
1604 pretend_args_size
= IA64_STACK_ALIGN (current_function_pretend_args_size
);
1606 pretend_args_size
= current_function_pretend_args_size
;
1608 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
1609 + current_function_outgoing_args_size
);
1610 total_size
= IA64_STACK_ALIGN (total_size
);
1612 /* We always use the 16-byte scratch area provided by the caller, but
1613 if we are a leaf function, there's no one to which we need to provide
1615 if (current_function_is_leaf
)
1616 total_size
= MAX (0, total_size
- 16);
1618 current_frame_info
.total_size
= total_size
;
1619 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
1620 current_frame_info
.spill_size
= spill_size
;
1621 current_frame_info
.extra_spill_size
= extra_spill_size
;
1622 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
1623 current_frame_info
.n_spilled
= n_spilled
;
1624 current_frame_info
.initialized
= reload_completed
;
1627 /* Compute the initial difference between the specified pair of registers. */
1630 ia64_initial_elimination_offset (from
, to
)
1633 HOST_WIDE_INT offset
;
1635 ia64_compute_frame_size (get_frame_size ());
1638 case FRAME_POINTER_REGNUM
:
1639 if (to
== HARD_FRAME_POINTER_REGNUM
)
1641 if (current_function_is_leaf
)
1642 offset
= -current_frame_info
.total_size
;
1644 offset
= -(current_frame_info
.total_size
1645 - current_function_outgoing_args_size
- 16);
1647 else if (to
== STACK_POINTER_REGNUM
)
1649 if (current_function_is_leaf
)
1652 offset
= 16 + current_function_outgoing_args_size
;
1658 case ARG_POINTER_REGNUM
:
1659 /* Arguments start above the 16 byte save area, unless stdarg
1660 in which case we store through the 16 byte save area. */
1661 if (to
== HARD_FRAME_POINTER_REGNUM
)
1662 offset
= 16 - current_function_pretend_args_size
;
1663 else if (to
== STACK_POINTER_REGNUM
)
1664 offset
= (current_frame_info
.total_size
1665 + 16 - current_function_pretend_args_size
);
1670 case RETURN_ADDRESS_POINTER_REGNUM
:
1681 /* If there are more than a trivial number of register spills, we use
1682 two interleaved iterators so that we can get two memory references
1685 In order to simplify things in the prologue and epilogue expanders,
1686 we use helper functions to fix up the memory references after the
1687 fact with the appropriate offsets to a POST_MODIFY memory mode.
1688 The following data structure tracks the state of the two iterators
1689 while insns are being emitted. */
1691 struct spill_fill_data
1693 rtx init_after
; /* point at which to emit initializations */
1694 rtx init_reg
[2]; /* initial base register */
1695 rtx iter_reg
[2]; /* the iterator registers */
1696 rtx
*prev_addr
[2]; /* address of last memory use */
1697 rtx prev_insn
[2]; /* the insn corresponding to prev_addr */
1698 HOST_WIDE_INT prev_off
[2]; /* last offset */
1699 int n_iter
; /* number of iterators in use */
1700 int next_iter
; /* next iterator to use */
1701 unsigned int save_gr_used_mask
;
1704 static struct spill_fill_data spill_fill_data
;
1707 setup_spill_pointers (n_spills
, init_reg
, cfa_off
)
1710 HOST_WIDE_INT cfa_off
;
1714 spill_fill_data
.init_after
= get_last_insn ();
1715 spill_fill_data
.init_reg
[0] = init_reg
;
1716 spill_fill_data
.init_reg
[1] = init_reg
;
1717 spill_fill_data
.prev_addr
[0] = NULL
;
1718 spill_fill_data
.prev_addr
[1] = NULL
;
1719 spill_fill_data
.prev_insn
[0] = NULL
;
1720 spill_fill_data
.prev_insn
[1] = NULL
;
1721 spill_fill_data
.prev_off
[0] = cfa_off
;
1722 spill_fill_data
.prev_off
[1] = cfa_off
;
1723 spill_fill_data
.next_iter
= 0;
1724 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
1726 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
1727 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
1729 int regno
= next_scratch_gr_reg ();
1730 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
1731 current_frame_info
.gr_used_mask
|= 1 << regno
;
1736 finish_spill_pointers ()
1738 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
1742 spill_restore_mem (reg
, cfa_off
)
1744 HOST_WIDE_INT cfa_off
;
1746 int iter
= spill_fill_data
.next_iter
;
1747 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
1748 rtx disp_rtx
= GEN_INT (disp
);
1751 if (spill_fill_data
.prev_addr
[iter
])
1753 if (CONST_OK_FOR_N (disp
))
1755 *spill_fill_data
.prev_addr
[iter
]
1756 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
1757 gen_rtx_PLUS (DImode
,
1758 spill_fill_data
.iter_reg
[iter
],
1760 REG_NOTES (spill_fill_data
.prev_insn
[iter
])
1761 = gen_rtx_EXPR_LIST (REG_INC
, spill_fill_data
.iter_reg
[iter
],
1762 REG_NOTES (spill_fill_data
.prev_insn
[iter
]));
1766 /* ??? Could use register post_modify for loads. */
1767 if (! CONST_OK_FOR_I (disp
))
1769 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1770 emit_move_insn (tmp
, disp_rtx
);
1773 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1774 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
1777 /* Micro-optimization: if we've created a frame pointer, it's at
1778 CFA 0, which may allow the real iterator to be initialized lower,
1779 slightly increasing parallelism. Also, if there are few saves
1780 it may eliminate the iterator entirely. */
1782 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
1783 && frame_pointer_needed
)
1785 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
1786 set_mem_alias_set (mem
, get_varargs_alias_set ());
1794 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
1795 spill_fill_data
.init_reg
[iter
]);
1800 if (! CONST_OK_FOR_I (disp
))
1802 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1803 emit_move_insn (tmp
, disp_rtx
);
1807 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1808 spill_fill_data
.init_reg
[iter
],
1811 seq
= gen_sequence ();
1815 /* Careful for being the first insn in a sequence. */
1816 if (spill_fill_data
.init_after
)
1817 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
1820 rtx first
= get_insns ();
1822 insn
= emit_insn_before (seq
, first
);
1824 insn
= emit_insn (seq
);
1826 spill_fill_data
.init_after
= insn
;
1828 /* If DISP is 0, we may or may not have a further adjustment
1829 afterward. If we do, then the load/store insn may be modified
1830 to be a post-modify. If we don't, then this copy may be
1831 eliminated by copyprop_hardreg_forward, which makes this
1832 insn garbage, which runs afoul of the sanity check in
1833 propagate_one_insn. So mark this insn as legal to delete. */
1835 REG_NOTES(insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
,
1839 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
1841 /* ??? Not all of the spills are for varargs, but some of them are.
1842 The rest of the spills belong in an alias set of their own. But
1843 it doesn't actually hurt to include them here. */
1844 set_mem_alias_set (mem
, get_varargs_alias_set ());
1846 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
1847 spill_fill_data
.prev_off
[iter
] = cfa_off
;
1849 if (++iter
>= spill_fill_data
.n_iter
)
1851 spill_fill_data
.next_iter
= iter
;
1857 do_spill (move_fn
, reg
, cfa_off
, frame_reg
)
1858 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
1860 HOST_WIDE_INT cfa_off
;
1862 int iter
= spill_fill_data
.next_iter
;
1865 mem
= spill_restore_mem (reg
, cfa_off
);
1866 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
1867 spill_fill_data
.prev_insn
[iter
] = insn
;
1874 RTX_FRAME_RELATED_P (insn
) = 1;
1876 /* Don't even pretend that the unwind code can intuit its way
1877 through a pair of interleaved post_modify iterators. Just
1878 provide the correct answer. */
1880 if (frame_pointer_needed
)
1882 base
= hard_frame_pointer_rtx
;
1887 base
= stack_pointer_rtx
;
1888 off
= current_frame_info
.total_size
- cfa_off
;
1892 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1893 gen_rtx_SET (VOIDmode
,
1894 gen_rtx_MEM (GET_MODE (reg
),
1895 plus_constant (base
, off
)),
1902 do_restore (move_fn
, reg
, cfa_off
)
1903 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
1905 HOST_WIDE_INT cfa_off
;
1907 int iter
= spill_fill_data
.next_iter
;
1910 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
1911 GEN_INT (cfa_off
)));
1912 spill_fill_data
.prev_insn
[iter
] = insn
;
1915 /* Wrapper functions that discards the CONST_INT spill offset. These
1916 exist so that we can give gr_spill/gr_fill the offset they need and
1917 use a consistant function interface. */
1920 gen_movdi_x (dest
, src
, offset
)
1922 rtx offset ATTRIBUTE_UNUSED
;
1924 return gen_movdi (dest
, src
);
1928 gen_fr_spill_x (dest
, src
, offset
)
1930 rtx offset ATTRIBUTE_UNUSED
;
1932 return gen_fr_spill (dest
, src
);
1936 gen_fr_restore_x (dest
, src
, offset
)
1938 rtx offset ATTRIBUTE_UNUSED
;
1940 return gen_fr_restore (dest
, src
);
1943 /* Called after register allocation to add any instructions needed for the
1944 prologue. Using a prologue insn is favored compared to putting all of the
1945 instructions in output_function_prologue(), since it allows the scheduler
1946 to intermix instructions with the saves of the caller saved registers. In
1947 some cases, it might be necessary to emit a barrier instruction as the last
1948 insn to prevent such scheduling.
1950 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1951 so that the debug info generation code can handle them properly.
1953 The register save area is layed out like so:
1955 [ varargs spill area ]
1956 [ fr register spill area ]
1957 [ br register spill area ]
1958 [ ar register spill area ]
1959 [ pr register spill area ]
1960 [ gr register spill area ] */
1962 /* ??? Get inefficient code when the frame size is larger than can fit in an
1963 adds instruction. */
1966 ia64_expand_prologue ()
1968 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
1969 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
1972 ia64_compute_frame_size (get_frame_size ());
1973 last_scratch_gr_reg
= 15;
1975 /* If there is no epilogue, then we don't need some prologue insns.
1976 We need to avoid emitting the dead prologue insns, because flow
1977 will complain about them. */
1982 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
1983 if ((e
->flags
& EDGE_FAKE
) == 0
1984 && (e
->flags
& EDGE_FALLTHRU
) != 0)
1986 epilogue_p
= (e
!= NULL
);
1991 /* Set the local, input, and output register names. We need to do this
1992 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1993 half. If we use in/loc/out register names, then we get assembler errors
1994 in crtn.S because there is no alloc insn or regstk directive in there. */
1995 if (! TARGET_REG_NAMES
)
1997 int inputs
= current_frame_info
.n_input_regs
;
1998 int locals
= current_frame_info
.n_local_regs
;
1999 int outputs
= current_frame_info
.n_output_regs
;
2001 for (i
= 0; i
< inputs
; i
++)
2002 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
2003 for (i
= 0; i
< locals
; i
++)
2004 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
2005 for (i
= 0; i
< outputs
; i
++)
2006 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
2009 /* Set the frame pointer register name. The regnum is logically loc79,
2010 but of course we'll not have allocated that many locals. Rather than
2011 worrying about renumbering the existing rtxs, we adjust the name. */
2012 /* ??? This code means that we can never use one local register when
2013 there is a frame pointer. loc79 gets wasted in this case, as it is
2014 renamed to a register that will never be used. See also the try_locals
2015 code in find_gr_spill. */
2016 if (current_frame_info
.reg_fp
)
2018 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2019 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2020 = reg_names
[current_frame_info
.reg_fp
];
2021 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2024 /* Fix up the return address placeholder. */
2025 /* ??? We can fail if __builtin_return_address is used, and we didn't
2026 allocate a register in which to save b0. I can't think of a way to
2027 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2028 then be sure that I got the right one. Further, reload doesn't seem
2029 to care if an eliminable register isn't used, and "eliminates" it
2031 if (regs_ever_live
[RETURN_ADDRESS_POINTER_REGNUM
]
2032 && current_frame_info
.reg_save_b0
!= 0)
2033 XINT (return_address_pointer_rtx
, 0) = current_frame_info
.reg_save_b0
;
2035 /* We don't need an alloc instruction if we've used no outputs or locals. */
2036 if (current_frame_info
.n_local_regs
== 0
2037 && current_frame_info
.n_output_regs
== 0
2038 && current_frame_info
.n_input_regs
<= current_function_args_info
.int_regs
)
2040 /* If there is no alloc, but there are input registers used, then we
2041 need a .regstk directive. */
2042 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
2043 ar_pfs_save_reg
= NULL_RTX
;
2047 current_frame_info
.need_regstk
= 0;
2049 if (current_frame_info
.reg_save_ar_pfs
)
2050 regno
= current_frame_info
.reg_save_ar_pfs
;
2052 regno
= next_scratch_gr_reg ();
2053 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
2055 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
2056 GEN_INT (current_frame_info
.n_input_regs
),
2057 GEN_INT (current_frame_info
.n_local_regs
),
2058 GEN_INT (current_frame_info
.n_output_regs
),
2059 GEN_INT (current_frame_info
.n_rotate_regs
)));
2060 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_pfs
!= 0);
2063 /* Set up frame pointer, stack pointer, and spill iterators. */
2065 n_varargs
= cfun
->machine
->n_varargs
;
2066 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
2067 stack_pointer_rtx
, 0);
2069 if (frame_pointer_needed
)
2071 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
2072 RTX_FRAME_RELATED_P (insn
) = 1;
2075 if (current_frame_info
.total_size
!= 0)
2077 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
2080 if (CONST_OK_FOR_I (- current_frame_info
.total_size
))
2081 offset
= frame_size_rtx
;
2084 regno
= next_scratch_gr_reg ();
2085 offset
= gen_rtx_REG (DImode
, regno
);
2086 emit_move_insn (offset
, frame_size_rtx
);
2089 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
2090 stack_pointer_rtx
, offset
));
2092 if (! frame_pointer_needed
)
2094 RTX_FRAME_RELATED_P (insn
) = 1;
2095 if (GET_CODE (offset
) != CONST_INT
)
2098 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2099 gen_rtx_SET (VOIDmode
,
2101 gen_rtx_PLUS (DImode
,
2108 /* ??? At this point we must generate a magic insn that appears to
2109 modify the stack pointer, the frame pointer, and all spill
2110 iterators. This would allow the most scheduling freedom. For
2111 now, just hard stop. */
2112 emit_insn (gen_blockage ());
2115 /* Must copy out ar.unat before doing any integer spills. */
2116 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2118 if (current_frame_info
.reg_save_ar_unat
)
2120 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2123 alt_regno
= next_scratch_gr_reg ();
2124 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2125 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2128 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2129 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
2130 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_unat
!= 0);
2132 /* Even if we're not going to generate an epilogue, we still
2133 need to save the register so that EH works. */
2134 if (! epilogue_p
&& current_frame_info
.reg_save_ar_unat
)
2135 emit_insn (gen_prologue_use (ar_unat_save_reg
));
2138 ar_unat_save_reg
= NULL_RTX
;
2140 /* Spill all varargs registers. Do this before spilling any GR registers,
2141 since we want the UNAT bits for the GR registers to override the UNAT
2142 bits from varargs, which we don't care about. */
2145 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
2147 reg
= gen_rtx_REG (DImode
, regno
);
2148 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
2151 /* Locate the bottom of the register save area. */
2152 cfa_off
= (current_frame_info
.spill_cfa_off
2153 + current_frame_info
.spill_size
2154 + current_frame_info
.extra_spill_size
);
2156 /* Save the predicate register block either in a register or in memory. */
2157 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2159 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2160 if (current_frame_info
.reg_save_pr
!= 0)
2162 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2163 insn
= emit_move_insn (alt_reg
, reg
);
2165 /* ??? Denote pr spill/fill by a DImode move that modifies all
2166 64 hard registers. */
2167 RTX_FRAME_RELATED_P (insn
) = 1;
2169 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2170 gen_rtx_SET (VOIDmode
, alt_reg
, reg
),
2173 /* Even if we're not going to generate an epilogue, we still
2174 need to save the register so that EH works. */
2176 emit_insn (gen_prologue_use (alt_reg
));
2180 alt_regno
= next_scratch_gr_reg ();
2181 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2182 insn
= emit_move_insn (alt_reg
, reg
);
2183 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2188 /* Handle AR regs in numerical order. All of them get special handling. */
2189 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
2190 && current_frame_info
.reg_save_ar_unat
== 0)
2192 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2193 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
2197 /* The alloc insn already copied ar.pfs into a general register. The
2198 only thing we have to do now is copy that register to a stack slot
2199 if we'd not allocated a local register for the job. */
2200 if (current_frame_info
.reg_save_ar_pfs
== 0
2201 && ! current_function_is_leaf
)
2203 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2204 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
2208 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2210 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2211 if (current_frame_info
.reg_save_ar_lc
!= 0)
2213 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2214 insn
= emit_move_insn (alt_reg
, reg
);
2215 RTX_FRAME_RELATED_P (insn
) = 1;
2217 /* Even if we're not going to generate an epilogue, we still
2218 need to save the register so that EH works. */
2220 emit_insn (gen_prologue_use (alt_reg
));
2224 alt_regno
= next_scratch_gr_reg ();
2225 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2226 emit_move_insn (alt_reg
, reg
);
2227 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2232 /* We should now be at the base of the gr/br/fr spill area. */
2233 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2234 + current_frame_info
.spill_size
))
2237 /* Spill all general registers. */
2238 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2239 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2241 reg
= gen_rtx_REG (DImode
, regno
);
2242 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
2246 /* Handle BR0 specially -- it may be getting stored permanently in
2247 some GR register. */
2248 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2250 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2251 if (current_frame_info
.reg_save_b0
!= 0)
2253 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2254 insn
= emit_move_insn (alt_reg
, reg
);
2255 RTX_FRAME_RELATED_P (insn
) = 1;
2257 /* Even if we're not going to generate an epilogue, we still
2258 need to save the register so that EH works. */
2260 emit_insn (gen_prologue_use (alt_reg
));
2264 alt_regno
= next_scratch_gr_reg ();
2265 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2266 emit_move_insn (alt_reg
, reg
);
2267 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2272 /* Spill the rest of the BR registers. */
2273 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2274 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2276 alt_regno
= next_scratch_gr_reg ();
2277 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2278 reg
= gen_rtx_REG (DImode
, regno
);
2279 emit_move_insn (alt_reg
, reg
);
2280 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2284 /* Align the frame and spill all FR registers. */
2285 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2286 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2290 reg
= gen_rtx_REG (TFmode
, regno
);
2291 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
2295 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2298 finish_spill_pointers ();
2301 /* Called after register allocation to add any instructions needed for the
2302 epilogue. Using an epilogue insn is favored compared to putting all of the
2303 instructions in output_function_prologue(), since it allows the scheduler
2304 to intermix instructions with the saves of the caller saved registers. In
2305 some cases, it might be necessary to emit a barrier instruction as the last
2306 insn to prevent such scheduling. */
2309 ia64_expand_epilogue (sibcall_p
)
2312 rtx insn
, reg
, alt_reg
, ar_unat_save_reg
;
2313 int regno
, alt_regno
, cfa_off
;
2315 ia64_compute_frame_size (get_frame_size ());
2317 /* If there is a frame pointer, then we use it instead of the stack
2318 pointer, so that the stack pointer does not need to be valid when
2319 the epilogue starts. See EXIT_IGNORE_STACK. */
2320 if (frame_pointer_needed
)
2321 setup_spill_pointers (current_frame_info
.n_spilled
,
2322 hard_frame_pointer_rtx
, 0);
2324 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
2325 current_frame_info
.total_size
);
2327 if (current_frame_info
.total_size
!= 0)
2329 /* ??? At this point we must generate a magic insn that appears to
2330 modify the spill iterators and the frame pointer. This would
2331 allow the most scheduling freedom. For now, just hard stop. */
2332 emit_insn (gen_blockage ());
2335 /* Locate the bottom of the register save area. */
2336 cfa_off
= (current_frame_info
.spill_cfa_off
2337 + current_frame_info
.spill_size
2338 + current_frame_info
.extra_spill_size
);
2340 /* Restore the predicate registers. */
2341 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2343 if (current_frame_info
.reg_save_pr
!= 0)
2344 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2347 alt_regno
= next_scratch_gr_reg ();
2348 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2349 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2352 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2353 emit_move_insn (reg
, alt_reg
);
2356 /* Restore the application registers. */
2358 /* Load the saved unat from the stack, but do not restore it until
2359 after the GRs have been restored. */
2360 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2362 if (current_frame_info
.reg_save_ar_unat
!= 0)
2364 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2367 alt_regno
= next_scratch_gr_reg ();
2368 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2369 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2370 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
2375 ar_unat_save_reg
= NULL_RTX
;
2377 if (current_frame_info
.reg_save_ar_pfs
!= 0)
2379 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_pfs
);
2380 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2381 emit_move_insn (reg
, alt_reg
);
2383 else if (! current_function_is_leaf
)
2385 alt_regno
= next_scratch_gr_reg ();
2386 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2387 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2389 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2390 emit_move_insn (reg
, alt_reg
);
2393 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2395 if (current_frame_info
.reg_save_ar_lc
!= 0)
2396 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2399 alt_regno
= next_scratch_gr_reg ();
2400 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2401 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2404 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2405 emit_move_insn (reg
, alt_reg
);
2408 /* We should now be at the base of the gr/br/fr spill area. */
2409 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2410 + current_frame_info
.spill_size
))
2413 /* Restore all general registers. */
2414 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2415 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2417 reg
= gen_rtx_REG (DImode
, regno
);
2418 do_restore (gen_gr_restore
, reg
, cfa_off
);
2422 /* Restore the branch registers. Handle B0 specially, as it may
2423 have gotten stored in some GR register. */
2424 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2426 if (current_frame_info
.reg_save_b0
!= 0)
2427 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2430 alt_regno
= next_scratch_gr_reg ();
2431 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2432 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2435 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2436 emit_move_insn (reg
, alt_reg
);
2439 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2440 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2442 alt_regno
= next_scratch_gr_reg ();
2443 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2444 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2446 reg
= gen_rtx_REG (DImode
, regno
);
2447 emit_move_insn (reg
, alt_reg
);
2450 /* Restore floating point registers. */
2451 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2452 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2456 reg
= gen_rtx_REG (TFmode
, regno
);
2457 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
2461 /* Restore ar.unat for real. */
2462 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2464 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2465 emit_move_insn (reg
, ar_unat_save_reg
);
2468 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2471 finish_spill_pointers ();
2473 if (current_frame_info
.total_size
|| cfun
->machine
->ia64_eh_epilogue_sp
)
2475 /* ??? At this point we must generate a magic insn that appears to
2476 modify the spill iterators, the stack pointer, and the frame
2477 pointer. This would allow the most scheduling freedom. For now,
2479 emit_insn (gen_blockage ());
2482 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2483 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
2484 else if (frame_pointer_needed
)
2486 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
2487 RTX_FRAME_RELATED_P (insn
) = 1;
2489 else if (current_frame_info
.total_size
)
2491 rtx offset
, frame_size_rtx
;
2493 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
2494 if (CONST_OK_FOR_I (current_frame_info
.total_size
))
2495 offset
= frame_size_rtx
;
2498 regno
= next_scratch_gr_reg ();
2499 offset
= gen_rtx_REG (DImode
, regno
);
2500 emit_move_insn (offset
, frame_size_rtx
);
2503 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
2506 RTX_FRAME_RELATED_P (insn
) = 1;
2507 if (GET_CODE (offset
) != CONST_INT
)
2510 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2511 gen_rtx_SET (VOIDmode
,
2513 gen_rtx_PLUS (DImode
,
2520 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2521 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
2524 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
2527 int fp
= GR_REG (2);
2528 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
2529 first available call clobbered register. If there was a frame_pointer
2530 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2531 so we have to make sure we're using the string "r2" when emitting
2532 the register name for the assmbler. */
2533 if (current_frame_info
.reg_fp
&& current_frame_info
.reg_fp
== GR_REG (2))
2534 fp
= HARD_FRAME_POINTER_REGNUM
;
2536 /* We must emit an alloc to force the input registers to become output
2537 registers. Otherwise, if the callee tries to pass its parameters
2538 through to another call without an intervening alloc, then these
2540 /* ??? We don't need to preserve all input registers. We only need to
2541 preserve those input registers used as arguments to the sibling call.
2542 It is unclear how to compute that number here. */
2543 if (current_frame_info
.n_input_regs
!= 0)
2544 emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
2545 GEN_INT (0), GEN_INT (0),
2546 GEN_INT (current_frame_info
.n_input_regs
),
2551 /* Return 1 if br.ret can do all the work required to return from a
2555 ia64_direct_return ()
2557 if (reload_completed
&& ! frame_pointer_needed
)
2559 ia64_compute_frame_size (get_frame_size ());
2561 return (current_frame_info
.total_size
== 0
2562 && current_frame_info
.n_spilled
== 0
2563 && current_frame_info
.reg_save_b0
== 0
2564 && current_frame_info
.reg_save_pr
== 0
2565 && current_frame_info
.reg_save_ar_pfs
== 0
2566 && current_frame_info
.reg_save_ar_unat
== 0
2567 && current_frame_info
.reg_save_ar_lc
== 0);
2573 ia64_hard_regno_rename_ok (from
, to
)
2577 /* Don't clobber any of the registers we reserved for the prologue. */
2578 if (to
== current_frame_info
.reg_fp
2579 || to
== current_frame_info
.reg_save_b0
2580 || to
== current_frame_info
.reg_save_pr
2581 || to
== current_frame_info
.reg_save_ar_pfs
2582 || to
== current_frame_info
.reg_save_ar_unat
2583 || to
== current_frame_info
.reg_save_ar_lc
)
2586 if (from
== current_frame_info
.reg_fp
2587 || from
== current_frame_info
.reg_save_b0
2588 || from
== current_frame_info
.reg_save_pr
2589 || from
== current_frame_info
.reg_save_ar_pfs
2590 || from
== current_frame_info
.reg_save_ar_unat
2591 || from
== current_frame_info
.reg_save_ar_lc
)
2594 /* Don't use output registers outside the register frame. */
2595 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
2598 /* Retain even/oddness on predicate register pairs. */
2599 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
2600 return (from
& 1) == (to
& 1);
2602 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2603 if (from
== GR_REG (4) && current_function_calls_setjmp
)
2609 /* Target hook for assembling integer objects. Handle word-sized
2610 aligned objects and detect the cases when @fptr is needed. */
2613 ia64_assemble_integer (x
, size
, aligned_p
)
2618 if (size
== UNITS_PER_WORD
&& aligned_p
2619 && !(TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
2620 && GET_CODE (x
) == SYMBOL_REF
2621 && SYMBOL_REF_FLAG (x
))
2623 fputs ("\tdata8\t@fptr(", asm_out_file
);
2624 output_addr_const (asm_out_file
, x
);
2625 fputs (")\n", asm_out_file
);
2628 return default_assemble_integer (x
, size
, aligned_p
);
2631 /* Emit the function prologue. */
2634 ia64_output_function_prologue (file
, size
)
2636 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
2638 int mask
, grsave
, grsave_prev
;
2640 if (current_frame_info
.need_regstk
)
2641 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
2642 current_frame_info
.n_input_regs
,
2643 current_frame_info
.n_local_regs
,
2644 current_frame_info
.n_output_regs
,
2645 current_frame_info
.n_rotate_regs
);
2647 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2650 /* Emit the .prologue directive. */
2653 grsave
= grsave_prev
= 0;
2654 if (current_frame_info
.reg_save_b0
!= 0)
2657 grsave
= grsave_prev
= current_frame_info
.reg_save_b0
;
2659 if (current_frame_info
.reg_save_ar_pfs
!= 0
2660 && (grsave_prev
== 0
2661 || current_frame_info
.reg_save_ar_pfs
== grsave_prev
+ 1))
2664 if (grsave_prev
== 0)
2665 grsave
= current_frame_info
.reg_save_ar_pfs
;
2666 grsave_prev
= current_frame_info
.reg_save_ar_pfs
;
2668 if (current_frame_info
.reg_fp
!= 0
2669 && (grsave_prev
== 0
2670 || current_frame_info
.reg_fp
== grsave_prev
+ 1))
2673 if (grsave_prev
== 0)
2674 grsave
= HARD_FRAME_POINTER_REGNUM
;
2675 grsave_prev
= current_frame_info
.reg_fp
;
2677 if (current_frame_info
.reg_save_pr
!= 0
2678 && (grsave_prev
== 0
2679 || current_frame_info
.reg_save_pr
== grsave_prev
+ 1))
2682 if (grsave_prev
== 0)
2683 grsave
= current_frame_info
.reg_save_pr
;
2687 fprintf (file
, "\t.prologue %d, %d\n", mask
,
2688 ia64_dbx_register_number (grsave
));
2690 fputs ("\t.prologue\n", file
);
2692 /* Emit a .spill directive, if necessary, to relocate the base of
2693 the register spill area. */
2694 if (current_frame_info
.spill_cfa_off
!= -16)
2695 fprintf (file
, "\t.spill %ld\n",
2696 (long) (current_frame_info
.spill_cfa_off
2697 + current_frame_info
.spill_size
));
2700 /* Emit the .body directive at the scheduled end of the prologue. */
2703 ia64_output_function_end_prologue (file
)
2706 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2709 fputs ("\t.body\n", file
);
2712 /* Emit the function epilogue. */
2715 ia64_output_function_epilogue (file
, size
)
2716 FILE *file ATTRIBUTE_UNUSED
;
2717 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
2721 /* Reset from the function's potential modifications. */
2722 XINT (return_address_pointer_rtx
, 0) = RETURN_ADDRESS_POINTER_REGNUM
;
2724 if (current_frame_info
.reg_fp
)
2726 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2727 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2728 = reg_names
[current_frame_info
.reg_fp
];
2729 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2731 if (! TARGET_REG_NAMES
)
2733 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
2734 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
2735 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
2736 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
2737 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
2738 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
2741 current_frame_info
.initialized
= 0;
2745 ia64_dbx_register_number (regno
)
2748 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2749 from its home at loc79 to something inside the register frame. We
2750 must perform the same renumbering here for the debug info. */
2751 if (current_frame_info
.reg_fp
)
2753 if (regno
== HARD_FRAME_POINTER_REGNUM
)
2754 regno
= current_frame_info
.reg_fp
;
2755 else if (regno
== current_frame_info
.reg_fp
)
2756 regno
= HARD_FRAME_POINTER_REGNUM
;
2759 if (IN_REGNO_P (regno
))
2760 return 32 + regno
- IN_REG (0);
2761 else if (LOC_REGNO_P (regno
))
2762 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
2763 else if (OUT_REGNO_P (regno
))
2764 return (32 + current_frame_info
.n_input_regs
2765 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
2771 ia64_initialize_trampoline (addr
, fnaddr
, static_chain
)
2772 rtx addr
, fnaddr
, static_chain
;
2774 rtx addr_reg
, eight
= GEN_INT (8);
2776 /* Load up our iterator. */
2777 addr_reg
= gen_reg_rtx (Pmode
);
2778 emit_move_insn (addr_reg
, addr
);
2780 /* The first two words are the fake descriptor:
2781 __ia64_trampoline, ADDR+16. */
2782 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
2783 gen_rtx_SYMBOL_REF (Pmode
, "__ia64_trampoline"));
2784 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2786 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
2787 copy_to_reg (plus_constant (addr
, 16)));
2788 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2790 /* The third word is the target descriptor. */
2791 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), fnaddr
);
2792 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2794 /* The fourth word is the static chain. */
2795 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), static_chain
);
2798 /* Do any needed setup for a variadic function. CUM has not been updated
2799 for the last named argument which has type TYPE and mode MODE.
2801 We generate the actual spill instructions during prologue generation. */
2804 ia64_setup_incoming_varargs (cum
, int_mode
, type
, pretend_size
, second_time
)
2805 CUMULATIVE_ARGS cum
;
2809 int second_time ATTRIBUTE_UNUSED
;
2811 /* If this is a stdarg function, then skip the current argument. */
2812 if (! current_function_varargs
)
2813 ia64_function_arg_advance (&cum
, int_mode
, type
, 1);
2815 if (cum
.words
< MAX_ARGUMENT_SLOTS
)
2817 int n
= MAX_ARGUMENT_SLOTS
- cum
.words
;
2818 *pretend_size
= n
* UNITS_PER_WORD
;
2819 cfun
->machine
->n_varargs
= n
;
2823 /* Check whether TYPE is a homogeneous floating point aggregate. If
2824 it is, return the mode of the floating point type that appears
2825 in all leafs. If it is not, return VOIDmode.
2827 An aggregate is a homogeneous floating point aggregate is if all
2828 fields/elements in it have the same floating point type (e.g,
2829 SFmode). 128-bit quad-precision floats are excluded. */
2831 static enum machine_mode
2832 hfa_element_mode (type
, nested
)
2836 enum machine_mode element_mode
= VOIDmode
;
2837 enum machine_mode mode
;
2838 enum tree_code code
= TREE_CODE (type
);
2839 int know_element_mode
= 0;
2844 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
2845 case BOOLEAN_TYPE
: case CHAR_TYPE
: case POINTER_TYPE
:
2846 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
2847 case FILE_TYPE
: case SET_TYPE
: case LANG_TYPE
:
2851 /* Fortran complex types are supposed to be HFAs, so we need to handle
2852 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2855 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
)
2856 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type
))
2857 * BITS_PER_UNIT
, MODE_FLOAT
, 0);
2862 /* ??? Should exclude 128-bit long double here. */
2863 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2864 mode if this is contained within an aggregate. */
2866 return TYPE_MODE (type
);
2871 return hfa_element_mode (TREE_TYPE (type
), 1);
2875 case QUAL_UNION_TYPE
:
2876 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
2878 if (TREE_CODE (t
) != FIELD_DECL
)
2881 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
2882 if (know_element_mode
)
2884 if (mode
!= element_mode
)
2887 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
2891 know_element_mode
= 1;
2892 element_mode
= mode
;
2895 return element_mode
;
2898 /* If we reach here, we probably have some front-end specific type
2899 that the backend doesn't know about. This can happen via the
2900 aggregate_value_p call in init_function_start. All we can do is
2901 ignore unknown tree types. */
2908 /* Return rtx for register where argument is passed, or zero if it is passed
2911 /* ??? 128-bit quad-precision floats are always passed in general
2915 ia64_function_arg (cum
, mode
, type
, named
, incoming
)
2916 CUMULATIVE_ARGS
*cum
;
2917 enum machine_mode mode
;
2922 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
2923 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
2924 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
2927 enum machine_mode hfa_mode
= VOIDmode
;
2929 /* Integer and float arguments larger than 8 bytes start at the next even
2930 boundary. Aggregates larger than 8 bytes start at the next even boundary
2931 if the aggregate has 16 byte alignment. Net effect is that types with
2932 alignment greater than 8 start at the next even boundary. */
2933 /* ??? The ABI does not specify how to handle aggregates with alignment from
2934 9 to 15 bytes, or greater than 16. We handle them all as if they had
2935 16 byte alignment. Such aggregates can occur only if gcc extensions are
2937 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
2939 && (cum
->words
& 1))
2942 /* If all argument slots are used, then it must go on the stack. */
2943 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
2946 /* Check for and handle homogeneous FP aggregates. */
2948 hfa_mode
= hfa_element_mode (type
, 0);
2950 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2951 and unprototyped hfas are passed specially. */
2952 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
2956 int fp_regs
= cum
->fp_regs
;
2957 int int_regs
= cum
->words
+ offset
;
2958 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
2962 /* If prototyped, pass it in FR regs then GR regs.
2963 If not prototyped, pass it in both FR and GR regs.
2965 If this is an SFmode aggregate, then it is possible to run out of
2966 FR regs while GR regs are still left. In that case, we pass the
2967 remaining part in the GR regs. */
2969 /* Fill the FP regs. We do this always. We stop if we reach the end
2970 of the argument, the last FP register, or the last argument slot. */
2972 byte_size
= ((mode
== BLKmode
)
2973 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
2974 args_byte_size
= int_regs
* UNITS_PER_WORD
;
2976 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
2977 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
2979 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
2980 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
2984 args_byte_size
+= hfa_size
;
2988 /* If no prototype, then the whole thing must go in GR regs. */
2989 if (! cum
->prototype
)
2991 /* If this is an SFmode aggregate, then we might have some left over
2992 that needs to go in GR regs. */
2993 else if (byte_size
!= offset
)
2994 int_regs
+= offset
/ UNITS_PER_WORD
;
2996 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2998 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
3000 enum machine_mode gr_mode
= DImode
;
3002 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3003 then this goes in a GR reg left adjusted/little endian, right
3004 adjusted/big endian. */
3005 /* ??? Currently this is handled wrong, because 4-byte hunks are
3006 always right adjusted/little endian. */
3009 /* If we have an even 4 byte hunk because the aggregate is a
3010 multiple of 4 bytes in size, then this goes in a GR reg right
3011 adjusted/little endian. */
3012 else if (byte_size
- offset
== 4)
3014 /* Complex floats need to have float mode. */
3015 if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
3018 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3019 gen_rtx_REG (gr_mode
, (basereg
3022 offset
+= GET_MODE_SIZE (gr_mode
);
3023 int_regs
+= GET_MODE_SIZE (gr_mode
) <= UNITS_PER_WORD
3024 ? 1 : GET_MODE_SIZE (gr_mode
) / UNITS_PER_WORD
;
3027 /* If we ended up using just one location, just return that one loc. */
3029 return XEXP (loc
[0], 0);
3031 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3034 /* Integral and aggregates go in general registers. If we have run out of
3035 FR registers, then FP values must also go in general registers. This can
3036 happen when we have a SFmode HFA. */
3037 else if (((mode
== TFmode
) && ! INTEL_EXTENDED_IEEE_FORMAT
)
3038 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
3039 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3041 /* If there is a prototype, then FP values go in a FR register when
3042 named, and in a GR registeer when unnamed. */
3043 else if (cum
->prototype
)
3046 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3048 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
3050 /* If there is no prototype, then FP values go in both FR and GR
3054 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3055 gen_rtx_REG (mode
, (FR_ARG_FIRST
3058 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3060 (basereg
+ cum
->words
3064 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
3068 /* Return number of words, at the beginning of the argument, that must be
3069 put in registers. 0 is the argument is entirely in registers or entirely
3073 ia64_function_arg_partial_nregs (cum
, mode
, type
, named
)
3074 CUMULATIVE_ARGS
*cum
;
3075 enum machine_mode mode
;
3077 int named ATTRIBUTE_UNUSED
;
3079 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
3080 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
3084 /* Arguments with alignment larger than 8 bytes start at the next even
3086 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3088 && (cum
->words
& 1))
3091 /* If all argument slots are used, then it must go on the stack. */
3092 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
3095 /* It doesn't matter whether the argument goes in FR or GR regs. If
3096 it fits within the 8 argument slots, then it goes entirely in
3097 registers. If it extends past the last argument slot, then the rest
3098 goes on the stack. */
3100 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
3103 return MAX_ARGUMENT_SLOTS
- cum
->words
- offset
;
3106 /* Update CUM to point after this argument. This is patterned after
3107 ia64_function_arg. */
3110 ia64_function_arg_advance (cum
, mode
, type
, named
)
3111 CUMULATIVE_ARGS
*cum
;
3112 enum machine_mode mode
;
3116 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
3117 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
3120 enum machine_mode hfa_mode
= VOIDmode
;
3122 /* If all arg slots are already full, then there is nothing to do. */
3123 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
3126 /* Arguments with alignment larger than 8 bytes start at the next even
3128 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3130 && (cum
->words
& 1))
3133 cum
->words
+= words
+ offset
;
3135 /* Check for and handle homogeneous FP aggregates. */
3137 hfa_mode
= hfa_element_mode (type
, 0);
3139 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3140 and unprototyped hfas are passed specially. */
3141 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3143 int fp_regs
= cum
->fp_regs
;
3144 /* This is the original value of cum->words + offset. */
3145 int int_regs
= cum
->words
- words
;
3146 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3150 /* If prototyped, pass it in FR regs then GR regs.
3151 If not prototyped, pass it in both FR and GR regs.
3153 If this is an SFmode aggregate, then it is possible to run out of
3154 FR regs while GR regs are still left. In that case, we pass the
3155 remaining part in the GR regs. */
3157 /* Fill the FP regs. We do this always. We stop if we reach the end
3158 of the argument, the last FP register, or the last argument slot. */
3160 byte_size
= ((mode
== BLKmode
)
3161 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3162 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3164 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3165 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
3168 args_byte_size
+= hfa_size
;
3172 cum
->fp_regs
= fp_regs
;
3175 /* Integral and aggregates go in general registers. If we have run out of
3176 FR registers, then FP values must also go in general registers. This can
3177 happen when we have a SFmode HFA. */
3178 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
3179 cum
->int_regs
= cum
->words
;
3181 /* If there is a prototype, then FP values go in a FR register when
3182 named, and in a GR registeer when unnamed. */
3183 else if (cum
->prototype
)
3186 cum
->int_regs
= cum
->words
;
3188 /* ??? Complex types should not reach here. */
3189 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3191 /* If there is no prototype, then FP values go in both FR and GR
3195 /* ??? Complex types should not reach here. */
3196 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3197 cum
->int_regs
= cum
->words
;
3201 /* Variable sized types are passed by reference. */
3202 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3205 ia64_function_arg_pass_by_reference (cum
, mode
, type
, named
)
3206 CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
;
3207 enum machine_mode mode ATTRIBUTE_UNUSED
;
3209 int named ATTRIBUTE_UNUSED
;
3211 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3214 /* Implement va_start. */
3217 ia64_va_start (stdarg_p
, valist
, nextarg
)
3225 arg_words
= current_function_args_info
.words
;
3230 ofs
= (arg_words
>= MAX_ARGUMENT_SLOTS
? -UNITS_PER_WORD
: 0);
3232 nextarg
= plus_constant (nextarg
, ofs
);
3233 std_expand_builtin_va_start (1, valist
, nextarg
);
3236 /* Implement va_arg. */
3239 ia64_va_arg (valist
, type
)
3244 /* Variable sized types are passed by reference. */
3245 if (TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
3247 rtx addr
= std_expand_builtin_va_arg (valist
, build_pointer_type (type
));
3248 return gen_rtx_MEM (ptr_mode
, force_reg (Pmode
, addr
));
3251 /* Arguments with alignment larger than 8 bytes start at the next even
3253 if (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3255 t
= build (PLUS_EXPR
, TREE_TYPE (valist
), valist
,
3256 build_int_2 (2 * UNITS_PER_WORD
- 1, 0));
3257 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
3258 build_int_2 (-2 * UNITS_PER_WORD
, -1));
3259 t
= build (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
3260 TREE_SIDE_EFFECTS (t
) = 1;
3261 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3264 return std_expand_builtin_va_arg (valist
, type
);
3267 /* Return 1 if function return value returned in memory. Return 0 if it is
3271 ia64_return_in_memory (valtype
)
3274 enum machine_mode mode
;
3275 enum machine_mode hfa_mode
;
3276 HOST_WIDE_INT byte_size
;
3278 mode
= TYPE_MODE (valtype
);
3279 byte_size
= GET_MODE_SIZE (mode
);
3280 if (mode
== BLKmode
)
3282 byte_size
= int_size_in_bytes (valtype
);
3287 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3289 hfa_mode
= hfa_element_mode (valtype
, 0);
3290 if (hfa_mode
!= VOIDmode
)
3292 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3294 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
3299 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
3305 /* Return rtx for register that holds the function return value. */
3308 ia64_function_value (valtype
, func
)
3310 tree func ATTRIBUTE_UNUSED
;
3312 enum machine_mode mode
;
3313 enum machine_mode hfa_mode
;
3315 mode
= TYPE_MODE (valtype
);
3316 hfa_mode
= hfa_element_mode (valtype
, 0);
3318 if (hfa_mode
!= VOIDmode
)
3326 hfa_size
= GET_MODE_SIZE (hfa_mode
);
3327 byte_size
= ((mode
== BLKmode
)
3328 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
3330 for (i
= 0; offset
< byte_size
; i
++)
3332 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3333 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
3339 return XEXP (loc
[0], 0);
3341 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3343 else if (FLOAT_TYPE_P (valtype
) &&
3344 ((mode
!= TFmode
) || INTEL_EXTENDED_IEEE_FORMAT
))
3345 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
3347 return gen_rtx_REG (mode
, GR_RET_FIRST
);
3350 /* Print a memory address as an operand to reference that memory location. */
3352 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3353 also call this from ia64_print_operand for memory addresses. */
3356 ia64_print_operand_address (stream
, address
)
3357 FILE * stream ATTRIBUTE_UNUSED
;
3358 rtx address ATTRIBUTE_UNUSED
;
3362 /* Print an operand to an assembler instruction.
3363 C Swap and print a comparison operator.
3364 D Print an FP comparison operator.
3365 E Print 32 - constant, for SImode shifts as extract.
3366 e Print 64 - constant, for DImode rotates.
3367 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3368 a floating point register emitted normally.
3369 I Invert a predicate register by adding 1.
3370 J Select the proper predicate register for a condition.
3371 j Select the inverse predicate register for a condition.
3372 O Append .acq for volatile load.
3373 P Postincrement of a MEM.
3374 Q Append .rel for volatile store.
3375 S Shift amount for shladd instruction.
3376 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3377 for Intel assembler.
3378 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3379 for Intel assembler.
3380 r Print register name, or constant 0 as r0. HP compatibility for
3383 ia64_print_operand (file
, x
, code
)
3393 /* Handled below. */
3398 enum rtx_code c
= swap_condition (GET_CODE (x
));
3399 fputs (GET_RTX_NAME (c
), file
);
3404 switch (GET_CODE (x
))
3416 str
= GET_RTX_NAME (GET_CODE (x
));
3423 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
3427 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
3431 if (x
== CONST0_RTX (GET_MODE (x
)))
3432 str
= reg_names
[FR_REG (0)];
3433 else if (x
== CONST1_RTX (GET_MODE (x
)))
3434 str
= reg_names
[FR_REG (1)];
3435 else if (GET_CODE (x
) == REG
)
3436 str
= reg_names
[REGNO (x
)];
3443 fputs (reg_names
[REGNO (x
) + 1], file
);
3449 unsigned int regno
= REGNO (XEXP (x
, 0));
3450 if (GET_CODE (x
) == EQ
)
3454 fputs (reg_names
[regno
], file
);
3459 if (MEM_VOLATILE_P (x
))
3460 fputs(".acq", file
);
3465 HOST_WIDE_INT value
;
3467 switch (GET_CODE (XEXP (x
, 0)))
3473 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
3474 if (GET_CODE (x
) == CONST_INT
)
3476 else if (GET_CODE (x
) == REG
)
3478 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
3486 value
= GET_MODE_SIZE (GET_MODE (x
));
3490 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
3496 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, value
);
3501 if (MEM_VOLATILE_P (x
))
3502 fputs(".rel", file
);
3506 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
3510 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3512 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
3518 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3520 const char *prefix
= "0x";
3521 if (INTVAL (x
) & 0x80000000)
3523 fprintf (file
, "0xffffffff");
3526 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
3532 /* If this operand is the constant zero, write it as register zero.
3533 Any register, zero, or CONST_INT value is OK here. */
3534 if (GET_CODE (x
) == REG
)
3535 fputs (reg_names
[REGNO (x
)], file
);
3536 else if (x
== CONST0_RTX (GET_MODE (x
)))
3538 else if (GET_CODE (x
) == CONST_INT
)
3539 output_addr_const (file
, x
);
3541 output_operand_lossage ("invalid %%r value");
3548 /* For conditional branches, returns or calls, substitute
3549 sptk, dptk, dpnt, or spnt for %s. */
3550 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
3553 int pred_val
= INTVAL (XEXP (x
, 0));
3555 /* Guess top and bottom 10% statically predicted. */
3556 if (pred_val
< REG_BR_PROB_BASE
/ 50)
3558 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
3560 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98)
3565 else if (GET_CODE (current_output_insn
) == CALL_INSN
)
3570 fputs (which
, file
);
3575 x
= current_insn_predicate
;
3578 unsigned int regno
= REGNO (XEXP (x
, 0));
3579 if (GET_CODE (x
) == EQ
)
3581 fprintf (file
, "(%s) ", reg_names
[regno
]);
3586 output_operand_lossage ("ia64_print_operand: unknown code");
3590 switch (GET_CODE (x
))
3592 /* This happens for the spill/restore instructions. */
3597 /* ... fall through ... */
3600 fputs (reg_names
[REGNO (x
)], file
);
3605 rtx addr
= XEXP (x
, 0);
3606 if (GET_RTX_CLASS (GET_CODE (addr
)) == 'a')
3607 addr
= XEXP (addr
, 0);
3608 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
3613 output_addr_const (file
, x
);
3620 /* Calulate the cost of moving data from a register in class FROM to
3621 one in class TO, using MODE. */
3624 ia64_register_move_cost (mode
, from
, to
)
3625 enum machine_mode mode
;
3626 enum reg_class from
, to
;
3628 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3629 if (to
== ADDL_REGS
)
3631 if (from
== ADDL_REGS
)
3634 /* All costs are symmetric, so reduce cases by putting the
3635 lower number class as the destination. */
3638 enum reg_class tmp
= to
;
3639 to
= from
, from
= tmp
;
3642 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3643 so that we get secondary memory reloads. Between FR_REGS,
3644 we have to make this at least as expensive as MEMORY_MOVE_COST
3645 to avoid spectacularly poor register class preferencing. */
3648 if (to
!= GR_REGS
|| from
!= GR_REGS
)
3649 return MEMORY_MOVE_COST (mode
, to
, 0);
3657 /* Moving between PR registers takes two insns. */
3658 if (from
== PR_REGS
)
3660 /* Moving between PR and anything but GR is impossible. */
3661 if (from
!= GR_REGS
)
3662 return MEMORY_MOVE_COST (mode
, to
, 0);
3666 /* Moving between BR and anything but GR is impossible. */
3667 if (from
!= GR_REGS
&& from
!= GR_AND_BR_REGS
)
3668 return MEMORY_MOVE_COST (mode
, to
, 0);
3673 /* Moving between AR and anything but GR is impossible. */
3674 if (from
!= GR_REGS
)
3675 return MEMORY_MOVE_COST (mode
, to
, 0);
3680 case GR_AND_FR_REGS
:
3681 case GR_AND_BR_REGS
:
3692 /* This function returns the register class required for a secondary
3693 register when copying between one of the registers in CLASS, and X,
3694 using MODE. A return value of NO_REGS means that no secondary register
3698 ia64_secondary_reload_class (class, mode
, x
)
3699 enum reg_class
class;
3700 enum machine_mode mode ATTRIBUTE_UNUSED
;
3705 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
3706 regno
= true_regnum (x
);
3713 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3714 interaction. We end up with two pseudos with overlapping lifetimes
3715 both of which are equiv to the same constant, and both which need
3716 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3717 changes depending on the path length, which means the qty_first_reg
3718 check in make_regs_eqv can give different answers at different times.
3719 At some point I'll probably need a reload_indi pattern to handle
3722 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3723 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3724 non-general registers for good measure. */
3725 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
))
3728 /* This is needed if a pseudo used as a call_operand gets spilled to a
3730 if (GET_CODE (x
) == MEM
)
3735 /* Need to go through general regsters to get to other class regs. */
3736 if (regno
>= 0 && ! (FR_REGNO_P (regno
) || GENERAL_REGNO_P (regno
)))
3739 /* This can happen when a paradoxical subreg is an operand to the
3741 /* ??? This shouldn't be necessary after instruction scheduling is
3742 enabled, because paradoxical subregs are not accepted by
3743 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3744 stop the paradoxical subreg stupidity in the *_operand functions
3746 if (GET_CODE (x
) == MEM
3747 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
3748 || GET_MODE (x
) == QImode
))
3751 /* This can happen because of the ior/and/etc patterns that accept FP
3752 registers as operands. If the third operand is a constant, then it
3753 needs to be reloaded into a FP register. */
3754 if (GET_CODE (x
) == CONST_INT
)
3757 /* This can happen because of register elimination in a muldi3 insn.
3758 E.g. `26107 * (unsigned long)&u'. */
3759 if (GET_CODE (x
) == PLUS
)
3764 /* ??? This happens if we cse/gcse a BImode value across a call,
3765 and the function has a nonlocal goto. This is because global
3766 does not allocate call crossing pseudos to hard registers when
3767 current_function_has_nonlocal_goto is true. This is relatively
3768 common for C++ programs that use exceptions. To reproduce,
3769 return NO_REGS and compile libstdc++. */
3770 if (GET_CODE (x
) == MEM
)
3773 /* This can happen when we take a BImode subreg of a DImode value,
3774 and that DImode value winds up in some non-GR register. */
3775 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
3780 /* Since we have no offsettable memory addresses, we need a temporary
3781 to hold the address of the second word. */
3794 /* Emit text to declare externally defined variables and functions, because
3795 the Intel assembler does not support undefined externals. */
3798 ia64_asm_output_external (file
, decl
, name
)
3803 int save_referenced
;
3805 /* GNU as does not need anything here. */
3809 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3810 the linker when we do this, so we need to be careful not to do this for
3811 builtin functions which have no library equivalent. Unfortunately, we
3812 can't tell here whether or not a function will actually be called by
3813 expand_expr, so we pull in library functions even if we may not need
3815 if (! strcmp (name
, "__builtin_next_arg")
3816 || ! strcmp (name
, "alloca")
3817 || ! strcmp (name
, "__builtin_constant_p")
3818 || ! strcmp (name
, "__builtin_args_info"))
3821 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3823 save_referenced
= TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
));
3824 if (TREE_CODE (decl
) == FUNCTION_DECL
)
3826 fprintf (file
, "%s", TYPE_ASM_OP
);
3827 assemble_name (file
, name
);
3829 fprintf (file
, TYPE_OPERAND_FMT
, "function");
3832 ASM_GLOBALIZE_LABEL (file
, name
);
3833 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)) = save_referenced
;
3836 /* Parse the -mfixed-range= option string. */
3839 fix_range (const_str
)
3840 const char *const_str
;
3843 char *str
, *dash
, *comma
;
3845 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
3846 REG2 are either register names or register numbers. The effect
3847 of this option is to mark the registers in the range from REG1 to
3848 REG2 as ``fixed'' so they won't be used by the compiler. This is
3849 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3851 i
= strlen (const_str
);
3852 str
= (char *) alloca (i
+ 1);
3853 memcpy (str
, const_str
, i
+ 1);
3857 dash
= strchr (str
, '-');
3860 warning ("value of -mfixed-range must have form REG1-REG2");
3865 comma
= strchr (dash
+ 1, ',');
3869 first
= decode_reg_name (str
);
3872 warning ("unknown register name: %s", str
);
3876 last
= decode_reg_name (dash
+ 1);
3879 warning ("unknown register name: %s", dash
+ 1);
3887 warning ("%s-%s is an empty range", str
, dash
+ 1);
3891 for (i
= first
; i
<= last
; ++i
)
3892 fixed_regs
[i
] = call_used_regs
[i
] = 1;
3902 /* Called to register all of our global variables with the garbage
3906 ia64_add_gc_roots ()
3908 ggc_add_rtx_root (&ia64_compare_op0
, 1);
3909 ggc_add_rtx_root (&ia64_compare_op1
, 1);
3913 ia64_init_machine_status (p
)
3917 (struct machine_function
*) xcalloc (1, sizeof (struct machine_function
));
3921 ia64_mark_machine_status (p
)
3924 struct machine_function
*machine
= p
->machine
;
3928 ggc_mark_rtx (machine
->ia64_eh_epilogue_sp
);
3929 ggc_mark_rtx (machine
->ia64_eh_epilogue_bsp
);
3930 ggc_mark_rtx (machine
->ia64_gp_save
);
3935 ia64_free_machine_status (p
)
3942 /* Handle TARGET_OPTIONS switches. */
3945 ia64_override_options ()
3947 if (TARGET_AUTO_PIC
)
3948 target_flags
|= MASK_CONST_GP
;
3950 if (TARGET_INLINE_DIV_LAT
&& TARGET_INLINE_DIV_THR
)
3952 warning ("cannot optimize division for both latency and throughput");
3953 target_flags
&= ~MASK_INLINE_DIV_THR
;
3956 if (ia64_fixed_range_string
)
3957 fix_range (ia64_fixed_range_string
);
3959 ia64_flag_schedule_insns2
= flag_schedule_insns_after_reload
;
3960 flag_schedule_insns_after_reload
= 0;
3962 ia64_section_threshold
= g_switch_set
? g_switch_value
: IA64_DEFAULT_GVALUE
;
3964 init_machine_status
= ia64_init_machine_status
;
3965 mark_machine_status
= ia64_mark_machine_status
;
3966 free_machine_status
= ia64_free_machine_status
;
3968 ia64_add_gc_roots ();
3971 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0
PARAMS((rtx
));
3972 static enum attr_itanium_class ia64_safe_itanium_class
PARAMS((rtx
));
3973 static enum attr_type ia64_safe_type
PARAMS((rtx
));
3975 static enum attr_itanium_requires_unit0
3976 ia64_safe_itanium_requires_unit0 (insn
)
3979 if (recog_memoized (insn
) >= 0)
3980 return get_attr_itanium_requires_unit0 (insn
);
3982 return ITANIUM_REQUIRES_UNIT0_NO
;
3985 static enum attr_itanium_class
3986 ia64_safe_itanium_class (insn
)
3989 if (recog_memoized (insn
) >= 0)
3990 return get_attr_itanium_class (insn
);
3992 return ITANIUM_CLASS_UNKNOWN
;
3995 static enum attr_type
3996 ia64_safe_type (insn
)
3999 if (recog_memoized (insn
) >= 0)
4000 return get_attr_type (insn
);
4002 return TYPE_UNKNOWN
;
4005 /* The following collection of routines emit instruction group stop bits as
4006 necessary to avoid dependencies. */
4008 /* Need to track some additional registers as far as serialization is
4009 concerned so we can properly handle br.call and br.ret. We could
4010 make these registers visible to gcc, but since these registers are
4011 never explicitly used in gcc generated code, it seems wasteful to
4012 do so (plus it would make the call and return patterns needlessly
4014 #define REG_GP (GR_REG (1))
4015 #define REG_RP (BR_REG (0))
4016 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4017 /* This is used for volatile asms which may require a stop bit immediately
4018 before and after them. */
4019 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4020 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4021 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4023 /* For each register, we keep track of how it has been written in the
4024 current instruction group.
4026 If a register is written unconditionally (no qualifying predicate),
4027 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4029 If a register is written if its qualifying predicate P is true, we
4030 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4031 may be written again by the complement of P (P^1) and when this happens,
4032 WRITE_COUNT gets set to 2.
4034 The result of this is that whenever an insn attempts to write a register
4035 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4037 If a predicate register is written by a floating-point insn, we set
4038 WRITTEN_BY_FP to true.
4040 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4041 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
/* Per-register write-tracking state; see the block comment above for
   the WRITE_COUNT / FIRST_PRED protocol.  Packed into bitfields since
   we keep NUM_REGS of these in two arrays.  */
struct reg_write_state
{
  unsigned int write_count : 2;		/* 0 = unwritten, 1 = predicated, 2 = unconditional.  */
  unsigned int first_pred : 16;		/* Predicate regno of the first write, if write_count == 1.  */
  unsigned int written_by_fp : 1;	/* Written by a floating-point insn.  */
  unsigned int written_by_and : 1;	/* Written by an and.orcm compare.  */
  unsigned int written_by_or : 1;	/* Written by an or.andcm compare.  */
};
4052 /* Cumulative info for the current instruction group. */
4053 struct reg_write_state rws_sum
[NUM_REGS
];
4054 /* Info for the current instruction. This gets copied to rws_sum after a
4055 stop bit is emitted. */
4056 struct reg_write_state rws_insn
[NUM_REGS
];
4058 /* Indicates whether this is the first instruction after a stop bit,
4059 in which case we don't need another stop bit. Without this, we hit
4060 the abort in ia64_variable_issue when scheduling an alloc. */
4061 static int first_instruction
;
4063 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4064 RTL for one instruction. */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};
4075 static void rws_update
PARAMS ((struct reg_write_state
*, int,
4076 struct reg_flags
, int));
4077 static int rws_access_regno
PARAMS ((int, struct reg_flags
, int));
4078 static int rws_access_reg
PARAMS ((rtx
, struct reg_flags
, int));
4079 static void update_set_flags
PARAMS ((rtx
, struct reg_flags
*, int *, rtx
*));
4080 static int set_src_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int, rtx
));
4081 static int rtx_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int));
4082 static void init_insn_group_barriers
PARAMS ((void));
4083 static int group_barrier_needed_p
PARAMS ((rtx
));
4084 static int safe_group_barrier_needed_p
PARAMS ((rtx
));
4086 /* Update *RWS for REGNO, which is being written by the current instruction,
4087 with predicate PRED, and associated register flags in FLAGS. */
4090 rws_update (rws
, regno
, flags
, pred
)
4091 struct reg_write_state
*rws
;
4093 struct reg_flags flags
;
4097 rws
[regno
].write_count
++;
4099 rws
[regno
].write_count
= 2;
4100 rws
[regno
].written_by_fp
|= flags
.is_fp
;
4101 /* ??? Not tracking and/or across differing predicates. */
4102 rws
[regno
].written_by_and
= flags
.is_and
;
4103 rws
[regno
].written_by_or
= flags
.is_or
;
4104 rws
[regno
].first_pred
= pred
;
4107 /* Handle an access to register REGNO of type FLAGS using predicate register
4108 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4109 a dependency with an earlier instruction in the same group. */
4112 rws_access_regno (regno
, flags
, pred
)
4114 struct reg_flags flags
;
4117 int need_barrier
= 0;
4119 if (regno
>= NUM_REGS
)
4122 if (! PR_REGNO_P (regno
))
4123 flags
.is_and
= flags
.is_or
= 0;
4129 /* One insn writes same reg multiple times? */
4130 if (rws_insn
[regno
].write_count
> 0)
4133 /* Update info for current instruction. */
4134 rws_update (rws_insn
, regno
, flags
, pred
);
4135 write_count
= rws_sum
[regno
].write_count
;
4137 switch (write_count
)
4140 /* The register has not been written yet. */
4141 rws_update (rws_sum
, regno
, flags
, pred
);
4145 /* The register has been written via a predicate. If this is
4146 not a complementary predicate, then we need a barrier. */
4147 /* ??? This assumes that P and P+1 are always complementary
4148 predicates for P even. */
4149 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4151 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4153 else if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
4155 rws_update (rws_sum
, regno
, flags
, pred
);
4159 /* The register has been unconditionally written already. We
4161 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4163 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4167 rws_sum
[regno
].written_by_and
= flags
.is_and
;
4168 rws_sum
[regno
].written_by_or
= flags
.is_or
;
4177 if (flags
.is_branch
)
4179 /* Branches have several RAW exceptions that allow to avoid
4182 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== AR_PFS_REGNUM
)
4183 /* RAW dependencies on branch regs are permissible as long
4184 as the writer is a non-branch instruction. Since we
4185 never generate code that uses a branch register written
4186 by a branch instruction, handling this case is
4190 if (REGNO_REG_CLASS (regno
) == PR_REGS
4191 && ! rws_sum
[regno
].written_by_fp
)
4192 /* The predicates of a branch are available within the
4193 same insn group as long as the predicate was written by
4194 something other than a floating-point instruction. */
4198 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4200 if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4203 switch (rws_sum
[regno
].write_count
)
4206 /* The register has not been written yet. */
4210 /* The register has been written via a predicate. If this is
4211 not a complementary predicate, then we need a barrier. */
4212 /* ??? This assumes that P and P+1 are always complementary
4213 predicates for P even. */
4214 if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
4219 /* The register has been unconditionally written already. We
4229 return need_barrier
;
4233 rws_access_reg (reg
, flags
, pred
)
4235 struct reg_flags flags
;
4238 int regno
= REGNO (reg
);
4239 int n
= HARD_REGNO_NREGS (REGNO (reg
), GET_MODE (reg
));
4242 return rws_access_regno (regno
, flags
, pred
);
4245 int need_barrier
= 0;
4247 need_barrier
|= rws_access_regno (regno
+ n
, flags
, pred
);
4248 return need_barrier
;
4252 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4253 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4256 update_set_flags (x
, pflags
, ppred
, pcond
)
4258 struct reg_flags
*pflags
;
4262 rtx src
= SET_SRC (x
);
4266 switch (GET_CODE (src
))
4272 if (SET_DEST (x
) == pc_rtx
)
4273 /* X is a conditional branch. */
4277 int is_complemented
= 0;
4279 /* X is a conditional move. */
4280 rtx cond
= XEXP (src
, 0);
4281 if (GET_CODE (cond
) == EQ
)
4282 is_complemented
= 1;
4283 cond
= XEXP (cond
, 0);
4284 if (GET_CODE (cond
) != REG
4285 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4288 if (XEXP (src
, 1) == SET_DEST (x
)
4289 || XEXP (src
, 2) == SET_DEST (x
))
4291 /* X is a conditional move that conditionally writes the
4294 /* We need another complement in this case. */
4295 if (XEXP (src
, 1) == SET_DEST (x
))
4296 is_complemented
= ! is_complemented
;
4298 *ppred
= REGNO (cond
);
4299 if (is_complemented
)
4303 /* ??? If this is a conditional write to the dest, then this
4304 instruction does not actually read one source. This probably
4305 doesn't matter, because that source is also the dest. */
4306 /* ??? Multiple writes to predicate registers are allowed
4307 if they are all AND type compares, or if they are all OR
4308 type compares. We do not generate such instructions
4311 /* ... fall through ... */
4314 if (GET_RTX_CLASS (GET_CODE (src
)) == '<'
4315 && GET_MODE_CLASS (GET_MODE (XEXP (src
, 0))) == MODE_FLOAT
)
4316 /* Set pflags->is_fp to 1 so that we know we're dealing
4317 with a floating point comparison when processing the
4318 destination of the SET. */
4321 /* Discover if this is a parallel comparison. We only handle
4322 and.orcm and or.andcm at present, since we must retain a
4323 strict inverse on the predicate pair. */
4324 else if (GET_CODE (src
) == AND
)
4326 else if (GET_CODE (src
) == IOR
)
4333 /* Subroutine of rtx_needs_barrier; this function determines whether the
4334 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4335 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4339 set_src_needs_barrier (x
, flags
, pred
, cond
)
4341 struct reg_flags flags
;
4345 int need_barrier
= 0;
4347 rtx src
= SET_SRC (x
);
4349 if (GET_CODE (src
) == CALL
)
4350 /* We don't need to worry about the result registers that
4351 get written by subroutine call. */
4352 return rtx_needs_barrier (src
, flags
, pred
);
4353 else if (SET_DEST (x
) == pc_rtx
)
4355 /* X is a conditional branch. */
4356 /* ??? This seems redundant, as the caller sets this bit for
4358 flags
.is_branch
= 1;
4359 return rtx_needs_barrier (src
, flags
, pred
);
4362 need_barrier
= rtx_needs_barrier (src
, flags
, pred
);
4364 /* This instruction unconditionally uses a predicate register. */
4366 need_barrier
|= rws_access_reg (cond
, flags
, 0);
4369 if (GET_CODE (dst
) == ZERO_EXTRACT
)
4371 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
4372 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
4373 dst
= XEXP (dst
, 0);
4375 return need_barrier
;
4378 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4379 Return 1 is this access creates a dependency with an earlier instruction
4380 in the same group. */
4383 rtx_needs_barrier (x
, flags
, pred
)
4385 struct reg_flags flags
;
4389 int is_complemented
= 0;
4390 int need_barrier
= 0;
4391 const char *format_ptr
;
4392 struct reg_flags new_flags
;
4400 switch (GET_CODE (x
))
4403 update_set_flags (x
, &new_flags
, &pred
, &cond
);
4404 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
, cond
);
4405 if (GET_CODE (SET_SRC (x
)) != CALL
)
4407 new_flags
.is_write
= 1;
4408 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
4413 new_flags
.is_write
= 0;
4414 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4416 /* Avoid multiple register writes, in case this is a pattern with
4417 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4418 if (! flags
.is_sibcall
&& ! rws_insn
[REG_AR_CFM
].write_count
)
4420 new_flags
.is_write
= 1;
4421 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
4422 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
4423 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4428 /* X is a predicated instruction. */
4430 cond
= COND_EXEC_TEST (x
);
4433 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
4435 if (GET_CODE (cond
) == EQ
)
4436 is_complemented
= 1;
4437 cond
= XEXP (cond
, 0);
4438 if (GET_CODE (cond
) != REG
4439 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4441 pred
= REGNO (cond
);
4442 if (is_complemented
)
4445 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
4446 return need_barrier
;
4450 /* Clobber & use are for earlier compiler-phases only. */
4455 /* We always emit stop bits for traditional asms. We emit stop bits
4456 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4457 if (GET_CODE (x
) != ASM_OPERANDS
4458 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
4460 /* Avoid writing the register multiple times if we have multiple
4461 asm outputs. This avoids an abort in rws_access_reg. */
4462 if (! rws_insn
[REG_VOLATILE
].write_count
)
4464 new_flags
.is_write
= 1;
4465 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
4470 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4471 We can not just fall through here since then we would be confused
4472 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4473 traditional asms unlike their normal usage. */
4475 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
4476 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
4481 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4483 rtx pat
= XVECEXP (x
, 0, i
);
4484 if (GET_CODE (pat
) == SET
)
4486 update_set_flags (pat
, &new_flags
, &pred
, &cond
);
4487 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
, cond
);
4489 else if (GET_CODE (pat
) == USE
4490 || GET_CODE (pat
) == CALL
4491 || GET_CODE (pat
) == ASM_OPERANDS
)
4492 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4493 else if (GET_CODE (pat
) != CLOBBER
&& GET_CODE (pat
) != RETURN
)
4496 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4498 rtx pat
= XVECEXP (x
, 0, i
);
4499 if (GET_CODE (pat
) == SET
)
4501 if (GET_CODE (SET_SRC (pat
)) != CALL
)
4503 new_flags
.is_write
= 1;
4504 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
4508 else if (GET_CODE (pat
) == CLOBBER
|| GET_CODE (pat
) == RETURN
)
4509 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4517 if (REGNO (x
) == AR_UNAT_REGNUM
)
4519 for (i
= 0; i
< 64; ++i
)
4520 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
4523 need_barrier
= rws_access_reg (x
, flags
, pred
);
4527 /* Find the regs used in memory address computation. */
4528 new_flags
.is_write
= 0;
4529 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4532 case CONST_INT
: case CONST_DOUBLE
:
4533 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
4536 /* Operators with side-effects. */
4537 case POST_INC
: case POST_DEC
:
4538 if (GET_CODE (XEXP (x
, 0)) != REG
)
4541 new_flags
.is_write
= 0;
4542 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4543 new_flags
.is_write
= 1;
4544 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4548 if (GET_CODE (XEXP (x
, 0)) != REG
)
4551 new_flags
.is_write
= 0;
4552 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4553 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4554 new_flags
.is_write
= 1;
4555 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4558 /* Handle common unary and binary ops for efficiency. */
4559 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
4560 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
4561 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
4562 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
4563 case NE
: case EQ
: case GE
: case GT
: case LE
:
4564 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
4565 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4566 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4569 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
4570 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
4571 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
4572 case SQRT
: case FFS
:
4573 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
4577 switch (XINT (x
, 1))
4579 case 1: /* st8.spill */
4580 case 2: /* ld8.fill */
4582 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
4583 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
4585 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4586 new_flags
.is_write
= (XINT (x
, 1) == 1);
4587 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
4592 case 3: /* stf.spill */
4593 case 4: /* ldf.spill */
4594 case 8: /* popcnt */
4595 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4598 case 7: /* pred_rel_mutex */
4599 case 9: /* pic call */
4601 case 19: /* fetchadd_acq */
4602 case 20: /* mov = ar.bsp */
4603 case 21: /* flushrs */
4604 case 22: /* bundle selector */
4607 case 24: /* addp4 */
4608 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4611 case 5: /* recip_approx */
4612 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4613 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4616 case 13: /* cmpxchg_acq */
4617 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4618 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
4626 case UNSPEC_VOLATILE
:
4627 switch (XINT (x
, 1))
4630 /* Alloc must always be the first instruction of a group.
4631 We force this by always returning true. */
4632 /* ??? We might get better scheduling if we explicitly check for
4633 input/local/output register dependencies, and modify the
4634 scheduler so that alloc is always reordered to the start of
4635 the current group. We could then eliminate all of the
4636 first_instruction code. */
4637 rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4639 new_flags
.is_write
= 1;
4640 rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4643 case 1: /* blockage */
4644 case 2: /* insn group barrier */
4647 case 5: /* set_bsp */
4651 case 7: /* pred.rel.mutex */
4652 case 8: /* safe_across_calls all */
4653 case 9: /* safe_across_calls normal */
4662 new_flags
.is_write
= 0;
4663 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
4664 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4666 new_flags
.is_write
= 1;
4667 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4668 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4672 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
4673 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4674 switch (format_ptr
[i
])
4676 case '0': /* unused field */
4677 case 'i': /* integer */
4678 case 'n': /* note */
4679 case 'w': /* wide integer */
4680 case 's': /* pointer to string */
4681 case 'S': /* optional pointer to string */
4685 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
4690 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
4691 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
4700 return need_barrier
;
4703 /* Clear out the state for group_barrier_needed_p at the start of a
4704 sequence of insns. */
4707 init_insn_group_barriers ()
4709 memset (rws_sum
, 0, sizeof (rws_sum
));
4710 first_instruction
= 1;
4713 /* Given the current state, recorded by previous calls to this function,
4714 determine whether a group barrier (a stop bit) is necessary before INSN.
4715 Return nonzero if so. */
4718 group_barrier_needed_p (insn
)
4722 int need_barrier
= 0;
4723 struct reg_flags flags
;
4725 memset (&flags
, 0, sizeof (flags
));
4726 switch (GET_CODE (insn
))
4732 /* A barrier doesn't imply an instruction group boundary. */
4736 memset (rws_insn
, 0, sizeof (rws_insn
));
4740 flags
.is_branch
= 1;
4741 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
4742 memset (rws_insn
, 0, sizeof (rws_insn
));
4744 /* Don't bundle a call following another call. */
4745 if ((pat
= prev_active_insn (insn
))
4746 && GET_CODE (pat
) == CALL_INSN
)
4752 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
4756 flags
.is_branch
= 1;
4758 /* Don't bundle a jump following a call. */
4759 if ((pat
= prev_active_insn (insn
))
4760 && GET_CODE (pat
) == CALL_INSN
)
4768 if (GET_CODE (PATTERN (insn
)) == USE
4769 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
4770 /* Don't care about USE and CLOBBER "insns"---those are used to
4771 indicate to the optimizer that it shouldn't get rid of
4772 certain operations. */
4775 pat
= PATTERN (insn
);
4777 /* Ug. Hack hacks hacked elsewhere. */
4778 switch (recog_memoized (insn
))
4780 /* We play dependency tricks with the epilogue in order
4781 to get proper schedules. Undo this for dv analysis. */
4782 case CODE_FOR_epilogue_deallocate_stack
:
4783 case CODE_FOR_prologue_allocate_stack
:
4784 pat
= XVECEXP (pat
, 0, 0);
4787 /* The pattern we use for br.cloop confuses the code above.
4788 The second element of the vector is representative. */
4789 case CODE_FOR_doloop_end_internal
:
4790 pat
= XVECEXP (pat
, 0, 1);
4793 /* Doesn't generate code. */
4794 case CODE_FOR_pred_rel_mutex
:
4795 case CODE_FOR_prologue_use
:
4802 memset (rws_insn
, 0, sizeof (rws_insn
));
4803 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
4805 /* Check to see if the previous instruction was a volatile
4808 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
4815 if (first_instruction
)
4818 first_instruction
= 0;
4821 return need_barrier
;
4824 /* Like group_barrier_needed_p, but do not clobber the current state. */
4827 safe_group_barrier_needed_p (insn
)
4830 struct reg_write_state rws_saved
[NUM_REGS
];
4831 int saved_first_instruction
;
4834 memcpy (rws_saved
, rws_sum
, NUM_REGS
* sizeof *rws_saved
);
4835 saved_first_instruction
= first_instruction
;
4837 t
= group_barrier_needed_p (insn
);
4839 memcpy (rws_sum
, rws_saved
, NUM_REGS
* sizeof *rws_saved
);
4840 first_instruction
= saved_first_instruction
;
4845 /* INSNS is an chain of instructions. Scan the chain, and insert stop bits
4846 as necessary to eliminate dependendencies. This function assumes that
4847 a final instruction scheduling pass has been run which has already
4848 inserted most of the necessary stop bits. This function only inserts
4849 new ones at basic block boundaries, since these are invisible to the
4853 emit_insn_group_barriers (dump
, insns
)
4859 int insns_since_last_label
= 0;
4861 init_insn_group_barriers ();
4863 for (insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
4865 if (GET_CODE (insn
) == CODE_LABEL
)
4867 if (insns_since_last_label
)
4869 insns_since_last_label
= 0;
4871 else if (GET_CODE (insn
) == NOTE
4872 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
4874 if (insns_since_last_label
)
4876 insns_since_last_label
= 0;
4878 else if (GET_CODE (insn
) == INSN
4879 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
4880 && XINT (PATTERN (insn
), 1) == 2)
4882 init_insn_group_barriers ();
4885 else if (INSN_P (insn
))
4887 insns_since_last_label
= 1;
4889 if (group_barrier_needed_p (insn
))
4894 fprintf (dump
, "Emitting stop before label %d\n",
4895 INSN_UID (last_label
));
4896 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
4899 init_insn_group_barriers ();
4907 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4908 This function has to emit all necessary group barriers. */
4911 emit_all_insn_group_barriers (dump
, insns
)
4912 FILE *dump ATTRIBUTE_UNUSED
;
4917 init_insn_group_barriers ();
4919 for (insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
4921 if (GET_CODE (insn
) == BARRIER
)
4923 rtx last
= prev_active_insn (insn
);
4927 if (GET_CODE (last
) == JUMP_INSN
4928 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
4929 last
= prev_active_insn (last
);
4930 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
4931 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
4933 init_insn_group_barriers ();
4935 else if (INSN_P (insn
))
4937 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
4938 init_insn_group_barriers ();
4939 else if (group_barrier_needed_p (insn
))
4941 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
4942 init_insn_group_barriers ();
4943 group_barrier_needed_p (insn
);
4949 static int errata_find_address_regs
PARAMS ((rtx
*, void *));
4950 static void errata_emit_nops
PARAMS ((rtx
));
4951 static void fixup_errata
PARAMS ((void));
4953 /* This structure is used to track some details about the previous insns
4954 groups so we can determine if it may be necessary to insert NOPs to
4955 workaround hardware errata. */
4958 HARD_REG_SET p_reg_set
;
4959 HARD_REG_SET gr_reg_conditionally_set
;
4962 /* Index into the last_group array. */
4963 static int group_idx
;
4965 /* Called through for_each_rtx; determines if a hard register that was
4966 conditionally set in the previous group is used as an address register.
4967 It ensures that for_each_rtx returns 1 in that case. */
4969 errata_find_address_regs (xp
, data
)
4971 void *data ATTRIBUTE_UNUSED
;
4974 if (GET_CODE (x
) != MEM
)
4977 if (GET_CODE (x
) == POST_MODIFY
)
4979 if (GET_CODE (x
) == REG
)
4981 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
4982 if (TEST_HARD_REG_BIT (prev_group
->gr_reg_conditionally_set
,
4990 /* Called for each insn; this function keeps track of the state in
4991 last_group and emits additional NOPs if necessary to work around
4992 an Itanium A/B step erratum. */
4994 errata_emit_nops (insn
)
4997 struct group
*this_group
= last_group
+ group_idx
;
4998 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
4999 rtx pat
= PATTERN (insn
);
5000 rtx cond
= GET_CODE (pat
) == COND_EXEC
? COND_EXEC_TEST (pat
) : 0;
5001 rtx real_pat
= cond
? COND_EXEC_CODE (pat
) : pat
;
5002 enum attr_type type
;
5005 if (GET_CODE (real_pat
) == USE
5006 || GET_CODE (real_pat
) == CLOBBER
5007 || GET_CODE (real_pat
) == ASM_INPUT
5008 || GET_CODE (real_pat
) == ADDR_VEC
5009 || GET_CODE (real_pat
) == ADDR_DIFF_VEC
5010 || asm_noperands (PATTERN (insn
)) >= 0)
5013 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5016 if (GET_CODE (set
) == PARALLEL
)
5019 set
= XVECEXP (real_pat
, 0, 0);
5020 for (i
= 1; i
< XVECLEN (real_pat
, 0); i
++)
5021 if (GET_CODE (XVECEXP (real_pat
, 0, i
)) != USE
5022 && GET_CODE (XVECEXP (real_pat
, 0, i
)) != CLOBBER
)
5029 if (set
&& GET_CODE (set
) != SET
)
5032 type
= get_attr_type (insn
);
5035 && set
&& REG_P (SET_DEST (set
)) && PR_REGNO_P (REGNO (SET_DEST (set
))))
5036 SET_HARD_REG_BIT (this_group
->p_reg_set
, REGNO (SET_DEST (set
)));
5038 if ((type
== TYPE_M
|| type
== TYPE_A
) && cond
&& set
5039 && REG_P (SET_DEST (set
))
5040 && GET_CODE (SET_SRC (set
)) != PLUS
5041 && GET_CODE (SET_SRC (set
)) != MINUS
5042 && (GET_CODE (SET_SRC (set
)) != ASHIFT
5043 || !shladd_operand (XEXP (SET_SRC (set
), 1), VOIDmode
))
5044 && (GET_CODE (SET_SRC (set
)) != MEM
5045 || GET_CODE (XEXP (SET_SRC (set
), 0)) != POST_MODIFY
)
5046 && GENERAL_REGNO_P (REGNO (SET_DEST (set
))))
5048 if (GET_RTX_CLASS (GET_CODE (cond
)) != '<'
5049 || ! REG_P (XEXP (cond
, 0)))
5052 if (TEST_HARD_REG_BIT (prev_group
->p_reg_set
, REGNO (XEXP (cond
, 0))))
5053 SET_HARD_REG_BIT (this_group
->gr_reg_conditionally_set
, REGNO (SET_DEST (set
)));
5055 if (for_each_rtx (&real_pat
, errata_find_address_regs
, NULL
))
5057 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5058 emit_insn_before (gen_nop (), insn
);
5059 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5061 memset (last_group
, 0, sizeof last_group
);
5065 /* Emit extra nops if they are required to work around hardware errata. */
5072 if (! TARGET_B_STEP
)
5076 memset (last_group
, 0, sizeof last_group
);
5078 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5083 if (ia64_safe_type (insn
) == TYPE_S
)
5086 memset (last_group
+ group_idx
, 0, sizeof last_group
[group_idx
]);
5089 errata_emit_nops (insn
);
5093 /* Instruction scheduling support. */
5094 /* Describe one bundle. */
5098 /* Zero if there's no possibility of a stop in this bundle other than
5099 at the end, otherwise the position of the optional stop bit. */
5101 /* The types of the three slots. */
5102 enum attr_type t
[3];
5103 /* The pseudo op to be emitted into the assembler output. */
5107 #define NR_BUNDLES 10
5109 /* A list of all available bundles. */
5111 static const struct bundle bundle
[NR_BUNDLES
] =
5113 { 2, { TYPE_M
, TYPE_I
, TYPE_I
}, ".mii" },
5114 { 1, { TYPE_M
, TYPE_M
, TYPE_I
}, ".mmi" },
5115 { 0, { TYPE_M
, TYPE_F
, TYPE_I
}, ".mfi" },
5116 { 0, { TYPE_M
, TYPE_M
, TYPE_F
}, ".mmf" },
5117 #if NR_BUNDLES == 10
5118 { 0, { TYPE_B
, TYPE_B
, TYPE_B
}, ".bbb" },
5119 { 0, { TYPE_M
, TYPE_B
, TYPE_B
}, ".mbb" },
5121 { 0, { TYPE_M
, TYPE_I
, TYPE_B
}, ".mib" },
5122 { 0, { TYPE_M
, TYPE_M
, TYPE_B
}, ".mmb" },
5123 { 0, { TYPE_M
, TYPE_F
, TYPE_B
}, ".mfb" },
5124 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5125 it matches an L type insn. Otherwise we'll try to generate L type
5127 { 0, { TYPE_M
, TYPE_L
, TYPE_X
}, ".mlx" }
5130 /* Describe a packet of instructions. Packets consist of two bundles that
5131 are visible to the hardware in one scheduling window. */
5135 const struct bundle
*t1
, *t2
;
5136 /* Precomputed value of the first split issue in this packet if a cycle
5137 starts at its beginning. */
5139 /* For convenience, the insn types are replicated here so we don't have
5140 to go through T1 and T2 all the time. */
5141 enum attr_type t
[6];
5144 /* An array containing all possible packets. */
5145 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5146 static struct ia64_packet packets
[NR_PACKETS
];
5148 /* Map attr_type to a string with the name. */
5150 static const char *const type_names
[] =
5152 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5155 /* Nonzero if we should insert stop bits into the schedule. */
5156 int ia64_final_schedule
= 0;
5158 static int itanium_split_issue
PARAMS ((const struct ia64_packet
*, int));
5159 static rtx ia64_single_set
PARAMS ((rtx
));
5160 static int insn_matches_slot
PARAMS ((const struct ia64_packet
*, enum attr_type
, int, rtx
));
5161 static void ia64_emit_insn_before
PARAMS ((rtx
, rtx
));
5162 static void maybe_rotate
PARAMS ((FILE *));
5163 static void finish_last_head
PARAMS ((FILE *, int));
5164 static void rotate_one_bundle
PARAMS ((FILE *));
5165 static void rotate_two_bundles
PARAMS ((FILE *));
5166 static void nop_cycles_until
PARAMS ((int, FILE *));
5167 static void cycle_end_fill_slots
PARAMS ((FILE *));
5168 static int packet_matches_p
PARAMS ((const struct ia64_packet
*, int, int *));
5169 static int get_split
PARAMS ((const struct ia64_packet
*, int));
5170 static int find_best_insn
PARAMS ((rtx
*, enum attr_type
*, int,
5171 const struct ia64_packet
*, int));
5172 static void find_best_packet
PARAMS ((int *, const struct ia64_packet
**,
5173 rtx
*, enum attr_type
*, int));
5174 static int itanium_reorder
PARAMS ((FILE *, rtx
*, rtx
*, int));
5175 static void dump_current_packet
PARAMS ((FILE *));
5176 static void schedule_stop
PARAMS ((FILE *));
5177 static rtx gen_nop_type
PARAMS ((enum attr_type
));
5178 static void ia64_emit_nops
PARAMS ((void));
5180 /* Map a bundle number to its pseudo-op. */
5186 return bundle
[b
].name
;
5189 /* Compute the slot which will cause a split issue in packet P if the
5190 current cycle begins at slot BEGIN. */
5193 itanium_split_issue (p
, begin
)
5194 const struct ia64_packet
*p
;
5197 int type_count
[TYPE_S
];
5203 /* Always split before and after MMF. */
5204 if (p
->t
[0] == TYPE_M
&& p
->t
[1] == TYPE_M
&& p
->t
[2] == TYPE_F
)
5206 if (p
->t
[3] == TYPE_M
&& p
->t
[4] == TYPE_M
&& p
->t
[5] == TYPE_F
)
5208 /* Always split after MBB and BBB. */
5209 if (p
->t
[1] == TYPE_B
)
5211 /* Split after first bundle in MIB BBB combination. */
5212 if (p
->t
[2] == TYPE_B
&& p
->t
[3] == TYPE_B
)
5216 memset (type_count
, 0, sizeof type_count
);
5217 for (i
= begin
; i
< split
; i
++)
5219 enum attr_type t0
= p
->t
[i
];
5220 /* An MLX bundle reserves the same units as an MFI bundle. */
5221 enum attr_type t
= (t0
== TYPE_L
? TYPE_F
5222 : t0
== TYPE_X
? TYPE_I
5225 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5226 2 integer per cycle. */
5227 int max
= (t
== TYPE_B
? 3 : 2);
5228 if (type_count
[t
] == max
)
5236 /* Return the maximum number of instructions a cpu can issue. */
5244 /* Helper function - like single_set, but look inside COND_EXEC. */
5247 ia64_single_set (insn
)
5250 rtx x
= PATTERN (insn
), ret
;
5251 if (GET_CODE (x
) == COND_EXEC
)
5252 x
= COND_EXEC_CODE (x
);
5253 if (GET_CODE (x
) == SET
)
5256 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5257 Although they are not classical single set, the second set is there just
5258 to protect it from moving past FP-relative stack accesses. */
5259 switch (recog_memoized (insn
))
5261 case CODE_FOR_prologue_allocate_stack
:
5262 case CODE_FOR_epilogue_deallocate_stack
:
5263 ret
= XVECEXP (x
, 0, 0);
5267 ret
= single_set_2 (insn
, x
);
5274 /* Adjust the cost of a scheduling dependency. Return the new cost of
5275 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5278 ia64_adjust_cost (insn
, link
, dep_insn
, cost
)
5279 rtx insn
, link
, dep_insn
;
5282 enum attr_type dep_type
;
5283 enum attr_itanium_class dep_class
;
5284 enum attr_itanium_class insn_class
;
5285 rtx dep_set
, set
, src
, addr
;
5287 if (GET_CODE (PATTERN (insn
)) == CLOBBER
5288 || GET_CODE (PATTERN (insn
)) == USE
5289 || GET_CODE (PATTERN (dep_insn
)) == CLOBBER
5290 || GET_CODE (PATTERN (dep_insn
)) == USE
5291 /* @@@ Not accurate for indirect calls. */
5292 || GET_CODE (insn
) == CALL_INSN
5293 || ia64_safe_type (insn
) == TYPE_S
)
5296 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
5297 || REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
5300 dep_type
= ia64_safe_type (dep_insn
);
5301 dep_class
= ia64_safe_itanium_class (dep_insn
);
5302 insn_class
= ia64_safe_itanium_class (insn
);
5304 /* Compares that feed a conditional branch can execute in the same
5306 dep_set
= ia64_single_set (dep_insn
);
5307 set
= ia64_single_set (insn
);
5309 if (dep_type
!= TYPE_F
5311 && GET_CODE (SET_DEST (dep_set
)) == REG
5312 && PR_REG (REGNO (SET_DEST (dep_set
)))
5313 && GET_CODE (insn
) == JUMP_INSN
)
5316 if (dep_set
&& GET_CODE (SET_DEST (dep_set
)) == MEM
)
5318 /* ??? Can't find any information in the documenation about whether
5322 splits issue. Assume it doesn't. */
5326 src
= set
? SET_SRC (set
) : 0;
5330 if (GET_CODE (SET_DEST (set
)) == MEM
)
5331 addr
= XEXP (SET_DEST (set
), 0);
5332 else if (GET_CODE (SET_DEST (set
)) == SUBREG
5333 && GET_CODE (SUBREG_REG (SET_DEST (set
))) == MEM
)
5334 addr
= XEXP (SUBREG_REG (SET_DEST (set
)), 0);
5338 if (GET_CODE (addr
) == UNSPEC
&& XVECLEN (addr
, 0) > 0)
5339 addr
= XVECEXP (addr
, 0, 0);
5340 while (GET_CODE (addr
) == SUBREG
|| GET_CODE (addr
) == ZERO_EXTEND
)
5341 addr
= XEXP (addr
, 0);
5342 if (GET_CODE (addr
) == MEM
)
5343 addr
= XEXP (addr
, 0);
5349 if (addr
&& GET_CODE (addr
) == POST_MODIFY
)
5350 addr
= XEXP (addr
, 0);
5352 set
= ia64_single_set (dep_insn
);
5354 if ((dep_class
== ITANIUM_CLASS_IALU
5355 || dep_class
== ITANIUM_CLASS_ILOG
5356 || dep_class
== ITANIUM_CLASS_LD
)
5357 && (insn_class
== ITANIUM_CLASS_LD
5358 || insn_class
== ITANIUM_CLASS_ST
))
5360 if (! addr
|| ! set
)
5362 /* This isn't completely correct - an IALU that feeds an address has
5363 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5364 otherwise. Unfortunately there's no good way to describe this. */
5365 if (reg_overlap_mentioned_p (SET_DEST (set
), addr
))
5369 if ((dep_class
== ITANIUM_CLASS_IALU
5370 || dep_class
== ITANIUM_CLASS_ILOG
5371 || dep_class
== ITANIUM_CLASS_LD
)
5372 && (insn_class
== ITANIUM_CLASS_MMMUL
5373 || insn_class
== ITANIUM_CLASS_MMSHF
5374 || insn_class
== ITANIUM_CLASS_MMSHFI
))
5377 if (dep_class
== ITANIUM_CLASS_FMAC
5378 && (insn_class
== ITANIUM_CLASS_FMISC
5379 || insn_class
== ITANIUM_CLASS_FCVTFX
5380 || insn_class
== ITANIUM_CLASS_XMPY
))
5383 if ((dep_class
== ITANIUM_CLASS_FMAC
5384 || dep_class
== ITANIUM_CLASS_FMISC
5385 || dep_class
== ITANIUM_CLASS_FCVTFX
5386 || dep_class
== ITANIUM_CLASS_XMPY
)
5387 && insn_class
== ITANIUM_CLASS_STF
)
5390 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5391 but HP engineers say any non-MM operation. */
5392 if ((dep_class
== ITANIUM_CLASS_MMMUL
5393 || dep_class
== ITANIUM_CLASS_MMSHF
5394 || dep_class
== ITANIUM_CLASS_MMSHFI
)
5395 && insn_class
!= ITANIUM_CLASS_MMMUL
5396 && insn_class
!= ITANIUM_CLASS_MMSHF
5397 && insn_class
!= ITANIUM_CLASS_MMSHFI
)
5403 /* Describe the current state of the Itanium pipeline. */
5406 /* The first slot that is used in the current cycle. */
5408 /* The next slot to fill. */
5410 /* The packet we have selected for the current issue window. */
5411 const struct ia64_packet
*packet
;
5412 /* The position of the split issue that occurs due to issue width
5413 limitations (6 if there's no split issue). */
5415 /* Record data about the insns scheduled so far in the same issue
5416 window. The elements up to but not including FIRST_SLOT belong
5417 to the previous cycle, the ones starting with FIRST_SLOT belong
5418 to the current cycle. */
5419 enum attr_type types
[6];
5422 /* Nonzero if we decided to schedule a stop bit. */
5426 /* Temporary arrays; they have enough elements to hold all insns that
5427 can be ready at the same time while scheduling of the current block.
5428 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5429 static rtx
*sched_ready
;
5430 static enum attr_type
*sched_types
;
5432 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5436 insn_matches_slot (p
, itype
, slot
, insn
)
5437 const struct ia64_packet
*p
;
5438 enum attr_type itype
;
5442 enum attr_itanium_requires_unit0 u0
;
5443 enum attr_type stype
= p
->t
[slot
];
5447 u0
= ia64_safe_itanium_requires_unit0 (insn
);
5448 if (u0
== ITANIUM_REQUIRES_UNIT0_YES
)
5451 for (i
= sched_data
.first_slot
; i
< slot
; i
++)
5452 if (p
->t
[i
] == stype
5453 || (stype
== TYPE_F
&& p
->t
[i
] == TYPE_L
)
5454 || (stype
== TYPE_I
&& p
->t
[i
] == TYPE_X
))
5457 if (GET_CODE (insn
) == CALL_INSN
)
5459 /* Reject calls in multiway branch packets. We want to limit
5460 the number of multiway branches we generate (since the branch
5461 predictor is limited), and this seems to work fairly well.
5462 (If we didn't do this, we'd have to add another test here to
5463 force calls into the third slot of the bundle.) */
5466 if (p
->t
[1] == TYPE_B
)
5471 if (p
->t
[4] == TYPE_B
)
5479 if (itype
== TYPE_A
)
5480 return stype
== TYPE_M
|| stype
== TYPE_I
;
5484 /* Like emit_insn_before, but skip cycle_display notes.
5485 ??? When cycle display notes are implemented, update this. */
5488 ia64_emit_insn_before (insn
, before
)
5491 emit_insn_before (insn
, before
);
5494 /* When rotating a bundle out of the issue window, insert a bundle selector
5495 insn in front of it. DUMP is the scheduling dump file or NULL. START
5496 is either 0 or 3, depending on whether we want to emit a bundle selector
5497 for the first bundle or the second bundle in the current issue window.
5499 The selector insns are emitted this late because the selected packet can
5500 be changed until parts of it get rotated out. */
5503 finish_last_head (dump
, start
)
5507 const struct ia64_packet
*p
= sched_data
.packet
;
5508 const struct bundle
*b
= start
== 0 ? p
->t1
: p
->t2
;
5509 int bundle_type
= b
- bundle
;
5513 if (! ia64_final_schedule
)
5516 for (i
= start
; sched_data
.insns
[i
] == 0; i
++)
5519 insn
= sched_data
.insns
[i
];
5522 fprintf (dump
, "// Emitting template before %d: %s\n",
5523 INSN_UID (insn
), b
->name
);
5525 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type
)), insn
);
5528 /* We can't schedule more insns this cycle. Fix up the scheduling state
5529 and advance FIRST_SLOT and CUR.
5530 We have to distribute the insns that are currently found between
5531 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5532 far, they are stored successively in the fields starting at FIRST_SLOT;
5533 now they must be moved to the correct slots.
5534 DUMP is the current scheduling dump file, or NULL. */
5537 cycle_end_fill_slots (dump
)
5540 const struct ia64_packet
*packet
= sched_data
.packet
;
5542 enum attr_type tmp_types
[6];
5545 memcpy (tmp_types
, sched_data
.types
, 6 * sizeof (enum attr_type
));
5546 memcpy (tmp_insns
, sched_data
.insns
, 6 * sizeof (rtx
));
5548 for (i
= slot
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
5550 enum attr_type t
= tmp_types
[i
];
5551 if (t
!= ia64_safe_type (tmp_insns
[i
]))
5553 while (! insn_matches_slot (packet
, t
, slot
, tmp_insns
[i
]))
5555 if (slot
> sched_data
.split
)
5558 fprintf (dump
, "// Packet needs %s, have %s\n",
5559 type_names
[packet
->t
[slot
]], type_names
[t
]);
5560 sched_data
.types
[slot
] = packet
->t
[slot
];
5561 sched_data
.insns
[slot
] = 0;
5562 sched_data
.stopbit
[slot
] = 0;
5564 /* ??? TYPE_L instructions always fill up two slots, but we don't
5565 support TYPE_L nops. */
5566 if (packet
->t
[slot
] == TYPE_L
)
5572 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5573 actual slot type later. */
5574 sched_data
.types
[slot
] = packet
->t
[slot
];
5575 sched_data
.insns
[slot
] = tmp_insns
[i
];
5576 sched_data
.stopbit
[slot
] = 0;
5579 /* TYPE_L instructions always fill up two slots. */
5582 sched_data
.types
[slot
] = packet
->t
[slot
];
5583 sched_data
.insns
[slot
] = 0;
5584 sched_data
.stopbit
[slot
] = 0;
5589 /* This isn't right - there's no need to pad out until the forced split;
5590 the CPU will automatically split if an insn isn't ready. */
5592 while (slot
< sched_data
.split
)
5594 sched_data
.types
[slot
] = packet
->t
[slot
];
5595 sched_data
.insns
[slot
] = 0;
5596 sched_data
.stopbit
[slot
] = 0;
5601 sched_data
.first_slot
= sched_data
.cur
= slot
;
5604 /* Bundle rotations, as described in the Itanium optimization manual.
5605 We can rotate either one or both bundles out of the issue window.
5606 DUMP is the current scheduling dump file, or NULL. */
5609 rotate_one_bundle (dump
)
5613 fprintf (dump
, "// Rotating one bundle.\n");
5615 finish_last_head (dump
, 0);
5616 if (sched_data
.cur
> 3)
5618 sched_data
.cur
-= 3;
5619 sched_data
.first_slot
-= 3;
5620 memmove (sched_data
.types
,
5621 sched_data
.types
+ 3,
5622 sched_data
.cur
* sizeof *sched_data
.types
);
5623 memmove (sched_data
.stopbit
,
5624 sched_data
.stopbit
+ 3,
5625 sched_data
.cur
* sizeof *sched_data
.stopbit
);
5626 memmove (sched_data
.insns
,
5627 sched_data
.insns
+ 3,
5628 sched_data
.cur
* sizeof *sched_data
.insns
);
5630 = &packets
[(sched_data
.packet
->t2
- bundle
) * NR_BUNDLES
];
5635 sched_data
.first_slot
= 0;
5640 rotate_two_bundles (dump
)
5644 fprintf (dump
, "// Rotating two bundles.\n");
5646 if (sched_data
.cur
== 0)
5649 finish_last_head (dump
, 0);
5650 if (sched_data
.cur
> 3)
5651 finish_last_head (dump
, 3);
5653 sched_data
.first_slot
= 0;
5656 /* We're beginning a new block. Initialize data structures as necessary. */
5659 ia64_sched_init (dump
, sched_verbose
, max_ready
)
5660 FILE *dump ATTRIBUTE_UNUSED
;
5661 int sched_verbose ATTRIBUTE_UNUSED
;
5664 static int initialized
= 0;
5672 for (i
= b1
= 0; b1
< NR_BUNDLES
; b1
++)
5674 const struct bundle
*t1
= bundle
+ b1
;
5675 for (b2
= 0; b2
< NR_BUNDLES
; b2
++, i
++)
5677 const struct bundle
*t2
= bundle
+ b2
;
5683 for (i
= 0; i
< NR_PACKETS
; i
++)
5686 for (j
= 0; j
< 3; j
++)
5687 packets
[i
].t
[j
] = packets
[i
].t1
->t
[j
];
5688 for (j
= 0; j
< 3; j
++)
5689 packets
[i
].t
[j
+ 3] = packets
[i
].t2
->t
[j
];
5690 packets
[i
].first_split
= itanium_split_issue (packets
+ i
, 0);
5695 init_insn_group_barriers ();
5697 memset (&sched_data
, 0, sizeof sched_data
);
5698 sched_types
= (enum attr_type
*) xmalloc (max_ready
5699 * sizeof (enum attr_type
));
5700 sched_ready
= (rtx
*) xmalloc (max_ready
* sizeof (rtx
));
5703 /* See if the packet P can match the insns we have already scheduled. Return
5704 nonzero if so. In *PSLOT, we store the first slot that is available for
5705 more instructions if we choose this packet.
5706 SPLIT holds the last slot we can use, there's a split issue after it so
5707 scheduling beyond it would cause us to use more than one cycle. */
5710 packet_matches_p (p
, split
, pslot
)
5711 const struct ia64_packet
*p
;
5715 int filled
= sched_data
.cur
;
5716 int first
= sched_data
.first_slot
;
5719 /* First, check if the first of the two bundles must be a specific one (due
5721 if (first
> 0 && sched_data
.stopbit
[0] && p
->t1
->possible_stop
!= 1)
5723 if (first
> 1 && sched_data
.stopbit
[1] && p
->t1
->possible_stop
!= 2)
5726 for (i
= 0; i
< first
; i
++)
5727 if (! insn_matches_slot (p
, sched_data
.types
[i
], i
,
5728 sched_data
.insns
[i
]))
5730 for (i
= slot
= first
; i
< filled
; i
++)
5732 while (slot
< split
)
5734 if (insn_matches_slot (p
, sched_data
.types
[i
], slot
,
5735 sched_data
.insns
[i
]))
5749 /* A frontend for itanium_split_issue. For a packet P and a slot
5750 number FIRST that describes the start of the current clock cycle,
5751 return the slot number of the first split issue. This function
5752 uses the cached number found in P if possible. */
5755 get_split (p
, first
)
5756 const struct ia64_packet
*p
;
5760 return p
->first_split
;
5761 return itanium_split_issue (p
, first
);
5764 /* Given N_READY insns in the array READY, whose types are found in the
5765 corresponding array TYPES, return the insn that is best suited to be
5766 scheduled in slot SLOT of packet P. */
5769 find_best_insn (ready
, types
, n_ready
, p
, slot
)
5771 enum attr_type
*types
;
5773 const struct ia64_packet
*p
;
5778 while (n_ready
-- > 0)
5780 rtx insn
= ready
[n_ready
];
5783 if (best
>= 0 && INSN_PRIORITY (ready
[n_ready
]) < best_pri
)
5785 /* If we have equally good insns, one of which has a stricter
5786 slot requirement, prefer the one with the stricter requirement. */
5787 if (best
>= 0 && types
[n_ready
] == TYPE_A
)
5789 if (insn_matches_slot (p
, types
[n_ready
], slot
, insn
))
5792 best_pri
= INSN_PRIORITY (ready
[best
]);
5794 /* If there's no way we could get a stricter requirement, stop
5796 if (types
[n_ready
] != TYPE_A
5797 && ia64_safe_itanium_requires_unit0 (ready
[n_ready
]))
5805 /* Select the best packet to use given the current scheduler state and the
5807 READY is an array holding N_READY ready insns; TYPES is a corresponding
5808 array that holds their types. Store the best packet in *PPACKET and the
5809 number of insns that can be scheduled in the current cycle in *PBEST. */
5812 find_best_packet (pbest
, ppacket
, ready
, types
, n_ready
)
5814 const struct ia64_packet
**ppacket
;
5816 enum attr_type
*types
;
5819 int first
= sched_data
.first_slot
;
5822 const struct ia64_packet
*best_packet
= NULL
;
5825 for (i
= 0; i
< NR_PACKETS
; i
++)
5827 const struct ia64_packet
*p
= packets
+ i
;
5829 int split
= get_split (p
, first
);
5831 int first_slot
, last_slot
;
5834 if (! packet_matches_p (p
, split
, &first_slot
))
5837 memcpy (sched_ready
, ready
, n_ready
* sizeof (rtx
));
5841 for (slot
= first_slot
; slot
< split
; slot
++)
5845 /* Disallow a degenerate case where the first bundle doesn't
5846 contain anything but NOPs! */
5847 if (first_slot
== 0 && win
== 0 && slot
== 3)
5853 insn_nr
= find_best_insn (sched_ready
, types
, n_ready
, p
, slot
);
5856 sched_ready
[insn_nr
] = 0;
5860 else if (p
->t
[slot
] == TYPE_B
)
5863 /* We must disallow MBB/BBB packets if any of their B slots would be
5864 filled with nops. */
5867 if (p
->t
[1] == TYPE_B
&& (b_nops
|| last_slot
< 2))
5872 if (p
->t
[4] == TYPE_B
&& (b_nops
|| last_slot
< 5))
5877 || (win
== best
&& last_slot
< lowest_end
))
5880 lowest_end
= last_slot
;
5885 *ppacket
= best_packet
;
5888 /* Reorder the ready list so that the insns that can be issued in this cycle
5889 are found in the correct order at the end of the list.
5890 DUMP is the scheduling dump file, or NULL. READY points to the start,
5891 E_READY to the end of the ready list. MAY_FAIL determines what should be
5892 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5893 otherwise we return 0.
5894 Return 1 if any insns can be scheduled in this cycle. */
5897 itanium_reorder (dump
, ready
, e_ready
, may_fail
)
5903 const struct ia64_packet
*best_packet
;
5904 int n_ready
= e_ready
- ready
;
5905 int first
= sched_data
.first_slot
;
5906 int i
, best
, best_split
, filled
;
5908 for (i
= 0; i
< n_ready
; i
++)
5909 sched_types
[i
] = ia64_safe_type (ready
[i
]);
5911 find_best_packet (&best
, &best_packet
, ready
, sched_types
, n_ready
);
5922 fprintf (dump
, "// Selected bundles: %s %s (%d insns)\n",
5923 best_packet
->t1
->name
,
5924 best_packet
->t2
? best_packet
->t2
->name
: NULL
, best
);
5927 best_split
= itanium_split_issue (best_packet
, first
);
5928 packet_matches_p (best_packet
, best_split
, &filled
);
5930 for (i
= filled
; i
< best_split
; i
++)
5934 insn_nr
= find_best_insn (ready
, sched_types
, n_ready
, best_packet
, i
);
5937 rtx insn
= ready
[insn_nr
];
5938 memmove (ready
+ insn_nr
, ready
+ insn_nr
+ 1,
5939 (n_ready
- insn_nr
- 1) * sizeof (rtx
));
5940 memmove (sched_types
+ insn_nr
, sched_types
+ insn_nr
+ 1,
5941 (n_ready
- insn_nr
- 1) * sizeof (enum attr_type
));
5942 ready
[--n_ready
] = insn
;
5946 sched_data
.packet
= best_packet
;
5947 sched_data
.split
= best_split
;
5951 /* Dump information about the current scheduling state to file DUMP. */
5954 dump_current_packet (dump
)
5958 fprintf (dump
, "// %d slots filled:", sched_data
.cur
);
5959 for (i
= 0; i
< sched_data
.first_slot
; i
++)
5961 rtx insn
= sched_data
.insns
[i
];
5962 fprintf (dump
, " %s", type_names
[sched_data
.types
[i
]]);
5964 fprintf (dump
, "/%s", type_names
[ia64_safe_type (insn
)]);
5965 if (sched_data
.stopbit
[i
])
5966 fprintf (dump
, " ;;");
5968 fprintf (dump
, " :::");
5969 for (i
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
5971 rtx insn
= sched_data
.insns
[i
];
5972 enum attr_type t
= ia64_safe_type (insn
);
5973 fprintf (dump
, " (%d) %s", INSN_UID (insn
), type_names
[t
]);
5975 fprintf (dump
, "\n");
5978 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5982 schedule_stop (dump
)
5985 const struct ia64_packet
*best
= sched_data
.packet
;
5990 fprintf (dump
, "// Stop bit, cur = %d.\n", sched_data
.cur
);
5992 if (sched_data
.cur
== 0)
5995 fprintf (dump
, "// At start of bundle, so nothing to do.\n");
5997 rotate_two_bundles (NULL
);
6001 for (i
= -1; i
< NR_PACKETS
; i
++)
6003 /* This is a slight hack to give the current packet the first chance.
6004 This is done to avoid e.g. switching from MIB to MBB bundles. */
6005 const struct ia64_packet
*p
= (i
>= 0 ? packets
+ i
: sched_data
.packet
);
6006 int split
= get_split (p
, sched_data
.first_slot
);
6007 const struct bundle
*compare
;
6010 if (! packet_matches_p (p
, split
, &next
))
6013 compare
= next
> 3 ? p
->t2
: p
->t1
;
6016 if (compare
->possible_stop
)
6017 stoppos
= compare
->possible_stop
;
6021 if (stoppos
< next
|| stoppos
>= best_stop
)
6023 if (compare
->possible_stop
== 0)
6025 stoppos
= (next
> 3 ? 6 : 3);
6027 if (stoppos
< next
|| stoppos
>= best_stop
)
6031 fprintf (dump
, "// switching from %s %s to %s %s (stop at %d)\n",
6032 best
->t1
->name
, best
->t2
->name
, p
->t1
->name
, p
->t2
->name
,
6035 best_stop
= stoppos
;
6039 sched_data
.packet
= best
;
6040 cycle_end_fill_slots (dump
);
6041 while (sched_data
.cur
< best_stop
)
6043 sched_data
.types
[sched_data
.cur
] = best
->t
[sched_data
.cur
];
6044 sched_data
.insns
[sched_data
.cur
] = 0;
6045 sched_data
.stopbit
[sched_data
.cur
] = 0;
6048 sched_data
.stopbit
[sched_data
.cur
- 1] = 1;
6049 sched_data
.first_slot
= best_stop
;
6052 dump_current_packet (dump
);
6055 /* If necessary, perform one or two rotations on the scheduling state.
6056 This should only be called if we are starting a new cycle. */
6062 cycle_end_fill_slots (dump
);
6063 if (sched_data
.cur
== 6)
6064 rotate_two_bundles (dump
);
6065 else if (sched_data
.cur
>= 3)
6066 rotate_one_bundle (dump
);
6067 sched_data
.first_slot
= sched_data
.cur
;
6070 /* The clock cycle when ia64_sched_reorder was last called. */
6071 static int prev_cycle
;
6073 /* The first insn scheduled in the previous cycle. This is the saved
6074 value of sched_data.first_slot. */
6075 static int prev_first
;
6077 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6078 pad out the delay between MM (shifts, etc.) and integer operations. */
6081 nop_cycles_until (clock_var
, dump
)
6085 int prev_clock
= prev_cycle
;
6086 int cycles_left
= clock_var
- prev_clock
;
6087 bool did_stop
= false;
6089 /* Finish the previous cycle; pad it out with NOPs. */
6090 if (sched_data
.cur
== 3)
6092 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6094 maybe_rotate (dump
);
6096 else if (sched_data
.cur
> 0)
6099 int split
= itanium_split_issue (sched_data
.packet
, prev_first
);
6101 if (sched_data
.cur
< 3 && split
> 3)
6107 if (split
> sched_data
.cur
)
6110 for (i
= sched_data
.cur
; i
< split
; i
++)
6112 rtx t
= sched_emit_insn (gen_nop_type (sched_data
.packet
->t
[i
]));
6113 sched_data
.types
[i
] = sched_data
.packet
->t
[i
];
6114 sched_data
.insns
[i
] = t
;
6115 sched_data
.stopbit
[i
] = 0;
6117 sched_data
.cur
= split
;
6120 if (! need_stop
&& sched_data
.cur
> 0 && sched_data
.cur
< 6
6124 for (i
= sched_data
.cur
; i
< 6; i
++)
6126 rtx t
= sched_emit_insn (gen_nop_type (sched_data
.packet
->t
[i
]));
6127 sched_data
.types
[i
] = sched_data
.packet
->t
[i
];
6128 sched_data
.insns
[i
] = t
;
6129 sched_data
.stopbit
[i
] = 0;
6136 if (need_stop
|| sched_data
.cur
== 6)
6138 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6141 maybe_rotate (dump
);
6145 while (cycles_left
> 0)
6147 sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6148 sched_emit_insn (gen_nop_type (TYPE_M
));
6149 sched_emit_insn (gen_nop_type (TYPE_I
));
6150 if (cycles_left
> 1)
6152 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6155 sched_emit_insn (gen_nop_type (TYPE_I
));
6156 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6162 init_insn_group_barriers ();
6165 /* We are about to being issuing insns for this clock cycle.
6166 Override the default sort algorithm to better slot instructions. */
6169 ia64_internal_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
6170 reorder_type
, clock_var
)
6171 FILE *dump ATTRIBUTE_UNUSED
;
6172 int sched_verbose ATTRIBUTE_UNUSED
;
6175 int reorder_type
, clock_var
;
6178 int n_ready
= *pn_ready
;
6179 rtx
*e_ready
= ready
+ n_ready
;
6184 fprintf (dump
, "// ia64_sched_reorder (type %d):\n", reorder_type
);
6185 dump_current_packet (dump
);
6188 /* Work around the pipeline flush that will occurr if the results of
6189 an MM instruction are accessed before the result is ready. Intel
6190 documentation says this only happens with IALU, ISHF, ILOG, LD,
6191 and ST consumers, but experimental evidence shows that *any* non-MM
6192 type instruction will incurr the flush. */
6193 if (reorder_type
== 0 && clock_var
> 0 && ia64_final_schedule
)
6195 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6197 rtx insn
= *insnp
, link
;
6198 enum attr_itanium_class t
= ia64_safe_itanium_class (insn
);
6200 if (t
== ITANIUM_CLASS_MMMUL
6201 || t
== ITANIUM_CLASS_MMSHF
6202 || t
== ITANIUM_CLASS_MMSHFI
)
6205 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
6206 if (REG_NOTE_KIND (link
) == 0)
6208 rtx other
= XEXP (link
, 0);
6209 enum attr_itanium_class t0
= ia64_safe_itanium_class (other
);
6210 if (t0
== ITANIUM_CLASS_MMSHF
|| t0
== ITANIUM_CLASS_MMMUL
)
6212 nop_cycles_until (clock_var
, sched_verbose
? dump
: NULL
);
6220 prev_first
= sched_data
.first_slot
;
6221 prev_cycle
= clock_var
;
6223 if (reorder_type
== 0)
6224 maybe_rotate (sched_verbose
? dump
: NULL
);
6226 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6228 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6229 if (insnp
< e_ready
)
6232 enum attr_type t
= ia64_safe_type (insn
);
6233 if (t
== TYPE_UNKNOWN
)
6235 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6236 || asm_noperands (PATTERN (insn
)) >= 0)
6238 rtx lowest
= ready
[n_asms
];
6239 ready
[n_asms
] = insn
;
6245 rtx highest
= ready
[n_ready
- 1];
6246 ready
[n_ready
- 1] = insn
;
6248 if (ia64_final_schedule
&& group_barrier_needed_p (insn
))
6250 schedule_stop (sched_verbose
? dump
: NULL
);
6251 sched_data
.last_was_stop
= 1;
6252 maybe_rotate (sched_verbose
? dump
: NULL
);
6259 if (n_asms
< n_ready
)
6261 /* Some normal insns to process. Skip the asms. */
6265 else if (n_ready
> 0)
6267 /* Only asm insns left. */
6268 if (ia64_final_schedule
&& group_barrier_needed_p (ready
[n_ready
- 1]))
6270 schedule_stop (sched_verbose
? dump
: NULL
);
6271 sched_data
.last_was_stop
= 1;
6272 maybe_rotate (sched_verbose
? dump
: NULL
);
6274 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
6278 if (ia64_final_schedule
)
6280 int nr_need_stop
= 0;
6282 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6283 if (safe_group_barrier_needed_p (*insnp
))
6286 /* Schedule a stop bit if
6287 - all insns require a stop bit, or
6288 - we are starting a new cycle and _any_ insns require a stop bit.
6289 The reason for the latter is that if our schedule is accurate, then
6290 the additional stop won't decrease performance at this point (since
6291 there's a split issue at this point anyway), but it gives us more
6292 freedom when scheduling the currently ready insns. */
6293 if ((reorder_type
== 0 && nr_need_stop
)
6294 || (reorder_type
== 1 && n_ready
== nr_need_stop
))
6296 schedule_stop (sched_verbose
? dump
: NULL
);
6297 sched_data
.last_was_stop
= 1;
6298 maybe_rotate (sched_verbose
? dump
: NULL
);
6299 if (reorder_type
== 1)
6306 /* Move down everything that needs a stop bit, preserving relative
6308 while (insnp
-- > ready
+ deleted
)
6309 while (insnp
>= ready
+ deleted
)
6312 if (! safe_group_barrier_needed_p (insn
))
6314 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
6320 if (deleted
!= nr_need_stop
)
6325 return itanium_reorder (sched_verbose
? dump
: NULL
,
6326 ready
, e_ready
, reorder_type
== 1);
6330 ia64_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
6337 return ia64_internal_sched_reorder (dump
, sched_verbose
, ready
,
6338 pn_ready
, 0, clock_var
);
6341 /* Like ia64_sched_reorder, but called after issuing each insn.
6342 Override the default sort algorithm to better slot instructions. */
6345 ia64_sched_reorder2 (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
6346 FILE *dump ATTRIBUTE_UNUSED
;
6347 int sched_verbose ATTRIBUTE_UNUSED
;
6352 if (sched_data
.last_was_stop
)
6355 /* Detect one special case and try to optimize it.
6356 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6357 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6358 if (sched_data
.first_slot
== 1
6359 && sched_data
.stopbit
[0]
6360 && ((sched_data
.cur
== 4
6361 && (sched_data
.types
[1] == TYPE_M
|| sched_data
.types
[1] == TYPE_A
)
6362 && (sched_data
.types
[2] == TYPE_I
|| sched_data
.types
[2] == TYPE_A
)
6363 && (sched_data
.types
[3] != TYPE_M
&& sched_data
.types
[3] != TYPE_A
))
6364 || (sched_data
.cur
== 3
6365 && (sched_data
.types
[1] == TYPE_M
6366 || sched_data
.types
[1] == TYPE_A
)
6367 && (sched_data
.types
[2] != TYPE_M
6368 && sched_data
.types
[2] != TYPE_I
6369 && sched_data
.types
[2] != TYPE_A
))))
6373 rtx stop
= sched_data
.insns
[1];
6375 /* Search backward for the stop bit that must be there. */
6380 stop
= PREV_INSN (stop
);
6381 if (GET_CODE (stop
) != INSN
)
6383 insn_code
= recog_memoized (stop
);
6385 /* Ignore .pred.rel.mutex.
6387 ??? Update this to ignore cycle display notes too
6388 ??? once those are implemented */
6389 if (insn_code
== CODE_FOR_pred_rel_mutex
6390 || insn_code
== CODE_FOR_prologue_use
)
6393 if (insn_code
== CODE_FOR_insn_group_barrier
)
6398 /* Adjust the stop bit's slot selector. */
6399 if (INTVAL (XVECEXP (PATTERN (stop
), 0, 0)) != 1)
6401 XVECEXP (PATTERN (stop
), 0, 0) = GEN_INT (3);
6403 sched_data
.stopbit
[0] = 0;
6404 sched_data
.stopbit
[2] = 1;
6406 sched_data
.types
[5] = sched_data
.types
[3];
6407 sched_data
.types
[4] = sched_data
.types
[2];
6408 sched_data
.types
[3] = sched_data
.types
[1];
6409 sched_data
.insns
[5] = sched_data
.insns
[3];
6410 sched_data
.insns
[4] = sched_data
.insns
[2];
6411 sched_data
.insns
[3] = sched_data
.insns
[1];
6412 sched_data
.stopbit
[5] = sched_data
.stopbit
[4] = sched_data
.stopbit
[3] = 0;
6413 sched_data
.cur
+= 2;
6414 sched_data
.first_slot
= 3;
6415 for (i
= 0; i
< NR_PACKETS
; i
++)
6417 const struct ia64_packet
*p
= packets
+ i
;
6418 if (p
->t
[0] == TYPE_M
&& p
->t
[1] == TYPE_F
&& p
->t
[2] == TYPE_B
)
6420 sched_data
.packet
= p
;
6424 rotate_one_bundle (sched_verbose
? dump
: NULL
);
6427 for (i
= 0; i
< NR_PACKETS
; i
++)
6429 const struct ia64_packet
*p
= packets
+ i
;
6430 int split
= get_split (p
, sched_data
.first_slot
);
6433 /* Disallow multiway branches here. */
6434 if (p
->t
[1] == TYPE_B
)
6437 if (packet_matches_p (p
, split
, &next
) && next
< best
)
6440 sched_data
.packet
= p
;
6441 sched_data
.split
= split
;
6450 int more
= ia64_internal_sched_reorder (dump
, sched_verbose
,
6455 /* Did we schedule a stop? If so, finish this cycle. */
6456 if (sched_data
.cur
== sched_data
.first_slot
)
6461 fprintf (dump
, "// Can't issue more this cycle; updating type array.\n");
6463 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
6465 dump_current_packet (dump
);
6469 /* We are about to issue INSN. Return the number of insns left on the
6470 ready queue that can be issued this cycle. */
6473 ia64_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
6477 int can_issue_more ATTRIBUTE_UNUSED
;
6479 enum attr_type t
= ia64_safe_type (insn
);
6481 if (sched_data
.last_was_stop
)
6483 int t
= sched_data
.first_slot
;
6486 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t
)), insn
);
6487 init_insn_group_barriers ();
6488 sched_data
.last_was_stop
= 0;
6491 if (t
== TYPE_UNKNOWN
)
6494 fprintf (dump
, "// Ignoring type %s\n", type_names
[t
]);
6495 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6496 || asm_noperands (PATTERN (insn
)) >= 0)
6498 /* This must be some kind of asm. Clear the scheduling state. */
6499 rotate_two_bundles (sched_verbose
? dump
: NULL
);
6500 if (ia64_final_schedule
)
6501 group_barrier_needed_p (insn
);
6506 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6507 important state info. Don't delete this test. */
6508 if (ia64_final_schedule
6509 && group_barrier_needed_p (insn
))
6512 sched_data
.stopbit
[sched_data
.cur
] = 0;
6513 sched_data
.insns
[sched_data
.cur
] = insn
;
6514 sched_data
.types
[sched_data
.cur
] = t
;
6518 fprintf (dump
, "// Scheduling insn %d of type %s\n",
6519 INSN_UID (insn
), type_names
[t
]);
6521 if (GET_CODE (insn
) == CALL_INSN
&& ia64_final_schedule
)
6523 schedule_stop (sched_verbose
? dump
: NULL
);
6524 sched_data
.last_was_stop
= 1;
6530 /* Free data allocated by ia64_sched_init. */
6533 ia64_sched_finish (dump
, sched_verbose
)
6538 fprintf (dump
, "// Finishing schedule.\n");
6539 rotate_two_bundles (NULL
);
6544 /* Emit pseudo-ops for the assembler to describe predicate relations.
6545 At present this assumes that we only consider predicate pairs to
6546 be mutex, and that the assembler can deduce proper values from
6547 straight-line code. */
6550 emit_predicate_relation_info ()
6554 for (i
= n_basic_blocks
- 1; i
>= 0; --i
)
6556 basic_block bb
= BASIC_BLOCK (i
);
6558 rtx head
= bb
->head
;
6560 /* We only need such notes at code labels. */
6561 if (GET_CODE (head
) != CODE_LABEL
)
6563 if (GET_CODE (NEXT_INSN (head
)) == NOTE
6564 && NOTE_LINE_NUMBER (NEXT_INSN (head
)) == NOTE_INSN_BASIC_BLOCK
)
6565 head
= NEXT_INSN (head
);
6567 for (r
= PR_REG (0); r
< PR_REG (64); r
+= 2)
6568 if (REGNO_REG_SET_P (bb
->global_live_at_start
, r
))
6570 rtx p
= gen_rtx_REG (BImode
, r
);
6571 rtx n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
6572 if (head
== bb
->end
)
6578 /* Look for conditional calls that do not return, and protect predicate
6579 relations around them. Otherwise the assembler will assume the call
6580 returns, and complain about uses of call-clobbered predicates after
6582 for (i
= n_basic_blocks
- 1; i
>= 0; --i
)
6584 basic_block bb
= BASIC_BLOCK (i
);
6585 rtx insn
= bb
->head
;
6589 if (GET_CODE (insn
) == CALL_INSN
6590 && GET_CODE (PATTERN (insn
)) == COND_EXEC
6591 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
6593 rtx b
= emit_insn_before (gen_safe_across_calls_all (), insn
);
6594 rtx a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
6595 if (bb
->head
== insn
)
6597 if (bb
->end
== insn
)
6601 if (insn
== bb
->end
)
6603 insn
= NEXT_INSN (insn
);
6608 /* Generate a NOP instruction of type T. We will never generate L type
6618 return gen_nop_m ();
6620 return gen_nop_i ();
6622 return gen_nop_b ();
6624 return gen_nop_f ();
6626 return gen_nop_x ();
6632 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6633 here than while scheduling. */
6639 const struct bundle
*b
= 0;
6642 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6646 pat
= INSN_P (insn
) ? PATTERN (insn
) : const0_rtx
;
6647 if (GET_CODE (pat
) == USE
|| GET_CODE (pat
) == CLOBBER
)
6649 if ((GET_CODE (pat
) == UNSPEC
&& XINT (pat
, 1) == 22)
6650 || GET_CODE (insn
) == CODE_LABEL
)
6653 while (bundle_pos
< 3)
6655 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6658 if (GET_CODE (insn
) != CODE_LABEL
)
6659 b
= bundle
+ INTVAL (XVECEXP (pat
, 0, 0));
6665 else if (GET_CODE (pat
) == UNSPEC_VOLATILE
&& XINT (pat
, 1) == 2)
6667 int t
= INTVAL (XVECEXP (pat
, 0, 0));
6669 while (bundle_pos
< t
)
6671 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6677 if (bundle_pos
== 3)
6680 if (b
&& INSN_P (insn
))
6682 t
= ia64_safe_type (insn
);
6683 if (asm_noperands (PATTERN (insn
)) >= 0
6684 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)
6686 while (bundle_pos
< 3)
6688 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6694 if (t
== TYPE_UNKNOWN
)
6696 while (bundle_pos
< 3)
6698 if (t
== b
->t
[bundle_pos
]
6699 || (t
== TYPE_A
&& (b
->t
[bundle_pos
] == TYPE_M
6700 || b
->t
[bundle_pos
] == TYPE_I
)))
6703 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6712 /* Perform machine dependent operations on the rtl chain INSNS. */
6718 /* If optimizing, we'll have split before scheduling. */
6720 split_all_insns_noflow ();
6722 /* We are freeing block_for_insn in the toplev to keep compatibility
6723 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6724 compute_bb_for_insn (get_max_uid ());
6725 /* update_life_info_in_dirty_blocks should be enought here. */
6726 life_analysis (insns
, NULL
, PROP_DEATH_NOTES
);
6728 if (ia64_flag_schedule_insns2
)
6730 timevar_push (TV_SCHED2
);
6731 ia64_final_schedule
= 1;
6732 schedule_ebbs (rtl_dump_file
);
6733 ia64_final_schedule
= 0;
6734 timevar_pop (TV_SCHED2
);
6736 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6737 place as they were during scheduling. */
6738 emit_insn_group_barriers (rtl_dump_file
, insns
);
6742 emit_all_insn_group_barriers (rtl_dump_file
, insns
);
6744 /* A call must not be the last instruction in a function, so that the
6745 return address is still within the function, so that unwinding works
6746 properly. Note that IA-64 differs from dwarf2 on this point. */
6747 if (flag_unwind_tables
|| (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
6752 insn
= get_last_insn ();
6753 if (! INSN_P (insn
))
6754 insn
= prev_active_insn (insn
);
6755 if (GET_CODE (insn
) == INSN
6756 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
6757 && XINT (PATTERN (insn
), 1) == 2)
6760 insn
= prev_active_insn (insn
);
6762 if (GET_CODE (insn
) == CALL_INSN
)
6765 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6766 emit_insn (gen_break_f ());
6767 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6772 emit_predicate_relation_info ();
6775 /* Return true if REGNO is used by the epilogue. */
6778 ia64_epilogue_uses (regno
)
6784 /* When a function makes a call through a function descriptor, we
6785 will write a (potentially) new value to "gp". After returning
6786 from such a call, we need to make sure the function restores the
6787 original gp-value, even if the function itself does not use the
6789 return (TARGET_CONST_GP
&& !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
));
6791 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
6792 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
6793 /* For functions defined with the syscall_linkage attribute, all
6794 input registers are marked as live at all function exits. This
6795 prevents the register allocator from using the input registers,
6796 which in turn makes it possible to restart a system call after
6797 an interrupt without having to save/restore the input registers.
6798 This also prevents kernel data from leaking to application code. */
6799 return lookup_attribute ("syscall_linkage",
6800 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))) != NULL
;
6803 /* Conditional return patterns can't represent the use of `b0' as
6804 the return address, so we force the value live this way. */
6808 /* Likewise for ar.pfs, which is used by br.ret. */
6816 /* Return true if REGNO is used by the frame unwinder. */
6819 ia64_eh_uses (regno
)
6822 if (! reload_completed
)
6825 if (current_frame_info
.reg_save_b0
6826 && regno
== current_frame_info
.reg_save_b0
)
6828 if (current_frame_info
.reg_save_pr
6829 && regno
== current_frame_info
.reg_save_pr
)
6831 if (current_frame_info
.reg_save_ar_pfs
6832 && regno
== current_frame_info
.reg_save_ar_pfs
)
6834 if (current_frame_info
.reg_save_ar_unat
6835 && regno
== current_frame_info
.reg_save_ar_unat
)
6837 if (current_frame_info
.reg_save_ar_lc
6838 && regno
== current_frame_info
.reg_save_ar_lc
)
6844 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6846 We add @ to the name if this goes in small data/bss. We can only put
6847 a variable in small data/bss if it is defined in this module or a module
6848 that we are statically linked with. We can't check the second condition,
6849 but TREE_STATIC gives us the first one. */
6851 /* ??? If we had IPA, we could check the second condition. We could support
6852 programmer added section attributes if the variable is not defined in this
6855 /* ??? See the v850 port for a cleaner way to do this. */
6857 /* ??? We could also support own long data here. Generating movl/add/ld8
6858 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6859 code faster because there is one less load. This also includes incomplete
6860 types which can't go in sdata/sbss. */
6862 /* ??? See select_section. We must put short own readonly variables in
6863 sdata/sbss instead of the more natural rodata, because we can't perform
6864 the DECL_READONLY_SECTION test here. */
6866 extern struct obstack
* saveable_obstack
;
6869 ia64_encode_section_info (decl
, first
)
6871 int first ATTRIBUTE_UNUSED
;
6873 const char *symbol_str
;
6875 if (TREE_CODE (decl
) == FUNCTION_DECL
)
6877 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl
), 0)) = 1;
6881 /* Careful not to prod global register variables. */
6882 if (TREE_CODE (decl
) != VAR_DECL
6883 || GET_CODE (DECL_RTL (decl
)) != MEM
6884 || GET_CODE (XEXP (DECL_RTL (decl
), 0)) != SYMBOL_REF
)
6887 symbol_str
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
6889 /* We assume that -fpic is used only to create a shared library (dso).
6890 With -fpic, no global data can ever be sdata.
6891 Without -fpic, global common uninitialized data can never be sdata, since
6892 it can unify with a real definition in a dso. */
6893 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6894 to access them. The linker may then be able to do linker relaxation to
6895 optimize references to them. Currently sdata implies use of gprel. */
6896 /* We need the DECL_EXTERNAL check for C++. static class data members get
6897 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6898 statically allocated, but the space is allocated somewhere else. Such
6899 decls can not be own data. */
6900 if (! TARGET_NO_SDATA
6901 && ((TREE_STATIC (decl
) && ! DECL_EXTERNAL (decl
)
6902 && ! (DECL_ONE_ONLY (decl
) || DECL_WEAK (decl
))
6903 && ! (TREE_PUBLIC (decl
)
6905 || (DECL_COMMON (decl
)
6906 && (DECL_INITIAL (decl
) == 0
6907 || DECL_INITIAL (decl
) == error_mark_node
)))))
6908 || MODULE_LOCAL_P (decl
))
6909 /* Either the variable must be declared without a section attribute,
6910 or the section must be sdata or sbss. */
6911 && (DECL_SECTION_NAME (decl
) == 0
6912 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl
)),
6914 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl
)),
6917 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
6919 /* If the variable has already been defined in the output file, then it
6920 is too late to put it in sdata if it wasn't put there in the first
6921 place. The test is here rather than above, because if it is already
6922 in sdata, then it can stay there. */
6924 if (TREE_ASM_WRITTEN (decl
))
6927 /* If this is an incomplete type with size 0, then we can't put it in
6928 sdata because it might be too big when completed.
6929 Objects bigger than threshold should have SDATA_NAME_FLAG_CHAR
6930 added if they are in .sdata or .sbss explicitely. */
6932 && size
<= (HOST_WIDE_INT
) ia64_section_threshold
)
6933 || DECL_SECTION_NAME (decl
))
6934 && symbol_str
[0] != SDATA_NAME_FLAG_CHAR
)
6936 size_t len
= strlen (symbol_str
);
6937 char *newstr
= alloca (len
+ 1);
6940 *newstr
= SDATA_NAME_FLAG_CHAR
;
6941 memcpy (newstr
+ 1, symbol_str
, len
+ 1);
6943 string
= ggc_alloc_string (newstr
, len
+ 1);
6944 XSTR (XEXP (DECL_RTL (decl
), 0), 0) = string
;
6947 /* This decl is marked as being in small data/bss but it shouldn't
6948 be; one likely explanation for this is that the decl has been
6949 moved into a different section from the one it was in when
6950 ENCODE_SECTION_INFO was first called. Remove the '@'. */
6951 else if (symbol_str
[0] == SDATA_NAME_FLAG_CHAR
)
6953 XSTR (XEXP (DECL_RTL (decl
), 0), 0)
6954 = ggc_strdup (symbol_str
+ 1);
6958 /* Output assembly directives for prologue regions. */
6960 /* The current basic block number. */
6962 static int block_num
;
6964 /* True if we need a copy_state command at the start of the next block. */
6966 static int need_copy_state
;
6968 /* The function emits unwind directives for the start of an epilogue. */
6973 /* If this isn't the last block of the function, then we need to label the
6974 current state, and copy it back in at the start of the next block. */
6976 if (block_num
!= n_basic_blocks
- 1)
6978 fprintf (asm_out_file
, "\t.label_state 1\n");
6979 need_copy_state
= 1;
6982 fprintf (asm_out_file
, "\t.restore sp\n");
6985 /* This function processes a SET pattern looking for specific patterns
6986 which result in emitting an assembly directive required for unwinding. */
6989 process_set (asm_out_file
, pat
)
6993 rtx src
= SET_SRC (pat
);
6994 rtx dest
= SET_DEST (pat
);
6995 int src_regno
, dest_regno
;
6997 /* Look for the ALLOC insn. */
6998 if (GET_CODE (src
) == UNSPEC_VOLATILE
6999 && XINT (src
, 1) == 0
7000 && GET_CODE (dest
) == REG
)
7002 dest_regno
= REGNO (dest
);
7004 /* If this isn't the final destination for ar.pfs, the alloc
7005 shouldn't have been marked frame related. */
7006 if (dest_regno
!= current_frame_info
.reg_save_ar_pfs
)
7009 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
7010 ia64_dbx_register_number (dest_regno
));
7014 /* Look for SP = .... */
7015 if (GET_CODE (dest
) == REG
&& REGNO (dest
) == STACK_POINTER_REGNUM
)
7017 if (GET_CODE (src
) == PLUS
)
7019 rtx op0
= XEXP (src
, 0);
7020 rtx op1
= XEXP (src
, 1);
7021 if (op0
== dest
&& GET_CODE (op1
) == CONST_INT
)
7023 if (INTVAL (op1
) < 0)
7025 fputs ("\t.fframe ", asm_out_file
);
7026 fprintf (asm_out_file
, HOST_WIDE_INT_PRINT_DEC
,
7028 fputc ('\n', asm_out_file
);
7031 process_epilogue ();
7036 else if (GET_CODE (src
) == REG
7037 && REGNO (src
) == HARD_FRAME_POINTER_REGNUM
)
7038 process_epilogue ();
7045 /* Register move we need to look at. */
7046 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == REG
)
7048 src_regno
= REGNO (src
);
7049 dest_regno
= REGNO (dest
);
7054 /* Saving return address pointer. */
7055 if (dest_regno
!= current_frame_info
.reg_save_b0
)
7057 fprintf (asm_out_file
, "\t.save rp, r%d\n",
7058 ia64_dbx_register_number (dest_regno
));
7062 if (dest_regno
!= current_frame_info
.reg_save_pr
)
7064 fprintf (asm_out_file
, "\t.save pr, r%d\n",
7065 ia64_dbx_register_number (dest_regno
));
7068 case AR_UNAT_REGNUM
:
7069 if (dest_regno
!= current_frame_info
.reg_save_ar_unat
)
7071 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
7072 ia64_dbx_register_number (dest_regno
));
7076 if (dest_regno
!= current_frame_info
.reg_save_ar_lc
)
7078 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
7079 ia64_dbx_register_number (dest_regno
));
7082 case STACK_POINTER_REGNUM
:
7083 if (dest_regno
!= HARD_FRAME_POINTER_REGNUM
7084 || ! frame_pointer_needed
)
7086 fprintf (asm_out_file
, "\t.vframe r%d\n",
7087 ia64_dbx_register_number (dest_regno
));
7091 /* Everything else should indicate being stored to memory. */
7096 /* Memory store we need to look at. */
7097 if (GET_CODE (dest
) == MEM
&& GET_CODE (src
) == REG
)
7103 if (GET_CODE (XEXP (dest
, 0)) == REG
)
7105 base
= XEXP (dest
, 0);
7108 else if (GET_CODE (XEXP (dest
, 0)) == PLUS
7109 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
)
7111 base
= XEXP (XEXP (dest
, 0), 0);
7112 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
7117 if (base
== hard_frame_pointer_rtx
)
7119 saveop
= ".savepsp";
7122 else if (base
== stack_pointer_rtx
)
7127 src_regno
= REGNO (src
);
7131 if (current_frame_info
.reg_save_b0
!= 0)
7133 fprintf (asm_out_file
, "\t%s rp, %ld\n", saveop
, off
);
7137 if (current_frame_info
.reg_save_pr
!= 0)
7139 fprintf (asm_out_file
, "\t%s pr, %ld\n", saveop
, off
);
7143 if (current_frame_info
.reg_save_ar_lc
!= 0)
7145 fprintf (asm_out_file
, "\t%s ar.lc, %ld\n", saveop
, off
);
7149 if (current_frame_info
.reg_save_ar_pfs
!= 0)
7151 fprintf (asm_out_file
, "\t%s ar.pfs, %ld\n", saveop
, off
);
7154 case AR_UNAT_REGNUM
:
7155 if (current_frame_info
.reg_save_ar_unat
!= 0)
7157 fprintf (asm_out_file
, "\t%s ar.unat, %ld\n", saveop
, off
);
7164 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
7165 1 << (src_regno
- GR_REG (4)));
7173 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
7174 1 << (src_regno
- BR_REG (1)));
7181 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
7182 1 << (src_regno
- FR_REG (2)));
7185 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7186 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7187 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7188 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7189 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
7190 1 << (src_regno
- FR_REG (12)));
7202 /* This function looks at a single insn and emits any directives
7203 required to unwind this insn. */
7205 process_for_unwind_directive (asm_out_file
, insn
)
7209 if (flag_unwind_tables
7210 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
7214 if (GET_CODE (insn
) == NOTE
7215 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
7217 block_num
= NOTE_BASIC_BLOCK (insn
)->index
;
7219 /* Restore unwind state from immediately before the epilogue. */
7220 if (need_copy_state
)
7222 fprintf (asm_out_file
, "\t.body\n");
7223 fprintf (asm_out_file
, "\t.copy_state 1\n");
7224 need_copy_state
= 0;
7228 if (GET_CODE (insn
) == NOTE
|| ! RTX_FRAME_RELATED_P (insn
))
7231 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
7233 pat
= XEXP (pat
, 0);
7235 pat
= PATTERN (insn
);
7237 switch (GET_CODE (pat
))
7240 process_set (asm_out_file
, pat
);
7246 int limit
= XVECLEN (pat
, 0);
7247 for (par_index
= 0; par_index
< limit
; par_index
++)
7249 rtx x
= XVECEXP (pat
, 0, par_index
);
7250 if (GET_CODE (x
) == SET
)
7251 process_set (asm_out_file
, x
);
7264 ia64_init_builtins ()
7266 tree psi_type_node
= build_pointer_type (integer_type_node
);
7267 tree pdi_type_node
= build_pointer_type (long_integer_type_node
);
7268 tree endlink
= void_list_node
;
7270 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7271 tree si_ftype_psi_si_si
7272 = build_function_type (integer_type_node
,
7273 tree_cons (NULL_TREE
, psi_type_node
,
7274 tree_cons (NULL_TREE
, integer_type_node
,
7275 tree_cons (NULL_TREE
,
7279 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7280 tree di_ftype_pdi_di_di
7281 = build_function_type (long_integer_type_node
,
7282 tree_cons (NULL_TREE
, pdi_type_node
,
7283 tree_cons (NULL_TREE
,
7284 long_integer_type_node
,
7285 tree_cons (NULL_TREE
,
7286 long_integer_type_node
,
7288 /* __sync_synchronize */
7289 tree void_ftype_void
7290 = build_function_type (void_type_node
, endlink
);
7292 /* __sync_lock_test_and_set_si */
7293 tree si_ftype_psi_si
7294 = build_function_type (integer_type_node
,
7295 tree_cons (NULL_TREE
, psi_type_node
,
7296 tree_cons (NULL_TREE
, integer_type_node
, endlink
)));
7298 /* __sync_lock_test_and_set_di */
7299 tree di_ftype_pdi_di
7300 = build_function_type (long_integer_type_node
,
7301 tree_cons (NULL_TREE
, pdi_type_node
,
7302 tree_cons (NULL_TREE
, long_integer_type_node
,
7305 /* __sync_lock_release_si */
7307 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, psi_type_node
,
7310 /* __sync_lock_release_di */
7312 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, pdi_type_node
,
7315 #define def_builtin(name, type, code) \
7316 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
7318 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si
,
7319 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
);
7320 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di
,
7321 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
);
7322 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si
,
7323 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
);
7324 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di
,
7325 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
);
7327 def_builtin ("__sync_synchronize", void_ftype_void
,
7328 IA64_BUILTIN_SYNCHRONIZE
);
7330 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si
,
7331 IA64_BUILTIN_LOCK_TEST_AND_SET_SI
);
7332 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di
,
7333 IA64_BUILTIN_LOCK_TEST_AND_SET_DI
);
7334 def_builtin ("__sync_lock_release_si", void_ftype_psi
,
7335 IA64_BUILTIN_LOCK_RELEASE_SI
);
7336 def_builtin ("__sync_lock_release_di", void_ftype_pdi
,
7337 IA64_BUILTIN_LOCK_RELEASE_DI
);
7339 def_builtin ("__builtin_ia64_bsp",
7340 build_function_type (ptr_type_node
, endlink
),
7343 def_builtin ("__builtin_ia64_flushrs",
7344 build_function_type (void_type_node
, endlink
),
7345 IA64_BUILTIN_FLUSHRS
);
7347 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si
,
7348 IA64_BUILTIN_FETCH_AND_ADD_SI
);
7349 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si
,
7350 IA64_BUILTIN_FETCH_AND_SUB_SI
);
7351 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si
,
7352 IA64_BUILTIN_FETCH_AND_OR_SI
);
7353 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si
,
7354 IA64_BUILTIN_FETCH_AND_AND_SI
);
7355 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si
,
7356 IA64_BUILTIN_FETCH_AND_XOR_SI
);
7357 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si
,
7358 IA64_BUILTIN_FETCH_AND_NAND_SI
);
7360 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si
,
7361 IA64_BUILTIN_ADD_AND_FETCH_SI
);
7362 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si
,
7363 IA64_BUILTIN_SUB_AND_FETCH_SI
);
7364 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si
,
7365 IA64_BUILTIN_OR_AND_FETCH_SI
);
7366 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si
,
7367 IA64_BUILTIN_AND_AND_FETCH_SI
);
7368 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si
,
7369 IA64_BUILTIN_XOR_AND_FETCH_SI
);
7370 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si
,
7371 IA64_BUILTIN_NAND_AND_FETCH_SI
);
7373 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di
,
7374 IA64_BUILTIN_FETCH_AND_ADD_DI
);
7375 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di
,
7376 IA64_BUILTIN_FETCH_AND_SUB_DI
);
7377 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di
,
7378 IA64_BUILTIN_FETCH_AND_OR_DI
);
7379 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di
,
7380 IA64_BUILTIN_FETCH_AND_AND_DI
);
7381 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di
,
7382 IA64_BUILTIN_FETCH_AND_XOR_DI
);
7383 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di
,
7384 IA64_BUILTIN_FETCH_AND_NAND_DI
);
7386 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di
,
7387 IA64_BUILTIN_ADD_AND_FETCH_DI
);
7388 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di
,
7389 IA64_BUILTIN_SUB_AND_FETCH_DI
);
7390 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di
,
7391 IA64_BUILTIN_OR_AND_FETCH_DI
);
7392 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di
,
7393 IA64_BUILTIN_AND_AND_FETCH_DI
);
7394 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di
,
7395 IA64_BUILTIN_XOR_AND_FETCH_DI
);
7396 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di
,
7397 IA64_BUILTIN_NAND_AND_FETCH_DI
);
7402 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7410 cmpxchgsz.acq tmp = [ptr], tmp
7411 } while (tmp != ret)
7415 ia64_expand_fetch_and_op (binoptab
, mode
, arglist
, target
)
7417 enum machine_mode mode
;
7421 rtx ret
, label
, tmp
, ccv
, insn
, mem
, value
;
7424 arg0
= TREE_VALUE (arglist
);
7425 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7426 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7427 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7429 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7430 MEM_VOLATILE_P (mem
) = 1;
7432 if (target
&& register_operand (target
, mode
))
7435 ret
= gen_reg_rtx (mode
);
7437 emit_insn (gen_mf ());
7439 /* Special case for fetchadd instructions. */
7440 if (binoptab
== add_optab
&& fetchadd_operand (value
, VOIDmode
))
7443 insn
= gen_fetchadd_acq_si (ret
, mem
, value
);
7445 insn
= gen_fetchadd_acq_di (ret
, mem
, value
);
7450 tmp
= gen_reg_rtx (mode
);
7451 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7452 emit_move_insn (tmp
, mem
);
7454 label
= gen_label_rtx ();
7456 emit_move_insn (ret
, tmp
);
7457 emit_move_insn (ccv
, tmp
);
7459 /* Perform the specific operation. Special case NAND by noticing
7460 one_cmpl_optab instead. */
7461 if (binoptab
== one_cmpl_optab
)
7463 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
7464 binoptab
= and_optab
;
7466 tmp
= expand_binop (mode
, binoptab
, tmp
, value
, tmp
, 1, OPTAB_WIDEN
);
7469 insn
= gen_cmpxchg_acq_si (tmp
, mem
, tmp
, ccv
);
7471 insn
= gen_cmpxchg_acq_di (tmp
, mem
, tmp
, ccv
);
7474 emit_cmp_and_jump_insns (tmp
, ret
, NE
, 0, mode
, 1, label
);
7479 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7487 cmpxchgsz.acq tmp = [ptr], ret
7488 } while (tmp != old)
7492 ia64_expand_op_and_fetch (binoptab
, mode
, arglist
, target
)
7494 enum machine_mode mode
;
7498 rtx old
, label
, tmp
, ret
, ccv
, insn
, mem
, value
;
7501 arg0
= TREE_VALUE (arglist
);
7502 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7503 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7504 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7506 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7507 MEM_VOLATILE_P (mem
) = 1;
7509 if (target
&& ! register_operand (target
, mode
))
7512 emit_insn (gen_mf ());
7513 tmp
= gen_reg_rtx (mode
);
7514 old
= gen_reg_rtx (mode
);
7515 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7517 emit_move_insn (tmp
, mem
);
7519 label
= gen_label_rtx ();
7521 emit_move_insn (old
, tmp
);
7522 emit_move_insn (ccv
, tmp
);
7524 /* Perform the specific operation. Special case NAND by noticing
7525 one_cmpl_optab instead. */
7526 if (binoptab
== one_cmpl_optab
)
7528 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
7529 binoptab
= and_optab
;
7531 ret
= expand_binop (mode
, binoptab
, tmp
, value
, target
, 1, OPTAB_WIDEN
);
7534 insn
= gen_cmpxchg_acq_si (tmp
, mem
, ret
, ccv
);
7536 insn
= gen_cmpxchg_acq_di (tmp
, mem
, ret
, ccv
);
7539 emit_cmp_and_jump_insns (tmp
, old
, NE
, 0, mode
, 1, label
);
7544 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7548 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7551 For bool_ it's the same except return ret == oldval.
7555 ia64_expand_compare_and_swap (mode
, boolp
, arglist
, target
)
7556 enum machine_mode mode
;
7561 tree arg0
, arg1
, arg2
;
7562 rtx mem
, old
, new, ccv
, tmp
, insn
;
7564 arg0
= TREE_VALUE (arglist
);
7565 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7566 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
7567 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7568 old
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7569 new = expand_expr (arg2
, NULL_RTX
, mode
, 0);
7571 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7572 MEM_VOLATILE_P (mem
) = 1;
7574 if (! register_operand (old
, mode
))
7575 old
= copy_to_mode_reg (mode
, old
);
7576 if (! register_operand (new, mode
))
7577 new = copy_to_mode_reg (mode
, new);
7579 if (! boolp
&& target
&& register_operand (target
, mode
))
7582 tmp
= gen_reg_rtx (mode
);
7584 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7585 emit_move_insn (ccv
, old
);
7586 emit_insn (gen_mf ());
7588 insn
= gen_cmpxchg_acq_si (tmp
, mem
, new, ccv
);
7590 insn
= gen_cmpxchg_acq_di (tmp
, mem
, new, ccv
);
7596 target
= gen_reg_rtx (mode
);
7597 return emit_store_flag_force (target
, EQ
, tmp
, old
, mode
, 1, 1);
7603 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7606 ia64_expand_lock_test_and_set (mode
, arglist
, target
)
7607 enum machine_mode mode
;
7612 rtx mem
, new, ret
, insn
;
7614 arg0
= TREE_VALUE (arglist
);
7615 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7616 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7617 new = expand_expr (arg1
, NULL_RTX
, mode
, 0);
7619 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7620 MEM_VOLATILE_P (mem
) = 1;
7621 if (! register_operand (new, mode
))
7622 new = copy_to_mode_reg (mode
, new);
7624 if (target
&& register_operand (target
, mode
))
7627 ret
= gen_reg_rtx (mode
);
7630 insn
= gen_xchgsi (ret
, mem
, new);
7632 insn
= gen_xchgdi (ret
, mem
, new);
7638 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7641 ia64_expand_lock_release (mode
, arglist
, target
)
7642 enum machine_mode mode
;
7644 rtx target ATTRIBUTE_UNUSED
;
7649 arg0
= TREE_VALUE (arglist
);
7650 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7652 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7653 MEM_VOLATILE_P (mem
) = 1;
7655 emit_move_insn (mem
, const0_rtx
);
7661 ia64_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
7664 rtx subtarget ATTRIBUTE_UNUSED
;
7665 enum machine_mode mode ATTRIBUTE_UNUSED
;
7666 int ignore ATTRIBUTE_UNUSED
;
7668 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
7669 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
7670 tree arglist
= TREE_OPERAND (exp
, 1);
7674 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
7675 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
7676 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
7677 case IA64_BUILTIN_LOCK_RELEASE_SI
:
7678 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
7679 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
7680 case IA64_BUILTIN_FETCH_AND_OR_SI
:
7681 case IA64_BUILTIN_FETCH_AND_AND_SI
:
7682 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
7683 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
7684 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
7685 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
7686 case IA64_BUILTIN_OR_AND_FETCH_SI
:
7687 case IA64_BUILTIN_AND_AND_FETCH_SI
:
7688 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
7689 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
7693 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
7694 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
7695 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
7696 case IA64_BUILTIN_LOCK_RELEASE_DI
:
7697 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
7698 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
7699 case IA64_BUILTIN_FETCH_AND_OR_DI
:
7700 case IA64_BUILTIN_FETCH_AND_AND_DI
:
7701 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
7702 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
7703 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
7704 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
7705 case IA64_BUILTIN_OR_AND_FETCH_DI
:
7706 case IA64_BUILTIN_AND_AND_FETCH_DI
:
7707 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
7708 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
7718 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
7719 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
7720 return ia64_expand_compare_and_swap (mode
, 1, arglist
, target
);
7722 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
7723 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
7724 return ia64_expand_compare_and_swap (mode
, 0, arglist
, target
);
7726 case IA64_BUILTIN_SYNCHRONIZE
:
7727 emit_insn (gen_mf ());
7730 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
7731 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
7732 return ia64_expand_lock_test_and_set (mode
, arglist
, target
);
7734 case IA64_BUILTIN_LOCK_RELEASE_SI
:
7735 case IA64_BUILTIN_LOCK_RELEASE_DI
:
7736 return ia64_expand_lock_release (mode
, arglist
, target
);
7738 case IA64_BUILTIN_BSP
:
7739 if (! target
|| ! register_operand (target
, DImode
))
7740 target
= gen_reg_rtx (DImode
);
7741 emit_insn (gen_bsp_value (target
));
7744 case IA64_BUILTIN_FLUSHRS
:
7745 emit_insn (gen_flushrs ());
7748 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
7749 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
7750 return ia64_expand_fetch_and_op (add_optab
, mode
, arglist
, target
);
7752 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
7753 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
7754 return ia64_expand_fetch_and_op (sub_optab
, mode
, arglist
, target
);
7756 case IA64_BUILTIN_FETCH_AND_OR_SI
:
7757 case IA64_BUILTIN_FETCH_AND_OR_DI
:
7758 return ia64_expand_fetch_and_op (ior_optab
, mode
, arglist
, target
);
7760 case IA64_BUILTIN_FETCH_AND_AND_SI
:
7761 case IA64_BUILTIN_FETCH_AND_AND_DI
:
7762 return ia64_expand_fetch_and_op (and_optab
, mode
, arglist
, target
);
7764 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
7765 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
7766 return ia64_expand_fetch_and_op (xor_optab
, mode
, arglist
, target
);
7768 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
7769 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
7770 return ia64_expand_fetch_and_op (one_cmpl_optab
, mode
, arglist
, target
);
7772 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
7773 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
7774 return ia64_expand_op_and_fetch (add_optab
, mode
, arglist
, target
);
7776 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
7777 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
7778 return ia64_expand_op_and_fetch (sub_optab
, mode
, arglist
, target
);
7780 case IA64_BUILTIN_OR_AND_FETCH_SI
:
7781 case IA64_BUILTIN_OR_AND_FETCH_DI
:
7782 return ia64_expand_op_and_fetch (ior_optab
, mode
, arglist
, target
);
7784 case IA64_BUILTIN_AND_AND_FETCH_SI
:
7785 case IA64_BUILTIN_AND_AND_FETCH_DI
:
7786 return ia64_expand_op_and_fetch (and_optab
, mode
, arglist
, target
);
7788 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
7789 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
7790 return ia64_expand_op_and_fetch (xor_optab
, mode
, arglist
, target
);
7792 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
7793 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
7794 return ia64_expand_op_and_fetch (one_cmpl_optab
, mode
, arglist
, target
);
7803 /* For the HP-UX IA64 aggregate parameters are passed stored in the
7804 most significant bits of the stack slot. */
7807 ia64_hpux_function_arg_padding (mode
, type
)
7808 enum machine_mode mode
;
7811 /* Exception to normal case for structures/unions/etc. */
7813 if (type
&& AGGREGATE_TYPE_P (type
)
7814 && int_size_in_bytes (type
) < UNITS_PER_WORD
)
7817 /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
7818 hardwired to be true. */
7820 return((mode
== BLKmode
7821 ? (type
&& TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
7822 && int_size_in_bytes (type
) < (PARM_BOUNDARY
/ BITS_PER_UNIT
))
7823 : GET_MODE_BITSIZE (mode
) < PARM_BOUNDARY
)
7824 ? downward
: upward
);