/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "sched-int.h"
#include "target-def.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void ia64_free_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void ia64_encode_section_info PARAMS ((tree, int));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT));
static void ia64_aix_select_section PARAMS ((tree, int,
					     unsigned HOST_WIDE_INT))
static void ia64_aix_unique_section PARAMS ((tree, int))
static void ia64_aix_select_rtx_section PARAMS ((enum machine_mode, rtx,
						 unsigned HOST_WIDE_INT))
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { NULL,              0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

struct gcc_target targetm = TARGET_INITIALIZER;
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}
246 /* Return 1 if OP refers to a symbol in the sdata section. */
249 sdata_symbolic_operand (op
, mode
)
251 enum machine_mode mode ATTRIBUTE_UNUSED
;
253 switch (GET_CODE (op
))
256 if (GET_CODE (XEXP (op
, 0)) != PLUS
257 || GET_CODE (XEXP (XEXP (op
, 0), 0)) != SYMBOL_REF
)
259 op
= XEXP (XEXP (op
, 0), 0);
263 if (CONSTANT_POOL_ADDRESS_P (op
))
264 return GET_MODE_SIZE (get_pool_mode (op
)) <= ia64_section_threshold
;
266 return XSTR (op
, 0)[0] == SDATA_NAME_FLAG_CHAR
;
275 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
278 got_symbolic_operand (op
, mode
)
280 enum machine_mode mode ATTRIBUTE_UNUSED
;
282 switch (GET_CODE (op
))
286 if (GET_CODE (op
) != PLUS
)
288 if (GET_CODE (XEXP (op
, 0)) != SYMBOL_REF
)
291 if (GET_CODE (op
) != CONST_INT
)
296 /* Ok if we're not using GOT entries at all. */
297 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
300 /* "Ok" while emitting rtl, since otherwise we won't be provided
301 with the entire offset during emission, which makes it very
302 hard to split the offset into high and low parts. */
303 if (rtx_equal_function_value_matters
)
306 /* Force the low 14 bits of the constant to zero so that we do not
307 use up so many GOT entries. */
308 return (INTVAL (op
) & 0x3fff) == 0;
320 /* Return 1 if OP refers to a symbol. */
323 symbolic_operand (op
, mode
)
325 enum machine_mode mode ATTRIBUTE_UNUSED
;
327 switch (GET_CODE (op
))
340 /* Return 1 if OP refers to a function. */
343 function_operand (op
, mode
)
345 enum machine_mode mode ATTRIBUTE_UNUSED
;
347 if (GET_CODE (op
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (op
))
353 /* Return 1 if OP is setjmp or a similar function. */
355 /* ??? This is an unsatisfying solution. Should rethink. */
358 setjmp_operand (op
, mode
)
360 enum machine_mode mode ATTRIBUTE_UNUSED
;
365 if (GET_CODE (op
) != SYMBOL_REF
)
370 /* The following code is borrowed from special_function_p in calls.c. */
372 /* Disregard prefix _, __ or __x. */
375 if (name
[1] == '_' && name
[2] == 'x')
377 else if (name
[1] == '_')
387 && (! strcmp (name
, "setjmp")
388 || ! strcmp (name
, "setjmp_syscall")))
390 && ! strcmp (name
, "sigsetjmp"))
392 && ! strcmp (name
, "savectx")));
394 else if ((name
[0] == 'q' && name
[1] == 's'
395 && ! strcmp (name
, "qsetjmp"))
396 || (name
[0] == 'v' && name
[1] == 'f'
397 && ! strcmp (name
, "vfork")))
403 /* Return 1 if OP is a general operand, but when pic exclude symbolic
406 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
407 from PREDICATE_CODES. */
410 move_operand (op
, mode
)
412 enum machine_mode mode
;
414 if (! TARGET_NO_PIC
&& symbolic_operand (op
, mode
))
417 return general_operand (op
, mode
);
420 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
423 gr_register_operand (op
, mode
)
425 enum machine_mode mode
;
427 if (! register_operand (op
, mode
))
429 if (GET_CODE (op
) == SUBREG
)
430 op
= SUBREG_REG (op
);
431 if (GET_CODE (op
) == REG
)
433 unsigned int regno
= REGNO (op
);
434 if (regno
< FIRST_PSEUDO_REGISTER
)
435 return GENERAL_REGNO_P (regno
);
440 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
443 fr_register_operand (op
, mode
)
445 enum machine_mode mode
;
447 if (! register_operand (op
, mode
))
449 if (GET_CODE (op
) == SUBREG
)
450 op
= SUBREG_REG (op
);
451 if (GET_CODE (op
) == REG
)
453 unsigned int regno
= REGNO (op
);
454 if (regno
< FIRST_PSEUDO_REGISTER
)
455 return FR_REGNO_P (regno
);
460 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
463 grfr_register_operand (op
, mode
)
465 enum machine_mode mode
;
467 if (! register_operand (op
, mode
))
469 if (GET_CODE (op
) == SUBREG
)
470 op
= SUBREG_REG (op
);
471 if (GET_CODE (op
) == REG
)
473 unsigned int regno
= REGNO (op
);
474 if (regno
< FIRST_PSEUDO_REGISTER
)
475 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
480 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
483 gr_nonimmediate_operand (op
, mode
)
485 enum machine_mode mode
;
487 if (! nonimmediate_operand (op
, mode
))
489 if (GET_CODE (op
) == SUBREG
)
490 op
= SUBREG_REG (op
);
491 if (GET_CODE (op
) == REG
)
493 unsigned int regno
= REGNO (op
);
494 if (regno
< FIRST_PSEUDO_REGISTER
)
495 return GENERAL_REGNO_P (regno
);
500 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
503 fr_nonimmediate_operand (op
, mode
)
505 enum machine_mode mode
;
507 if (! nonimmediate_operand (op
, mode
))
509 if (GET_CODE (op
) == SUBREG
)
510 op
= SUBREG_REG (op
);
511 if (GET_CODE (op
) == REG
)
513 unsigned int regno
= REGNO (op
);
514 if (regno
< FIRST_PSEUDO_REGISTER
)
515 return FR_REGNO_P (regno
);
520 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
523 grfr_nonimmediate_operand (op
, mode
)
525 enum machine_mode mode
;
527 if (! nonimmediate_operand (op
, mode
))
529 if (GET_CODE (op
) == SUBREG
)
530 op
= SUBREG_REG (op
);
531 if (GET_CODE (op
) == REG
)
533 unsigned int regno
= REGNO (op
);
534 if (regno
< FIRST_PSEUDO_REGISTER
)
535 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
540 /* Return 1 if OP is a GR register operand, or zero. */
543 gr_reg_or_0_operand (op
, mode
)
545 enum machine_mode mode
;
547 return (op
== const0_rtx
|| gr_register_operand (op
, mode
));
550 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
553 gr_reg_or_5bit_operand (op
, mode
)
555 enum machine_mode mode
;
557 return ((GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 32)
558 || GET_CODE (op
) == CONSTANT_P_RTX
559 || gr_register_operand (op
, mode
));
562 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
565 gr_reg_or_6bit_operand (op
, mode
)
567 enum machine_mode mode
;
569 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
570 || GET_CODE (op
) == CONSTANT_P_RTX
571 || gr_register_operand (op
, mode
));
574 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
577 gr_reg_or_8bit_operand (op
, mode
)
579 enum machine_mode mode
;
581 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
582 || GET_CODE (op
) == CONSTANT_P_RTX
583 || gr_register_operand (op
, mode
));
586 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
589 grfr_reg_or_8bit_operand (op
, mode
)
591 enum machine_mode mode
;
593 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
594 || GET_CODE (op
) == CONSTANT_P_RTX
595 || grfr_register_operand (op
, mode
));
598 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
602 gr_reg_or_8bit_adjusted_operand (op
, mode
)
604 enum machine_mode mode
;
606 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_L (INTVAL (op
)))
607 || GET_CODE (op
) == CONSTANT_P_RTX
608 || gr_register_operand (op
, mode
));
611 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
612 immediate and an 8 bit adjusted immediate operand. This is necessary
613 because when we emit a compare, we don't know what the condition will be,
614 so we need the union of the immediates accepted by GT and LT. */
617 gr_reg_or_8bit_and_adjusted_operand (op
, mode
)
619 enum machine_mode mode
;
621 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
))
622 && CONST_OK_FOR_L (INTVAL (op
)))
623 || GET_CODE (op
) == CONSTANT_P_RTX
624 || gr_register_operand (op
, mode
));
627 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
630 gr_reg_or_14bit_operand (op
, mode
)
632 enum machine_mode mode
;
634 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_I (INTVAL (op
)))
635 || GET_CODE (op
) == CONSTANT_P_RTX
636 || gr_register_operand (op
, mode
));
639 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
642 gr_reg_or_22bit_operand (op
, mode
)
644 enum machine_mode mode
;
646 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_J (INTVAL (op
)))
647 || GET_CODE (op
) == CONSTANT_P_RTX
648 || gr_register_operand (op
, mode
));
651 /* Return 1 if OP is a 6 bit immediate operand. */
654 shift_count_operand (op
, mode
)
656 enum machine_mode mode ATTRIBUTE_UNUSED
;
658 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
659 || GET_CODE (op
) == CONSTANT_P_RTX
);
662 /* Return 1 if OP is a 5 bit immediate operand. */
665 shift_32bit_count_operand (op
, mode
)
667 enum machine_mode mode ATTRIBUTE_UNUSED
;
669 return ((GET_CODE (op
) == CONST_INT
670 && (INTVAL (op
) >= 0 && INTVAL (op
) < 32))
671 || GET_CODE (op
) == CONSTANT_P_RTX
);
674 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
677 shladd_operand (op
, mode
)
679 enum machine_mode mode ATTRIBUTE_UNUSED
;
681 return (GET_CODE (op
) == CONST_INT
682 && (INTVAL (op
) == 2 || INTVAL (op
) == 4
683 || INTVAL (op
) == 8 || INTVAL (op
) == 16));
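/* These four values are presumably accepted because they are the scale
   factors the shladd instruction can apply (a shift count of 1 to 4).  */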
686 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
689 fetchadd_operand (op
, mode
)
691 enum machine_mode mode ATTRIBUTE_UNUSED
;
693 return (GET_CODE (op
) == CONST_INT
694 && (INTVAL (op
) == -16 || INTVAL (op
) == -8 ||
695 INTVAL (op
) == -4 || INTVAL (op
) == -1 ||
696 INTVAL (op
) == 1 || INTVAL (op
) == 4 ||
697 INTVAL (op
) == 8 || INTVAL (op
) == 16));
700 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
703 fr_reg_or_fp01_operand (op
, mode
)
705 enum machine_mode mode
;
707 return ((GET_CODE (op
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (op
))
708 || fr_register_operand (op
, mode
));
711 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
712 POST_MODIFY with a REG as displacement. */
715 destination_operand (op
, mode
)
717 enum machine_mode mode
;
719 if (! nonimmediate_operand (op
, mode
))
721 if (GET_CODE (op
) == MEM
722 && GET_CODE (XEXP (op
, 0)) == POST_MODIFY
723 && GET_CODE (XEXP (XEXP (XEXP (op
, 0), 1), 1)) == REG
)
728 /* Like memory_operand, but don't allow post-increments. */
731 not_postinc_memory_operand (op
, mode
)
733 enum machine_mode mode
;
735 return (memory_operand (op
, mode
)
736 && GET_RTX_CLASS (GET_CODE (XEXP (op
, 0))) != 'a');
/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */
743 normal_comparison_operator (op
, mode
)
745 enum machine_mode mode
;
747 enum rtx_code code
= GET_CODE (op
);
748 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
749 && (code
== EQ
|| code
== NE
750 || code
== GT
|| code
== LE
|| code
== GTU
|| code
== LEU
));
753 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
754 signed immediate operand. */
757 adjusted_comparison_operator (op
, mode
)
759 enum machine_mode mode
;
761 enum rtx_code code
= GET_CODE (op
);
762 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
763 && (code
== LT
|| code
== GE
|| code
== LTU
|| code
== GEU
));
766 /* Return 1 if this is a signed inequality operator. */
769 signed_inequality_operator (op
, mode
)
771 enum machine_mode mode
;
773 enum rtx_code code
= GET_CODE (op
);
774 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
775 && (code
== GE
|| code
== GT
776 || code
== LE
|| code
== LT
));
779 /* Return 1 if this operator is valid for predication. */
782 predicate_operator (op
, mode
)
784 enum machine_mode mode
;
786 enum rtx_code code
= GET_CODE (op
);
787 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
788 && (code
== EQ
|| code
== NE
));
791 /* Return 1 if this operator can be used in a conditional operation. */
794 condop_operator (op
, mode
)
796 enum machine_mode mode
;
798 enum rtx_code code
= GET_CODE (op
);
799 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
800 && (code
== PLUS
|| code
== MINUS
|| code
== AND
801 || code
== IOR
|| code
== XOR
));
804 /* Return 1 if this is the ar.lc register. */
807 ar_lc_reg_operand (op
, mode
)
809 enum machine_mode mode
;
811 return (GET_MODE (op
) == DImode
812 && (mode
== DImode
|| mode
== VOIDmode
)
813 && GET_CODE (op
) == REG
814 && REGNO (op
) == AR_LC_REGNUM
);
817 /* Return 1 if this is the ar.ccv register. */
820 ar_ccv_reg_operand (op
, mode
)
822 enum machine_mode mode
;
824 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
825 && GET_CODE (op
) == REG
826 && REGNO (op
) == AR_CCV_REGNUM
);
829 /* Return 1 if this is the ar.pfs register. */
832 ar_pfs_reg_operand (op
, mode
)
834 enum machine_mode mode
;
836 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
837 && GET_CODE (op
) == REG
838 && REGNO (op
) == AR_PFS_REGNUM
);
841 /* Like general_operand, but don't allow (mem (addressof)). */
844 general_tfmode_operand (op
, mode
)
846 enum machine_mode mode
;
848 if (! general_operand (op
, mode
))
850 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
858 destination_tfmode_operand (op
, mode
)
860 enum machine_mode mode
;
862 if (! destination_operand (op
, mode
))
864 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
872 tfreg_or_fp01_operand (op
, mode
)
874 enum machine_mode mode
;
876 if (GET_CODE (op
) == SUBREG
)
878 return fr_reg_or_fp01_operand (op
, mode
);
881 /* Return 1 if OP is valid as a base register in a reg + offset address. */
884 basereg_operand (op
, mode
)
886 enum machine_mode mode
;
888 /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
889 checks from pa.c basereg_operand as well? Seems to be OK without them
892 return (register_operand (op
, mode
) &&
893 REG_POINTER ((GET_CODE (op
) == SUBREG
) ? SUBREG_REG (op
) : op
));
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;

  return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */
int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
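/* For example, a mask of 0xff0 used with SHIFT == 4: once the low zero
   bits are shifted out, op is 0xff and exact_log2 (0xff + 1) yields a
   field length of 8.  If the shifted value is not a solid block of ones,
   op + 1 is not a power of two and exact_log2 returns -1.  */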
937 /* Expand a symbolic constant load. */
938 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
941 ia64_expand_load_address (dest
, src
, scratch
)
942 rtx dest
, src
, scratch
;
946 /* The destination could be a MEM during initial rtl generation,
947 which isn't a valid destination for the PIC load address patterns. */
948 if (! register_operand (dest
, DImode
))
949 temp
= gen_reg_rtx (DImode
);
954 emit_insn (gen_load_gprel64 (temp
, src
));
955 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (src
))
956 emit_insn (gen_load_fptr (temp
, src
));
957 else if (sdata_symbolic_operand (src
, DImode
))
958 emit_insn (gen_load_gprel (temp
, src
));
959 else if (GET_CODE (src
) == CONST
960 && GET_CODE (XEXP (src
, 0)) == PLUS
961 && GET_CODE (XEXP (XEXP (src
, 0), 1)) == CONST_INT
962 && (INTVAL (XEXP (XEXP (src
, 0), 1)) & 0x1fff) != 0)
964 rtx subtarget
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
965 rtx sym
= XEXP (XEXP (src
, 0), 0);
966 HOST_WIDE_INT ofs
, hi
, lo
;
968 /* Split the offset into a sign extended 14-bit low part
969 and a complementary high part. */
970 ofs
= INTVAL (XEXP (XEXP (src
, 0), 1));
971 lo
= ((ofs
& 0x3fff) ^ 0x2000) - 0x2000;
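	  /* For example, an offset of 0x12345 yields lo = -0x1cbb; the
	     complementary high part is then 0x14000, which has its low
	     14 bits clear and sums with lo back to the original offset.  */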
975 scratch
= no_new_pseudos
? subtarget
: gen_reg_rtx (DImode
);
977 emit_insn (gen_load_symptr (subtarget
, plus_constant (sym
, hi
),
979 emit_insn (gen_adddi3 (temp
, subtarget
, GEN_INT (lo
)));
985 scratch
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
987 insn
= emit_insn (gen_load_symptr (temp
, src
, scratch
));
988 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_EQUAL
, src
, REG_NOTES (insn
));
992 emit_move_insn (dest
, temp
);
996 ia64_gp_save_reg (setjmp_p
)
999 rtx save
= cfun
->machine
->ia64_gp_save
;
  /* We can't save GP in a pseudo if we are calling setjmp, because
     pseudos won't be restored by longjmp.  For now, we save it in r4.  */
  /* ??? It would be more efficient to save this directly into a stack
     slot.  Unfortunately, the stack slot address gets cse'd across
     the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
     place.  */

  /* ??? Get the barf bag, Virginia.  We've got to replace this thing
     in place, since this rtx is used in exception handling receivers.
     Moreover, we must get this rtx out of regno_reg_rtx or reload
     will do the wrong thing.  */
1014 unsigned int old_regno
= REGNO (save
);
1015 if (setjmp_p
&& old_regno
!= GR_REG (4))
1017 REGNO (save
) = GR_REG (4);
1018 regno_reg_rtx
[old_regno
] = gen_rtx_raw_REG (DImode
, old_regno
);
1024 save
= gen_rtx_REG (DImode
, GR_REG (4));
1025 else if (! optimize
)
1026 save
= gen_rtx_REG (DImode
, LOC_REG (0));
1028 save
= gen_reg_rtx (DImode
);
1029 cfun
->machine
->ia64_gp_save
= save
;
1035 /* Split a post-reload TImode reference into two DImode components. */
1038 ia64_split_timode (out
, in
, scratch
)
1042 switch (GET_CODE (in
))
1045 out
[0] = gen_rtx_REG (DImode
, REGNO (in
));
1046 out
[1] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
1051 rtx base
= XEXP (in
, 0);
1053 switch (GET_CODE (base
))
1056 out
[0] = adjust_address (in
, DImode
, 0);
1059 base
= XEXP (base
, 0);
1060 out
[0] = adjust_address (in
, DImode
, 0);
1063 /* Since we're changing the mode, we need to change to POST_MODIFY
1064 as well to preserve the size of the increment. Either that or
1065 do the update in two steps, but we've already got this scratch
1066 register handy so let's use it. */
1068 base
= XEXP (base
, 0);
1070 = change_address (in
, DImode
,
1072 (Pmode
, base
, plus_constant (base
, 16)));
1075 base
= XEXP (base
, 0);
1077 = change_address (in
, DImode
,
1079 (Pmode
, base
, plus_constant (base
, -16)));
1085 if (scratch
== NULL_RTX
)
1087 out
[1] = change_address (in
, DImode
, scratch
);
1088 return gen_adddi3 (scratch
, base
, GEN_INT (8));
1093 split_double (in
, &out
[0], &out
[1]);
/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */
1112 spill_tfmode_operand (in
, force
)
1116 if (GET_CODE (in
) == SUBREG
1117 && GET_MODE (SUBREG_REG (in
)) == TImode
1118 && GET_CODE (SUBREG_REG (in
)) == REG
)
1120 rtx mem
= gen_mem_addressof (SUBREG_REG (in
), NULL_TREE
);
1121 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
1123 else if (force
&& GET_CODE (in
) == REG
)
1125 rtx mem
= gen_mem_addressof (in
, NULL_TREE
);
1126 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
1128 else if (GET_CODE (in
) == MEM
1129 && GET_CODE (XEXP (in
, 0)) == ADDRESSOF
)
1130 return change_address (in
, TFmode
, copy_to_reg (XEXP (in
, 0)));
1135 /* Emit comparison instruction if necessary, returning the expression
1136 that holds the compare result in the proper mode. */
1139 ia64_expand_compare (code
, mode
)
1141 enum machine_mode mode
;
1143 rtx op0
= ia64_compare_op0
, op1
= ia64_compare_op1
;
1146 /* If we have a BImode input, then we already have a compare result, and
1147 do not need to emit another comparison. */
1148 if (GET_MODE (op0
) == BImode
)
1150 if ((code
== NE
|| code
== EQ
) && op1
== const0_rtx
)
1157 cmp
= gen_reg_rtx (BImode
);
1158 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1159 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
)));
1163 return gen_rtx_fmt_ee (code
, mode
, cmp
, const0_rtx
);
1166 /* Emit the appropriate sequence for a call. */
1169 ia64_expand_call (retval
, addr
, nextarg
, sibcall_p
)
1175 rtx insn
, b0
, pfs
, gp_save
, narg_rtx
, dest
;
1179 addr
= XEXP (addr
, 0);
1180 b0
= gen_rtx_REG (DImode
, R_BR (0));
1181 pfs
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
1185 else if (IN_REGNO_P (REGNO (nextarg
)))
1186 narg
= REGNO (nextarg
) - IN_REG (0);
1188 narg
= REGNO (nextarg
) - OUT_REG (0);
1189 narg_rtx
= GEN_INT (narg
);
1191 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
1194 insn
= gen_sibcall_nopic (addr
, narg_rtx
, b0
, pfs
);
1196 insn
= gen_call_nopic (addr
, narg_rtx
, b0
);
1198 insn
= gen_call_value_nopic (retval
, addr
, narg_rtx
, b0
);
1199 emit_call_insn (insn
);
1203 indirect_p
= ! symbolic_operand (addr
, VOIDmode
);
1205 if (sibcall_p
|| (TARGET_CONST_GP
&& !indirect_p
))
1208 gp_save
= ia64_gp_save_reg (setjmp_operand (addr
, VOIDmode
));
1211 emit_move_insn (gp_save
, pic_offset_table_rtx
);
1213 /* If this is an indirect call, then we have the address of a descriptor. */
1216 dest
= force_reg (DImode
, gen_rtx_MEM (DImode
, addr
));
1217 emit_move_insn (pic_offset_table_rtx
,
1218 gen_rtx_MEM (DImode
, plus_constant (addr
, 8)));
1224 insn
= gen_sibcall_pic (dest
, narg_rtx
, b0
, pfs
);
1226 insn
= gen_call_pic (dest
, narg_rtx
, b0
);
1228 insn
= gen_call_value_pic (retval
, dest
, narg_rtx
, b0
);
1229 emit_call_insn (insn
);
1232 emit_move_insn (pic_offset_table_rtx
, gp_save
);
1235 /* Begin the assembly file. */
1238 emit_safe_across_calls (f
)
1241 unsigned int rs
, re
;
1248 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
1252 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
1256 fputs ("\t.pred.safe_across_calls ", f
);
1262 fprintf (f
, "p%u", rs
);
1264 fprintf (f
, "p%u-p%u", rs
, re
- 1);
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */

static struct ia64_frame_info current_frame_info;
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
1310 find_gr_spill (try_locals
)
1315 /* If this is a leaf function, first try an otherwise unused
1316 call-clobbered register. */
1317 if (current_function_is_leaf
)
1319 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1320 if (! regs_ever_live
[regno
]
1321 && call_used_regs
[regno
]
1322 && ! fixed_regs
[regno
]
1323 && ! global_regs
[regno
]
1324 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1326 current_frame_info
.gr_used_mask
|= 1 << regno
;
1333 regno
= current_frame_info
.n_local_regs
;
  /* If there is a frame pointer, then we can't use loc79, because
     that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
     reg_name switching code in ia64_expand_prologue.  */
1337 if (regno
< (80 - frame_pointer_needed
))
1339 current_frame_info
.n_local_regs
= regno
+ 1;
1340 return LOC_REG (0) + regno
;
1344 /* Failed to find a general register to spill to. Must use stack. */
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */
1357 static int last_scratch_gr_reg
;
1360 next_scratch_gr_reg ()
1364 for (i
= 0; i
< 32; ++i
)
1366 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
1367 if (call_used_regs
[regno
]
1368 && ! fixed_regs
[regno
]
1369 && ! global_regs
[regno
]
1370 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1372 last_scratch_gr_reg
= regno
;
1377 /* There must be _something_ available. */
1381 /* Helper function for ia64_compute_frame_size, called through
1382 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1385 mark_reg_gr_used_mask (reg
, data
)
1387 void *data ATTRIBUTE_UNUSED
;
1389 unsigned int regno
= REGNO (reg
);
1392 unsigned int i
, n
= HARD_REGNO_NREGS (regno
, GET_MODE (reg
));
1393 for (i
= 0; i
< n
; ++i
)
1394 current_frame_info
.gr_used_mask
|= 1 << (regno
+ i
);
1398 /* Returns the number of bytes offset between the frame pointer and the stack
1399 pointer for the current function. SIZE is the number of bytes of space
1400 needed for local variables. */
1403 ia64_compute_frame_size (size
)
1406 HOST_WIDE_INT total_size
;
1407 HOST_WIDE_INT spill_size
= 0;
1408 HOST_WIDE_INT extra_spill_size
= 0;
1409 HOST_WIDE_INT pretend_args_size
;
1412 int spilled_gr_p
= 0;
1413 int spilled_fr_p
= 0;
1417 if (current_frame_info
.initialized
)
  memset (&current_frame_info, 0, sizeof current_frame_info);
);
1421 CLEAR_HARD_REG_SET (mask
);
1423 /* Don't allocate scratches to the return register. */
1424 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
1426 /* Don't allocate scratches to the EH scratch registers. */
1427 if (cfun
->machine
->ia64_eh_epilogue_sp
)
1428 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
1429 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
1430 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
1438 regno
= LOC_REG (78) + ! frame_pointer_needed
;
1439 for (; regno
>= LOC_REG (0); regno
--)
1440 if (regs_ever_live
[regno
])
1442 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the callee.  */
1448 if (cfun
->machine
->n_varargs
> 0
1449 || lookup_attribute ("syscall_linkage",
1450 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
1451 current_frame_info
.n_input_regs
= 8;
1454 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
1455 if (regs_ever_live
[regno
])
1457 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
1460 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
1461 if (regs_ever_live
[regno
])
1463 i
= regno
- OUT_REG (0) + 1;
  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
1469 if (current_function_profile
)
1471 current_frame_info
.n_output_regs
= i
;
1473 /* ??? No rotating register support yet. */
1474 current_frame_info
.n_rotate_regs
= 0;
  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */
1480 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
1481 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1483 SET_HARD_REG_BIT (mask
, regno
);
1489 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1490 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1492 SET_HARD_REG_BIT (mask
, regno
);
1498 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
1499 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1501 SET_HARD_REG_BIT (mask
, regno
);
1506 /* Now come all special registers that might get saved in other
1507 general registers. */
1509 if (frame_pointer_needed
)
1511 current_frame_info
.reg_fp
= find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
1516 if (current_frame_info
.reg_fp
== 0)
1518 current_frame_info
.reg_fp
= LOC_REG (79);
1519 current_frame_info
.n_local_regs
++;
1523 if (! current_function_is_leaf
)
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
1528 SET_HARD_REG_BIT (mask
, BR_REG (0));
1530 current_frame_info
.reg_save_b0
= find_gr_spill (1);
1531 if (current_frame_info
.reg_save_b0
== 0)
1537 /* Similarly for ar.pfs. */
1538 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
1539 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
1540 if (current_frame_info
.reg_save_ar_pfs
== 0)
1542 extra_spill_size
+= 8;
1548 if (regs_ever_live
[BR_REG (0)] && ! call_used_regs
[BR_REG (0)])
1550 SET_HARD_REG_BIT (mask
, BR_REG (0));
  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
1562 if (current_frame_info
.reg_fp
!= 0
1563 && current_frame_info
.reg_save_b0
== current_frame_info
.reg_fp
+ 1
1564 && current_frame_info
.reg_save_ar_pfs
== current_frame_info
.reg_fp
+ 2)
1566 current_frame_info
.reg_save_b0
= current_frame_info
.reg_fp
;
1567 current_frame_info
.reg_save_ar_pfs
= current_frame_info
.reg_fp
+ 1;
1568 current_frame_info
.reg_fp
= current_frame_info
.reg_fp
+ 2;
1571 /* See if we need to store the predicate register block. */
1572 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1573 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1575 if (regno
<= PR_REG (63))
1577 SET_HARD_REG_BIT (mask
, PR_REG (0));
1578 current_frame_info
.reg_save_pr
= find_gr_spill (1);
1579 if (current_frame_info
.reg_save_pr
== 0)
1581 extra_spill_size
+= 8;
1585 /* ??? Mark them all as used so that register renaming and such
1586 are free to use them. */
1587 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1588 regs_ever_live
[regno
] = 1;
1591 /* If we're forced to use st8.spill, we're forced to save and restore
1593 if (spilled_gr_p
|| cfun
->machine
->n_varargs
)
1595 regs_ever_live
[AR_UNAT_REGNUM
] = 1;
1596 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
1597 current_frame_info
.reg_save_ar_unat
= find_gr_spill (spill_size
== 0);
1598 if (current_frame_info
.reg_save_ar_unat
== 0)
1600 extra_spill_size
+= 8;
1605 if (regs_ever_live
[AR_LC_REGNUM
])
1607 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
1608 current_frame_info
.reg_save_ar_lc
= find_gr_spill (spill_size
== 0);
1609 if (current_frame_info
.reg_save_ar_lc
== 0)
1611 extra_spill_size
+= 8;
1616 /* If we have an odd number of words of pretend arguments written to
1617 the stack, then the FR save area will be unaligned. We round the
1618 size of this area up to keep things 16 byte aligned. */
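  /* For example, 24 bytes of pretend arguments (three 8-byte words) would
     be rounded up to 32 so that the FR spills which follow stay 16 byte
     aligned.  */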
1620 pretend_args_size
= IA64_STACK_ALIGN (current_function_pretend_args_size
);
1622 pretend_args_size
= current_function_pretend_args_size
;
1624 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
1625 + current_function_outgoing_args_size
);
1626 total_size
= IA64_STACK_ALIGN (total_size
);
1628 /* We always use the 16-byte scratch area provided by the caller, but
1629 if we are a leaf function, there's no one to which we need to provide
1631 if (current_function_is_leaf
)
1632 total_size
= MAX (0, total_size
- 16);
1634 current_frame_info
.total_size
= total_size
;
1635 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
1636 current_frame_info
.spill_size
= spill_size
;
1637 current_frame_info
.extra_spill_size
= extra_spill_size
;
1638 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
1639 current_frame_info
.n_spilled
= n_spilled
;
1640 current_frame_info
.initialized
= reload_completed
;
1643 /* Compute the initial difference between the specified pair of registers. */
1646 ia64_initial_elimination_offset (from
, to
)
1649 HOST_WIDE_INT offset
;
1651 ia64_compute_frame_size (get_frame_size ());
1654 case FRAME_POINTER_REGNUM
:
1655 if (to
== HARD_FRAME_POINTER_REGNUM
)
1657 if (current_function_is_leaf
)
1658 offset
= -current_frame_info
.total_size
;
1660 offset
= -(current_frame_info
.total_size
1661 - current_function_outgoing_args_size
- 16);
1663 else if (to
== STACK_POINTER_REGNUM
)
1665 if (current_function_is_leaf
)
1668 offset
= 16 + current_function_outgoing_args_size
;
1674 case ARG_POINTER_REGNUM
:
1675 /* Arguments start above the 16 byte save area, unless stdarg
1676 in which case we store through the 16 byte save area. */
1677 if (to
== HARD_FRAME_POINTER_REGNUM
)
1678 offset
= 16 - current_function_pretend_args_size
;
1679 else if (to
== STACK_POINTER_REGNUM
)
1680 offset
= (current_frame_info
.total_size
1681 + 16 - current_function_pretend_args_size
);
1686 case RETURN_ADDRESS_POINTER_REGNUM
:
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */
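/* When both iterators are in use, spill_restore_mem below alternates
   next_iter between them, so consecutive spills and fills are addressed
   off two different base registers.  */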
struct spill_fill_data
{
  rtx init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
1723 setup_spill_pointers (n_spills
, init_reg
, cfa_off
)
1726 HOST_WIDE_INT cfa_off
;
1730 spill_fill_data
.init_after
= get_last_insn ();
1731 spill_fill_data
.init_reg
[0] = init_reg
;
1732 spill_fill_data
.init_reg
[1] = init_reg
;
1733 spill_fill_data
.prev_addr
[0] = NULL
;
1734 spill_fill_data
.prev_addr
[1] = NULL
;
1735 spill_fill_data
.prev_insn
[0] = NULL
;
1736 spill_fill_data
.prev_insn
[1] = NULL
;
1737 spill_fill_data
.prev_off
[0] = cfa_off
;
1738 spill_fill_data
.prev_off
[1] = cfa_off
;
1739 spill_fill_data
.next_iter
= 0;
1740 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
1742 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
1743 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
1745 int regno
= next_scratch_gr_reg ();
1746 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
1747 current_frame_info
.gr_used_mask
|= 1 << regno
;
1752 finish_spill_pointers ()
1754 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
1758 spill_restore_mem (reg
, cfa_off
)
1760 HOST_WIDE_INT cfa_off
;
1762 int iter
= spill_fill_data
.next_iter
;
1763 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
1764 rtx disp_rtx
= GEN_INT (disp
);
1767 if (spill_fill_data
.prev_addr
[iter
])
1769 if (CONST_OK_FOR_N (disp
))
1771 *spill_fill_data
.prev_addr
[iter
]
1772 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
1773 gen_rtx_PLUS (DImode
,
1774 spill_fill_data
.iter_reg
[iter
],
1776 REG_NOTES (spill_fill_data
.prev_insn
[iter
])
1777 = gen_rtx_EXPR_LIST (REG_INC
, spill_fill_data
.iter_reg
[iter
],
1778 REG_NOTES (spill_fill_data
.prev_insn
[iter
]));
1782 /* ??? Could use register post_modify for loads. */
1783 if (! CONST_OK_FOR_I (disp
))
1785 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1786 emit_move_insn (tmp
, disp_rtx
);
1789 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1790 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
      /* Micro-optimization: if we've created a frame pointer, it's at
	 CFA 0, which may allow the real iterator to be initialized lower,
	 slightly increasing parallelism.  Also, if there are few saves
	 it may eliminate the iterator entirely.  */
1798 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
1799 && frame_pointer_needed
)
1801 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
1802 set_mem_alias_set (mem
, get_varargs_alias_set ());
1810 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
1811 spill_fill_data
.init_reg
[iter
]);
1816 if (! CONST_OK_FOR_I (disp
))
1818 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1819 emit_move_insn (tmp
, disp_rtx
);
1823 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1824 spill_fill_data
.init_reg
[iter
],
1827 seq
= gen_sequence ();
1831 /* Careful for being the first insn in a sequence. */
1832 if (spill_fill_data
.init_after
)
1833 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
1836 rtx first
= get_insns ();
1838 insn
= emit_insn_before (seq
, first
);
1840 insn
= emit_insn (seq
);
1842 spill_fill_data
.init_after
= insn
;
      /* If DISP is 0, we may or may not have a further adjustment
	 afterward.  If we do, then the load/store insn may be modified
	 to be a post-modify.  If we don't, then this copy may be
	 eliminated by copyprop_hardreg_forward, which makes this
	 insn garbage, which runs afoul of the sanity check in
	 propagate_one_insn.  So mark this insn as legal to delete.  */
1851 REG_NOTES(insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
,
1855 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
1860 set_mem_alias_set (mem
, get_varargs_alias_set ());
1862 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
1863 spill_fill_data
.prev_off
[iter
] = cfa_off
;
1865 if (++iter
>= spill_fill_data
.n_iter
)
1867 spill_fill_data
.next_iter
= iter
;
1873 do_spill (move_fn
, reg
, cfa_off
, frame_reg
)
1874 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
1876 HOST_WIDE_INT cfa_off
;
1878 int iter
= spill_fill_data
.next_iter
;
1881 mem
= spill_restore_mem (reg
, cfa_off
);
1882 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
1883 spill_fill_data
.prev_insn
[iter
] = insn
;
1890 RTX_FRAME_RELATED_P (insn
) = 1;
      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */
1896 if (frame_pointer_needed
)
1898 base
= hard_frame_pointer_rtx
;
1903 base
= stack_pointer_rtx
;
1904 off
= current_frame_info
.total_size
- cfa_off
;
1908 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1909 gen_rtx_SET (VOIDmode
,
1910 gen_rtx_MEM (GET_MODE (reg
),
1911 plus_constant (base
, off
)),
1918 do_restore (move_fn
, reg
, cfa_off
)
1919 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
1921 HOST_WIDE_INT cfa_off
;
1923 int iter
= spill_fill_data
.next_iter
;
1926 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
1927 GEN_INT (cfa_off
)));
1928 spill_fill_data
.prev_insn
[iter
] = insn
;
/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */
1936 gen_movdi_x (dest
, src
, offset
)
1938 rtx offset ATTRIBUTE_UNUSED
;
1940 return gen_movdi (dest
, src
);
1944 gen_fr_spill_x (dest
, src
, offset
)
1946 rtx offset ATTRIBUTE_UNUSED
;
1948 return gen_fr_spill (dest
, src
);
1952 gen_fr_restore_x (dest
, src
, offset
)
1954 rtx offset ATTRIBUTE_UNUSED
;
1956 return gen_fr_restore (dest
, src
);
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:

	[ varargs spill area ]
	[ fr register spill area ]
	[ br register spill area ]
	[ ar register spill area ]
	[ pr register spill area ]
	[ gr register spill area ] */
/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */
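/* (The add-immediate form only accepts a 14 bit signed displacement --
   hence the CONST_OK_FOR_I checks below -- so a larger frame size forces
   the constant into a scratch register first.)  */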
1982 ia64_expand_prologue ()
1984 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
1985 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
1988 ia64_compute_frame_size (get_frame_size ());
1989 last_scratch_gr_reg
= 15;
1991 /* If there is no epilogue, then we don't need some prologue insns.
1992 We need to avoid emitting the dead prologue insns, because flow
1993 will complain about them. */
1998 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
1999 if ((e
->flags
& EDGE_FAKE
) == 0
2000 && (e
->flags
& EDGE_FALLTHRU
) != 0)
2002 epilogue_p
= (e
!= NULL
);
  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
2011 if (! TARGET_REG_NAMES
)
2013 int inputs
= current_frame_info
.n_input_regs
;
2014 int locals
= current_frame_info
.n_local_regs
;
2015 int outputs
= current_frame_info
.n_output_regs
;
2017 for (i
= 0; i
< inputs
; i
++)
2018 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
2019 for (i
= 0; i
< locals
; i
++)
2020 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
2021 for (i
= 0; i
< outputs
; i
++)
2022 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
2032 if (current_frame_info
.reg_fp
)
2034 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2035 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2036 = reg_names
[current_frame_info
.reg_fp
];
2037 reg_names
[current_frame_info
.reg_fp
] = tmp
;
  /* Fix up the return address placeholder.  */
  /* ??? We can fail if __builtin_return_address is used, and we didn't
     allocate a register in which to save b0.  I can't think of a way to
     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
     then be sure that I got the right one.  Further, reload doesn't seem
     to care if an eliminable register isn't used, and "eliminates" it
     anyway.  */
2047 if (regs_ever_live
[RETURN_ADDRESS_POINTER_REGNUM
]
2048 && current_frame_info
.reg_save_b0
!= 0)
2049 XINT (return_address_pointer_rtx
, 0) = current_frame_info
.reg_save_b0
;
2051 /* We don't need an alloc instruction if we've used no outputs or locals. */
2052 if (current_frame_info
.n_local_regs
== 0
2053 && current_frame_info
.n_output_regs
== 0
2054 && current_frame_info
.n_input_regs
<= current_function_args_info
.int_regs
)
2056 /* If there is no alloc, but there are input registers used, then we
2057 need a .regstk directive. */
2058 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
2059 ar_pfs_save_reg
= NULL_RTX
;
2063 current_frame_info
.need_regstk
= 0;
2065 if (current_frame_info
.reg_save_ar_pfs
)
2066 regno
= current_frame_info
.reg_save_ar_pfs
;
2068 regno
= next_scratch_gr_reg ();
2069 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
2071 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
2072 GEN_INT (current_frame_info
.n_input_regs
),
2073 GEN_INT (current_frame_info
.n_local_regs
),
2074 GEN_INT (current_frame_info
.n_output_regs
),
2075 GEN_INT (current_frame_info
.n_rotate_regs
)));
2076 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_pfs
!= 0);
  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
			stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (CONST_OK_FOR_I (- current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
				    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (GET_CODE (offset) != CONST_INT)
	    REG_NOTES (insn)
	      = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				   gen_rtx_SET (VOIDmode, stack_pointer_rtx,
						gen_rtx_PLUS (DImode,
							      stack_pointer_rtx,
							      frame_size_rtx)),
				   REG_NOTES (insn));
	}

      /* ??? At this point we must generate a magic insn that appears to
	 modify the stack pointer, the frame pointer, and all spill
	 iterators.  This would allow the most scheduling freedom.  For
	 now, just hard stop.  */
      emit_insn (gen_blockage ());
    }
  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	}

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
	 need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
	emit_insn (gen_prologue_use (ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);
  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
	  insn = emit_move_insn (alt_reg, reg);

	  /* ??? Denote pr spill/fill by a DImode move that modifies all
	     64 hard registers.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode, alt_reg, reg),
				 REG_NOTES (insn));

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  insn = emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	}
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (current_frame_info.reg_save_ar_pfs == 0
      && ! current_function_is_leaf)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	}
    }
  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_spill (gen_gr_spill, reg, cfa_off, reg);
      }

  /* Handle BR0 specially -- it may be getting stored permanently in
     some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.reg_save_b0 != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	}
    }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (alt_reg, reg);
	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (TFmode, regno);
	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
      }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();
}
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.  */

void
ia64_expand_epilogue (sibcall_p)
     int sibcall_p;
{
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
			  hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
			  current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators and the frame pointer.  This would
	 allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }
  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.reg_save_pr != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	}
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat != 0)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
	}
    }
  else
    ar_unat_save_reg = NULL_RTX;
  if (current_frame_info.reg_save_ar_pfs != 0)
    {
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (! current_function_is_leaf)
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.reg_save_ar_lc != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	}
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* Restore all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_restore (gen_gr_restore, reg, cfa_off);
      }

  /* Restore the branch registers.  Handle B0 specially, as it may
     have gotten stored in some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	}
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	do_restore (gen_movdi_x, alt_reg, cfa_off);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (TFmode, regno);
	do_restore (gen_fr_restore_x, reg, cfa_off);
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();
  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators, the stack pointer, and the frame
	 pointer.  This would allow the most scheduling freedom.  For now,
	 just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (CONST_OK_FOR_I (current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      if (GET_CODE (offset) != CONST_INT)
	REG_NOTES (insn)
	  = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			       gen_rtx_SET (VOIDmode, stack_pointer_rtx,
					    gen_rtx_PLUS (DImode,
							  stack_pointer_rtx,
							  frame_size_rtx)),
			       REG_NOTES (insn));
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
	 first available call clobbered register.  If there was a frame_pointer
	 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
	 so we have to make sure we're using the string "r2" when emitting
	 the register name for the assembler.  */
      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
	fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
	 registers.  Otherwise, if the callee tries to pass its parameters
	 through to another call without an intervening alloc, then these
	 values may be clobbered.  */
      /* ??? We don't need to preserve all input registers.  We only need to
	 preserve those input registers used as arguments to the sibling call.
	 It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
	emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
			      GEN_INT (0), GEN_INT (0),
			      GEN_INT (current_frame_info.n_input_regs),
			      GEN_INT (0)));
    }
}
/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return ()
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
	      && current_frame_info.n_spilled == 0
	      && current_frame_info.reg_save_b0 == 0
	      && current_frame_info.reg_save_pr == 0
	      && current_frame_info.reg_save_ar_pfs == 0
	      && current_frame_info.reg_save_ar_unat == 0
	      && current_frame_info.reg_save_ar_lc == 0);
    }
  return 0;
}
int
ia64_hard_regno_rename_ok (from, to)
     int from;
     int to;
{
  /* Don't clobber any of the registers we reserved for the prologue.  */
  if (to == current_frame_info.reg_fp
      || to == current_frame_info.reg_save_b0
      || to == current_frame_info.reg_save_pr
      || to == current_frame_info.reg_save_ar_pfs
      || to == current_frame_info.reg_save_ar_unat
      || to == current_frame_info.reg_save_ar_lc)
    return 0;

  if (from == current_frame_info.reg_fp
      || from == current_frame_info.reg_save_b0
      || from == current_frame_info.reg_save_pr
      || from == current_frame_info.reg_save_ar_pfs
      || from == current_frame_info.reg_save_ar_unat
      || from == current_frame_info.reg_save_ar_lc)
    return 0;

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
    return 0;

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);

  /* Reg 4 contains the saved gp; we can't reliably rename this.  */
  if (from == GR_REG (4) && current_function_calls_setjmp)
    return 0;

  return 1;
}
/* Target hook for assembling integer objects.  Handle word-sized
   aligned objects and detect the cases when @fptr is needed.  */

static bool
ia64_assemble_integer (x, size, aligned_p)
     rtx x;
     unsigned int size;
     int aligned_p;
{
  if (size == UNITS_PER_WORD && aligned_p
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
      && GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_FLAG (x))
    {
      fputs ("\tdata8\t@fptr(", asm_out_file);
      output_addr_const (asm_out_file, x);
      fputs (")\n", asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
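
/* As an illustration only (not output tied to any particular translation
   unit): emitting the address of a function named "foo" under the rule
   above produces

	data8	@fptr(foo)

   so that the linker supplies an official function descriptor rather than
   the raw code address.  "foo" is a made-up name used just for this
   example.  */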
/* Emit the function prologue.  */

static void
ia64_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
	     current_frame_info.n_input_regs,
	     current_frame_info.n_local_regs,
	     current_frame_info.n_output_regs,
	     current_frame_info.n_rotate_regs);
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  /* Emit the .prologue directive.  */

  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.reg_save_b0 != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.reg_save_b0;
    }
  if (current_frame_info.reg_save_ar_pfs != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_ar_pfs;
      grsave_prev = current_frame_info.reg_save_ar_pfs;
    }
  if (current_frame_info.reg_fp != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_fp == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
	grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.reg_fp;
    }
  if (current_frame_info.reg_save_pr != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_pr == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_pr;
    }

  if (mask)
    fprintf (file, "\t.prologue %d, %d\n", mask,
	     ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);
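
/* Illustrative note on the directive above: in the IA-64 unwind syntax
   ".prologue mask, grsave", the mask bits 8, 4, 2 and 1 say that rp (b0),
   ar.pfs, the previous stack pointer and the predicates were saved in
   consecutive general registers starting at grsave.  A function that keeps
   only b0 and ar.pfs that way would therefore use a mask of 8 | 4 == 12.
   (Example values only; the real numbers come from current_frame_info as
   computed above.)  */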
  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
	     (long) (current_frame_info.spill_cfa_off
		     + current_frame_info.spill_size));
}
2719 ia64_output_function_end_prologue (file
)
2722 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2725 fputs ("\t.body\n", file
);
/* Emit the function epilogue.  */

static void
ia64_output_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  int i;

  /* Reset from the function's potential modifications.  */
  XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;

  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }

  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  current_frame_info.initialized = 0;
}
int
ia64_dbx_register_number (regno)
     int regno;
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.reg_fp)
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
	regno = current_frame_info.reg_fp;
      else if (regno == current_frame_info.reg_fp)
	regno = HARD_FRAME_POINTER_REGNUM;
    }

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}
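
/* Worked example of the mapping above (illustrative values only): with
   n_input_regs == 3 and n_local_regs == 2, in2 maps to debug register
   32 + 2 = 34, loc1 to 32 + 3 + 1 = 36, and out0 to 32 + 3 + 2 + 0 = 37.  */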
void
ia64_initialize_trampoline (addr, fnaddr, static_chain)
     rtx addr, fnaddr, static_chain;
{
  rtx addr_reg, eight = GEN_INT (8);

  /* Load up our iterator.  */
  addr_reg = gen_reg_rtx (Pmode);
  emit_move_insn (addr_reg, addr);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  copy_to_reg (plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The third word is the target descriptor.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The fourth word is the static chain.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
}
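
/* Summarizing the stores emitted above, the trampoline occupies four
   8-byte words (illustrative layout sketch only):

	[addr +  0]  __ia64_trampoline	fake descriptor: entry point
	[addr +  8]  addr + 16		fake descriptor: "gp"
	[addr + 16]  fnaddr		the target descriptor word
	[addr + 24]  static_chain  */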
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

void
ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
     CUMULATIVE_ARGS cum;
     int int_mode;
     tree type;
     int *pretend_size;
     int second_time ATTRIBUTE_UNUSED;
{
  /* If this is a stdarg function, then skip the current argument.  */
  if (! current_function_varargs)
    ia64_function_arg_advance (&cum, int_mode, type, 1);

  if (cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}
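
/* For instance (example numbers only): if the named arguments of a varargs
   function consume 3 of the MAX_ARGUMENT_SLOTS == 8 slots, then n == 5 and
   *pretend_size becomes 5 * UNITS_PER_WORD == 40 bytes, and the prologue
   later spills those 5 unnamed argument registers.  */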
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g,
   SFmode).  128-bit quad-precision floats are excluded.  */

static enum machine_mode
hfa_element_mode (type, nested)
     tree type;
     int nested;
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
	 types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
			      * BITS_PER_UNIT, MODE_FLOAT, 0);
      else
	return VOIDmode;

    case REAL_TYPE:
      /* ??? Should exclude 128-bit long double here.  */
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  */
      if (nested)
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      return hfa_element_mode (TREE_TYPE (type), 1);

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL)
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    return VOIDmode;
	  else
	    {
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }
}
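
/* For illustration, a type such as

     struct vec3 { float x, y, z; };

   is a homogeneous SFmode aggregate under the definition above, while

     struct mixed { float x; double y; };

   is not, because its leaves do not share a single floating point mode.  */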
/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */
/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

rtx
ia64_function_arg (cum, mode, type, named, incoming)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming;
{
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* Integer and float arguments larger than 8 bytes start at the next even
     boundary.  Aggregates larger than 8 bytes start at the next even boundary
     if the aggregate has 16 byte alignment.  Net effect is that types with
     alignment greater than 8 start at the next even boundary.  */
  /* ??? The ABI does not specify how to handle aggregates with alignment from
     9 to 15 bytes, or greater than 16.  We handle them all as if they had
     16 byte alignment.  Such aggregates can occur only if gcc extensions are
     used.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
							      + fp_regs)),
				      GEN_INT (offset));
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
	offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	{
	  enum machine_mode gr_mode = DImode;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  if (offset & 0x4)
	    gr_mode = SImode;
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)
	    gr_mode = SImode;
	  /* Complex floats need to have float mode.  */
	  if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	    gr_mode = hfa_mode;

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode, (basereg
							     + int_regs)),
				      GEN_INT (offset));
	  offset += GET_MODE_SIZE (gr_mode);
	  int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
		      ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
	}

      /* If we ended up using just one location, just return that one loc.  */
      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	return gen_rtx_REG (mode, basereg + cum->words + offset);
      else
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode, (FR_ARG_FIRST
							  + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}
/* Return number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   on the stack.  */

int
ia64_function_arg_partial_nregs (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return MAX_ARGUMENT_SLOTS - cum->words - offset;
}
/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

void
ia64_function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    return;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
	{
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    cum->int_regs = cum->words;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	cum->int_regs = cum->words;
      else
	/* ??? Complex types should not reach here.  */
	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* ??? Complex types should not reach here.  */
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
      cum->int_regs = cum->words;
    }
}
/* Variable sized types are passed by reference.  */
/* ??? At present this is a GCC extension to the IA-64 ABI.  */

int
ia64_function_arg_pass_by_reference (cum, mode, type, named)
     CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
3233 ia64_va_start (stdarg_p
, valist
, nextarg
)
3241 arg_words
= current_function_args_info
.words
;
3246 ofs
= (arg_words
>= MAX_ARGUMENT_SLOTS
? -UNITS_PER_WORD
: 0);
3248 nextarg
= plus_constant (nextarg
, ofs
);
3249 std_expand_builtin_va_start (1, valist
, nextarg
);
/* Implement va_arg.  */

rtx
ia64_va_arg (valist, type)
     tree valist, type;
{
  tree t;

  /* Variable sized types are passed by reference.  */
  if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
    {
      rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
      return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
    }

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
		 build_int_2 (-2 * UNITS_PER_WORD, -1));
      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
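
  /* With UNITS_PER_WORD == 8, the tree operations above compute
     valist = (valist + 15) & -16, i.e. they round the argument pointer up
     to the next 16 byte boundary before an over-aligned argument is read.  */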
  return std_expand_builtin_va_arg (valist, type);
}
/* Return 1 if function return value returned in memory.  Return 0 if it is
   in a register.  */

int
ia64_return_in_memory (valtype)
     tree valtype;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  HOST_WIDE_INT byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = GET_MODE_SIZE (mode);
  if (mode == BLKmode)
    byte_size = int_size_in_bytes (valtype);

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
	return 1;
      else
	return 0;
    }
  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return 1;
  else
    return 0;
}
/* Return rtx for register that holds the function return value.  */

rtx
ia64_function_value (valtype, func)
     tree valtype;
     tree func ATTRIBUTE_UNUSED;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
				      GEN_INT (offset));
	  offset += hfa_size;
	}

      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype) &&
	   ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    return gen_rtx_REG (mode, GR_RET_FIRST);
}
/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

void
ia64_print_operand_address (stream, address)
     FILE * stream ATTRIBUTE_UNUSED;
     rtx    address ATTRIBUTE_UNUSED;
{
}
/* Print an operand to an assembler instruction.
   C	Swap and print a comparison operator.
   D	Print an FP comparison operator.
   E	Print 32 - constant, for SImode shifts as extract.
   e	Print 64 - constant, for DImode rotates.
   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
	a floating point register emitted normally.
   I	Invert a predicate register by adding 1.
   J	Select the proper predicate register for a condition.
   j	Select the inverse predicate register for a condition.
   O	Append .acq for volatile load.
   P	Postincrement of a MEM.
   Q	Append .rel for volatile store.
   S	Shift amount for shladd instruction.
   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
	for Intel assembler.
   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
	for Intel assembler.
   r	Print register name, or constant 0 as r0.  HP compatibility for
	Linux kernel.  */
void
ia64_print_operand (file, x, code)
     FILE * file;
     rtx    x;
     int    code;
{
  const char *str;

  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'C':
      {
	enum rtx_code c = swap_condition (GET_CODE (x));
	fputs (GET_RTX_NAME (c), file);
	return;
      }

    case 'D':
      switch (GET_CODE (x))
	{
	default:
	  str = GET_RTX_NAME (GET_CODE (x));
	  break;
	}
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
	str = reg_names[FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
	str = reg_names[FR_REG (1)];
      else if (GET_CODE (x) == REG)
	str = reg_names[REGNO (x)];
      else
	abort ();
      fputs (str, file);
      return;
    case 'I':
      fputs (reg_names[REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
	unsigned int regno = REGNO (XEXP (x, 0));
	if (GET_CODE (x) == EQ)
	  regno += 1;
	if (code == 'j')
	  regno ^= 1;
	fputs (reg_names[regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
	fputs(".acq", file);
      return;

    case 'P':
      {
	HOST_WIDE_INT value;

	switch (GET_CODE (XEXP (x, 0)))
	  {
	  default:
	    return;

	  case POST_MODIFY:
	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    if (GET_CODE (x) == CONST_INT)
	      value = INTVAL (x);
	    else if (GET_CODE (x) == REG)
	      {
		fprintf (file, ", %s", reg_names[REGNO (x)]);
		return;
	      }
	    else
	      abort ();
	    break;

	  case POST_INC:
	    value = GET_MODE_SIZE (GET_MODE (x));
	    break;

	  case POST_DEC:
	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
	    break;
	  }

	fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
	return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
	fputs(".rel", file);
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  const char *prefix = "0x";
	  if (INTVAL (x) & 0x80000000)
	    {
	      fprintf (file, "0xffffffff");
	      prefix = "";
	    }
	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
	 Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
	fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
	output_addr_const (file, x);
      else
	output_operand_lossage ("invalid %%r value");
      return;
    case '+':
      {
	const char *which;

	/* For conditional branches, returns or calls, substitute
	   sptk, dptk, dpnt, or spnt for %s.  */
	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	if (x)
	  {
	    int pred_val = INTVAL (XEXP (x, 0));

	    /* Guess top and bottom 10% statically predicted.  */
	    if (pred_val < REG_BR_PROB_BASE / 50)
	      which = ".spnt";
	    else if (pred_val < REG_BR_PROB_BASE / 2)
	      which = ".dpnt";
	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
	      which = ".dptk";
	    else
	      which = ".sptk";
	  }
	else if (GET_CODE (current_output_insn) == CALL_INSN)
	  which = ".sptk";
	else
	  which = ".dptk";

	fputs (which, file);
	return;
      }

    case ',':
      x = current_insn_predicate;
      if (x)
	{
	  unsigned int regno = REGNO (XEXP (x, 0));
	  if (GET_CODE (x) == EQ)
	    regno += 1;
	  fprintf (file, "(%s) ", reg_names[regno]);
	}
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names[REGNO (x)], file);
      break;

    case MEM:
      {
	rtx addr = XEXP (x, 0);
	if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
	  addr = XEXP (addr, 0);
	fprintf (file, "[%s]", reg_names[REGNO (addr)]);
	break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO, using MODE.  */

int
ia64_register_move_cost (mode, from, to)
     enum machine_mode mode;
     enum reg_class from, to;
{
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
    to = GR_REGS;
  if (from == ADDL_REGS)
    from = GR_REGS;

  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
  if (from < to)
    {
      enum reg_class tmp = to;
      to = from, from = tmp;
    }

  /* Moving from FR<->GR in TFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as MEMORY_MOVE_COST
     to avoid spectacularly poor register class preferencing.  */
  if (mode == TFmode)
    {
      if (to != GR_REGS || from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      else
	return 3;
    }

  switch (to)
    {
    case PR_REGS:
      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
	return 3;
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case BR_REGS:
      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case AR_I_REGS:
    case AR_M_REGS:
      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case GR_REGS:
    case FR_REGS:
    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
    case ALL_REGS:
      break;

    default:
      abort ();
    }

  return 2;
}
/* This function returns the register class required for a secondary
   register when copying between one of the registers in CLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */

enum reg_class
ia64_secondary_reload_class (class, mode, x)
     enum reg_class class;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx x;
{
  int regno = -1;

  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  switch (class)
    {
    case BR_REGS:
      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
	 interaction.  We end up with two pseudos with overlapping lifetimes
	 both of which are equiv to the same constant, and both which need
	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
	 changes depending on the path length, which means the qty_first_reg
	 check in make_regs_eqv can give different answers at different times.
	 At some point I'll probably need a reload_indi pattern to handle
	 this.

	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
	 non-general registers for good measure.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
	return GR_REGS;

      /* This is needed if a pseudo used as a call_operand gets spilled to a
	 stack slot.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;
      break;

    case FR_REGS:
      /* Need to go through general registers to get to other class regs.  */
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
	return GR_REGS;

      /* This can happen when a paradoxical subreg is an operand to the
	 mul pattern.  */
      /* ??? This shouldn't be necessary after instruction scheduling is
	 enabled, because paradoxical subregs are not accepted by
	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
	 stop the paradoxical subreg stupidity in the *_operand functions
	 in recog.c.  */
      if (GET_CODE (x) == MEM
	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
	      || GET_MODE (x) == QImode))
	return GR_REGS;

      /* This can happen because of the ior/and/etc patterns that accept FP
	 registers as operands.  If the third operand is a constant, then it
	 needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)
	return GR_REGS;

      /* This can happen because of register elimination in a muldi3 insn.
	 E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
	return GR_REGS;
      break;

    case PR_REGS:
      /* ??? This happens if we cse/gcse a BImode value across a call,
	 and the function has a nonlocal goto.  This is because global
	 does not allocate call crossing pseudos to hard registers when
	 current_function_has_nonlocal_goto is true.  This is relatively
	 common for C++ programs that use exceptions.  To reproduce,
	 return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;

      /* This can happen when we take a BImode subreg of a DImode value,
	 and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
	return GR_REGS;
      break;

    case GR_REGS:
      /* Since we have no offsettable memory addresses, we need a temporary
	 to hold the address of the second word.  */
      if (mode == TImode)
	return GR_REGS;
      break;

    default:
      break;
    }

  return NO_REGS;
}
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (file, decl, name)
     FILE *file;
     tree decl;
     const char *name;
{
  int save_referenced;

  /* GNU as does not need anything here.  */
  if (TARGET_GNU_AS)
    return;

  /* ??? The Intel assembler creates a reference that needs to be satisfied by
     the linker when we do this, so we need to be careful not to do this for
     builtin functions which have no library equivalent.  Unfortunately, we
     can't tell here whether or not a function will actually be called by
     expand_expr, so we pull in library functions even if we may not need
     them.  */
  if (! strcmp (name, "__builtin_next_arg")
      || ! strcmp (name, "alloca")
      || ! strcmp (name, "__builtin_constant_p")
      || ! strcmp (name, "__builtin_args_info"))
    return;

  /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
     restore it.  */
  save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      fprintf (file, "%s", TYPE_ASM_OP);
      assemble_name (file, name);
      fprintf (file, TYPE_OPERAND_FMT, "function");
    }
  ASM_GLOBALIZE_LABEL (file, name);
  TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
}
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const_str)
     const char *const_str;
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning ("value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning ("unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning ("unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning ("%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }
}
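
/* Usage example (illustrative): -mfixed-range=f32-f127 marks the upper half
   of the FP register file as fixed, and several ranges may be given at once,
   e.g. -mfixed-range=f32-f127,f2-f5.  A value without a dash, such as
   -mfixed-range=f32, only triggers the warning above.  */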
/* Called to register all of our global variables with the garbage
   collector.  */

static void
ia64_add_gc_roots ()
{
  ggc_add_rtx_root (&ia64_compare_op0, 1);
  ggc_add_rtx_root (&ia64_compare_op1, 1);
}

static void
ia64_init_machine_status (p)
     struct function *p;
{
  p->machine =
    (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
}

static void
ia64_mark_machine_status (p)
     struct function *p;
{
  struct machine_function *machine = p->machine;

  if (machine)
    {
      ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
      ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
      ggc_mark_rtx (machine->ia64_gp_save);
    }
}

static void
ia64_free_machine_status (p)
     struct function *p;
{
  free (p->machine);
  p->machine = NULL;
}
/* Handle TARGET_OPTIONS switches.  */

void
ia64_override_options ()
{
  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
    {
      warning ("cannot optimize division for both latency and throughput");
      target_flags &= ~MASK_INLINE_DIV_THR;
    }

  if (ia64_fixed_range_string)
    fix_range (ia64_fixed_range_string);

  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;
  mark_machine_status = ia64_mark_machine_status;
  free_machine_status = ia64_free_machine_status;

  ia64_add_gc_roots ();
}
static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
static enum attr_type ia64_safe_type PARAMS((rtx));

static enum attr_itanium_requires_unit0
ia64_safe_itanium_requires_unit0 (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_requires_unit0 (insn);
  else
    return ITANIUM_REQUIRES_UNIT0_NO;
}

static enum attr_itanium_class
ia64_safe_itanium_class (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else
    return ITANIUM_CLASS_UNKNOWN;
}

static enum attr_type
ia64_safe_type (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_UNKNOWN;
}
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_GP		(GR_REG (1))
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */

struct reg_write_state
{
  unsigned int write_count : 2;
  unsigned int first_pred : 16;
  unsigned int written_by_fp : 1;
  unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;
};

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
/* Info for the current instruction.  This gets copied to rws_sum after a
   stop bit is emitted.  */
struct reg_write_state rws_insn[NUM_REGS];

/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this, we hit
   the abort in ia64_variable_issue when scheduling an alloc.  */
static int first_instruction;
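
/* Illustrative example of the bookkeeping described above: in the group

	(p6) mov r14 = r15
	(p7) mov r14 = r16

   the first write leaves r14 with WRITE_COUNT == 1 and FIRST_PRED == p6;
   the second write is by the complementary predicate, so WRITE_COUNT
   becomes 2 and no stop bit is needed between them.  A further write to
   r14 in the same group would then require an insn group barrier.  */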
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};

static void rws_update PARAMS ((struct reg_write_state *, int,
				struct reg_flags, int));
static int rws_access_regno PARAMS ((int, struct reg_flags, int));
static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
static void init_insn_group_barriers PARAMS ((void));
static int group_barrier_needed_p PARAMS ((rtx));
static int safe_group_barrier_needed_p PARAMS ((rtx));
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (rws, regno, flags, pred)
     struct reg_write_state *rws;
     int regno;
     struct reg_flags flags;
     int pred;
{
  if (pred)
    rws[regno].write_count++;
  else
    rws[regno].write_count = 2;
  rws[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws[regno].written_by_and = flags.is_and;
  rws[regno].written_by_or = flags.is_or;
  rws[regno].first_pred = pred;
}
/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (regno, flags, pred)
     int regno;
     struct reg_flags flags;
     int pred;
{
  int need_barrier = 0;

  if (regno >= NUM_REGS)
    abort ();

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      /* One insn writes same reg multiple times?  */
      if (rws_insn[regno].write_count > 0)
	abort ();

      /* Update info for current instruction.  */
      rws_update (rws_insn, regno, flags, pred);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  rws_update (rws_sum, regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  If this is
	     not a complementary predicate, then we need a barrier.  */
	  /* ??? This assumes that P and P+1 are always complementary
	     predicates for P even.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
	    need_barrier = 1;
	  rws_update (rws_sum, regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  rws_sum[regno].written_by_and = flags.is_and;
	  rws_sum[regno].written_by_or = flags.is_or;
	  break;

	default:
	  abort ();
	}
    }
  else
    {
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow to avoid
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate.  If this is
	     not a complementary predicate, then we need a barrier.  */
	  /* ??? This assumes that P and P+1 are always complementary
	     predicates for P even.  */
	  if ((rws_sum[regno].first_pred ^ 1) != pred)
	    need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  abort ();
	}
    }

  return need_barrier;
}
static int
rws_access_reg (reg, flags, pred)
     rtx reg;
     struct reg_flags flags;
     int pred;
{
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;

      while (--n >= 0)
	need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}
/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */

static void
update_set_flags (x, pflags, ppred, pcond)
     rtx x;
     struct reg_flags *pflags;
     int *ppred;
     rtx *pcond;
{
  rtx src = SET_SRC (x);

  switch (GET_CODE (src))
    {
    case IF_THEN_ELSE:
      if (SET_DEST (x) == pc_rtx)
	/* X is a conditional branch.  */
	return;
      else
	{
	  int is_complemented = 0;

	  /* X is a conditional move.  */
	  rtx cond = XEXP (src, 0);
	  if (GET_CODE (cond) == EQ)
	    is_complemented = 1;
	  cond = XEXP (cond, 0);
	  if (GET_CODE (cond) != REG
	      && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
	    abort ();
	  *pcond = cond;
	  if (XEXP (src, 1) == SET_DEST (x)
	      || XEXP (src, 2) == SET_DEST (x))
	    {
	      /* X is a conditional move that conditionally writes the
		 destination.  */

	      /* We need another complement in this case.  */
	      if (XEXP (src, 1) == SET_DEST (x))
		is_complemented = ! is_complemented;

	      *ppred = REGNO (cond);
	      if (is_complemented)
		++*ppred;
	    }

	  /* ??? If this is a conditional write to the dest, then this
	     instruction does not actually read one source.  This probably
	     doesn't matter, because that source is also the dest.  */
	  /* ??? Multiple writes to predicate registers are allowed
	     if they are all AND type compares, or if they are all OR
	     type compares.  We do not generate such instructions
	     currently.  */
	}
      /* ... fall through ...  */

    default:
      if (GET_RTX_CLASS (GET_CODE (src)) == '<'
	  && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
	/* Set pflags->is_fp to 1 so that we know we're dealing
	   with a floating point comparison when processing the
	   destination of the SET.  */
	pflags->is_fp = 1;

      /* Discover if this is a parallel comparison.  We only handle
	 and.orcm and or.andcm at present, since we must retain a
	 strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
	pflags->is_and = 1;
      else if (GET_CODE (src) == IOR)
	pflags->is_or = 1;

      break;
    }
}
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   used to predicate the insn, if any.  */

static int
set_src_needs_barrier (x, flags, pred, cond)
     rtx x;
     struct reg_flags flags;
     int pred;
     rtx cond;
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
      flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  need_barrier = rtx_needs_barrier (src, flags, pred);

  /* This instruction unconditionally uses a predicate register.  */
  if (cond)
    need_barrier |= rws_access_reg (cond, flags, 0);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
      dst = XEXP (dst, 0);
    }
  return need_barrier;
}
/* Handle an access to rtx X of type FLAGS using predicate register PRED.
   Return 1 if this access creates a dependency with an earlier instruction
   in the same group.  */

static int
rtx_needs_barrier (x, flags, pred)
     rtx x;
     struct reg_flags flags;
     int pred;
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx cond = 0;

  if (! x)
    return 0;

  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      update_set_flags (x, &new_flags, &pred, &cond);
      need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
      if (GET_CODE (SET_SRC (x)) != CALL)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
	}
      break;
    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
	 multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	}
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      if (pred)
	abort ();
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
	is_complemented = 1;
      cond = XEXP (cond, 0);
      if (GET_CODE (cond) != REG
	  && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
	abort ();
      pred = REGNO (cond);
      if (is_complemented)
	++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;
    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
	{
	  /* Avoid writing the register multiple times if we have multiple
	     asm outputs.  This avoids an abort in rws_access_reg.  */
	  if (! rws_insn[REG_VOLATILE].write_count)
	    {
	      new_flags.is_write = 1;
	      rws_access_regno (REG_VOLATILE, new_flags, pred);
	    }
	  return 1;
	}

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We can not just fall through here since then we would be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
	 traditional asms unlike their normal usage.  */

      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
	  need_barrier = 1;
      break;
    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  if (GET_CODE (pat) == SET)
	    {
	      update_set_flags (pat, &new_flags, &pred, &cond);
	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
	    }
	  else if (GET_CODE (pat) == USE
		   || GET_CODE (pat) == CALL
		   || GET_CODE (pat) == ASM_OPERANDS)
	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
	  else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
	    abort ();
	}
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  if (GET_CODE (pat) == SET)
	    {
	      if (GET_CODE (SET_SRC (pat)) != CALL)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
						     pred);
		}
	    }
	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
	}
      break;
    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
	{
	  for (i = 0; i < 64; ++i)
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
	}
      else
	need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT:   case CONST_DOUBLE:
    case SYMBOL_REF:  case LABEL_REF:     case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC:    case POST_DEC:
      if (GET_CODE (XEXP (x, 0)) != REG)
	abort ();

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      if (GET_CODE (XEXP (x, 0)) != REG)
	abort ();

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;
      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:            case SIGN_EXTEND:     case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
    case SQRT:     case FFS:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;
    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_GR_SPILL:
	case UNSPEC_GR_RESTORE:
	  {
	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
	    HOST_WIDE_INT bit = (offset >> 3) & 63;

	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	    new_flags.is_write = (XINT (x, 1) == 1);
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
					      new_flags, pred);
	    break;
	  }

	case UNSPEC_FR_SPILL:
	case UNSPEC_FR_RESTORE:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case UNSPEC_PRED_REL_MUTEX:
	case UNSPEC_PIC_CALL:
	case UNSPEC_FETCHADD_ACQ:
	case UNSPEC_BSP_VALUE:
	case UNSPEC_FLUSHRS:
	case UNSPEC_BUNDLE_SELECTOR:
	  break;

	/* ... */
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case UNSPEC_FR_RECIP_APPROX:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  break;

	case UNSPEC_CMPXCHG_ACQ:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
	  break;

	default:
	  abort ();
	}
      break;
    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
	{
	case UNSPECV_ALLOC:
	  /* Alloc must always be the first instruction of a group.
	     We force this by always returning true.  */
	  /* ??? We might get better scheduling if we explicitly check for
	     input/local/output register dependencies, and modify the
	     scheduler so that alloc is always reordered to the start of
	     the current group.  We could then eliminate all of the
	     first_instruction code.  */
	  rws_access_regno (AR_PFS_REGNUM, flags, pred);

	  new_flags.is_write = 1;
	  rws_access_regno (REG_AR_CFM, new_flags, pred);
	  return 1;

	case UNSPECV_SET_BSP:
	  need_barrier = 1;
	  break;

	case UNSPECV_BLOCKAGE:
	case UNSPECV_INSN_GROUP_BARRIER:
	case UNSPECV_PSAC_ALL:
	case UNSPECV_PSAC_NORMAL:
	  return 0;

	default:
	  abort ();
	}
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier  = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
	switch (format_ptr[i])
	  {
	  case '0':	/* unused field */
	  case 'i':	/* integer */
	  case 'n':	/* note */
	  case 'w':	/* wide integer */
	  case 's':	/* pointer to string */
	  case 'S':	/* optional pointer to string */
	    break;

	  case 'e':
	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
	      need_barrier = 1;
	    break;

	  case 'E':
	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
		need_barrier = 1;
	    break;

	  default:
	    abort ();
	  }
      break;
    }

  return need_barrier;
}
/* Clear out the state for group_barrier_needed_p at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers ()
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}

/* Given the current state, recorded by previous calls to this function,
   determine whether a group barrier (a stop bit) is necessary before INSN.
   Return nonzero if so.  */

static int
group_barrier_needed_p (insn)
     rtx insn;
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	case CODE_FOR_prologue_allocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	case CODE_FOR_prologue_use:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      abort ();
    }

  if (first_instruction)
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}
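/* Illustrative sketch (not part of the original file): the typical calling
   pattern for the two routines above, as used by the emit_*_group_barriers
   functions further down in this file.  The walk below is a simplified
   rendering of that pattern and is kept under "#if 0" so it is not
   compiled; the function name is hypothetical.  */
#if 0
static void
sketch_insert_stop_bits (insns)
     rtx insns;
{
  rtx insn;

  init_insn_group_barriers ();
  for (insn = insns; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn) && group_barrier_needed_p (insn))
      {
	/* A stop bit is represented by the insn_group_barrier pattern;
	   the constant operand records the slot it follows.  */
	emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	/* Restart the tracking state and re-record this insn's own
	   register writes in the fresh group.  */
	init_insn_group_barriers ();
	group_barrier_needed_p (insn);
      }
}
#endif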
/* Like group_barrier_needed_p, but do not clobber the current state.  */

static int
safe_group_barrier_needed_p (insn)
     rtx insn;
{
  struct reg_write_state rws_saved[NUM_REGS];
  int saved_first_instruction;
  int t;

  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
  saved_first_instruction = first_instruction;

  t = group_barrier_needed_p (insn);

  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
  first_instruction = saved_first_instruction;

  return t;
}
/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
   as necessary to eliminate dependencies.  This function assumes that
   a final instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only inserts
   new ones at basic block boundaries, since these are invisible to the
   scheduler.  */

static void
emit_insn_group_barriers (dump, insns)
     FILE *dump;
     rtx insns;
{
  rtx insn;
  rtx last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CODE_LABEL)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == NOTE
	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == INSN
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	{
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed_p (insn))
	    {
	      if (last_label)
		{
		  if (dump)
		    fprintf (dump, "Emitting stop before label %d\n",
			     INSN_UID (last_label));
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
				    last_label);
		  insn = last_label;
		}
	      init_insn_group_barriers ();
	      last_label = 0;
	    }
	}
    }
}
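/* Note: the insn_group_barrier pattern used above is what becomes the
   architectural stop bit -- the ";;" that ends an instruction group in the
   assembly output.  The constant operand (3 here) appears to act as a slot
   selector recording where in the two-bundle issue window the stop falls;
   ia64_sched_reorder2 and ia64_variable_issue below adjust or choose this
   operand when they move a stop.  */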
/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (dump, insns)
     FILE *dump ATTRIBUTE_UNUSED;
     rtx insns;
{
  rtx insn;

  init_insn_group_barriers ();

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
	{
	  rtx last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (GET_CODE (last) == JUMP_INSN
	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	}
      else if (INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    init_insn_group_barriers ();
	  else if (group_barrier_needed_p (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      group_barrier_needed_p (insn);
	    }
	}
    }
}
4963 static int errata_find_address_regs
PARAMS ((rtx
*, void *));
4964 static void errata_emit_nops
PARAMS ((rtx
));
4965 static void fixup_errata
PARAMS ((void));
4967 /* This structure is used to track some details about the previous insns
4968 groups so we can determine if it may be necessary to insert NOPs to
4969 workaround hardware errata. */
4972 HARD_REG_SET p_reg_set
;
4973 HARD_REG_SET gr_reg_conditionally_set
;
4976 /* Index into the last_group array. */
4977 static int group_idx
;
4979 /* Called through for_each_rtx; determines if a hard register that was
4980 conditionally set in the previous group is used as an address register.
4981 It ensures that for_each_rtx returns 1 in that case. */
4983 errata_find_address_regs (xp
, data
)
4985 void *data ATTRIBUTE_UNUSED
;
4988 if (GET_CODE (x
) != MEM
)
4991 if (GET_CODE (x
) == POST_MODIFY
)
4993 if (GET_CODE (x
) == REG
)
4995 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
4996 if (TEST_HARD_REG_BIT (prev_group
->gr_reg_conditionally_set
,
5004 /* Called for each insn; this function keeps track of the state in
5005 last_group and emits additional NOPs if necessary to work around
5006 an Itanium A/B step erratum. */
5008 errata_emit_nops (insn
)
5011 struct group
*this_group
= last_group
+ group_idx
;
5012 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
5013 rtx pat
= PATTERN (insn
);
5014 rtx cond
= GET_CODE (pat
) == COND_EXEC
? COND_EXEC_TEST (pat
) : 0;
5015 rtx real_pat
= cond
? COND_EXEC_CODE (pat
) : pat
;
5016 enum attr_type type
;
5019 if (GET_CODE (real_pat
) == USE
5020 || GET_CODE (real_pat
) == CLOBBER
5021 || GET_CODE (real_pat
) == ASM_INPUT
5022 || GET_CODE (real_pat
) == ADDR_VEC
5023 || GET_CODE (real_pat
) == ADDR_DIFF_VEC
5024 || asm_noperands (PATTERN (insn
)) >= 0)
5027 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5030 if (GET_CODE (set
) == PARALLEL
)
5033 set
= XVECEXP (real_pat
, 0, 0);
5034 for (i
= 1; i
< XVECLEN (real_pat
, 0); i
++)
5035 if (GET_CODE (XVECEXP (real_pat
, 0, i
)) != USE
5036 && GET_CODE (XVECEXP (real_pat
, 0, i
)) != CLOBBER
)
5043 if (set
&& GET_CODE (set
) != SET
)
5046 type
= get_attr_type (insn
);
5049 && set
&& REG_P (SET_DEST (set
)) && PR_REGNO_P (REGNO (SET_DEST (set
))))
5050 SET_HARD_REG_BIT (this_group
->p_reg_set
, REGNO (SET_DEST (set
)));
5052 if ((type
== TYPE_M
|| type
== TYPE_A
) && cond
&& set
5053 && REG_P (SET_DEST (set
))
5054 && GET_CODE (SET_SRC (set
)) != PLUS
5055 && GET_CODE (SET_SRC (set
)) != MINUS
5056 && (GET_CODE (SET_SRC (set
)) != ASHIFT
5057 || !shladd_operand (XEXP (SET_SRC (set
), 1), VOIDmode
))
5058 && (GET_CODE (SET_SRC (set
)) != MEM
5059 || GET_CODE (XEXP (SET_SRC (set
), 0)) != POST_MODIFY
)
5060 && GENERAL_REGNO_P (REGNO (SET_DEST (set
))))
5062 if (GET_RTX_CLASS (GET_CODE (cond
)) != '<'
5063 || ! REG_P (XEXP (cond
, 0)))
5066 if (TEST_HARD_REG_BIT (prev_group
->p_reg_set
, REGNO (XEXP (cond
, 0))))
5067 SET_HARD_REG_BIT (this_group
->gr_reg_conditionally_set
, REGNO (SET_DEST (set
)));
5069 if (for_each_rtx (&real_pat
, errata_find_address_regs
, NULL
))
5071 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5072 emit_insn_before (gen_nop (), insn
);
5073 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5075 memset (last_group
, 0, sizeof last_group
);
5079 /* Emit extra nops if they are required to work around hardware errata. */
5086 if (! TARGET_B_STEP
)
5090 memset (last_group
, 0, sizeof last_group
);
5092 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5097 if (ia64_safe_type (insn
) == TYPE_S
)
5100 memset (last_group
+ group_idx
, 0, sizeof last_group
[group_idx
]);
5103 errata_emit_nops (insn
);
/* Instruction scheduling support.  */

/* Describe one bundle.  */

struct bundle
{
  /* Zero if there's no possibility of a stop in this bundle other than
     at the end, otherwise the position of the optional stop bit.  */
  int possible_stop;
  /* The types of the three slots.  */
  enum attr_type t[3];
  /* The pseudo op to be emitted into the assembler output.  */
  const char *name;
};

#define NR_BUNDLES 10

/* A list of all available bundles.  */

static const struct bundle bundle[NR_BUNDLES] =
{
  { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
  { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
  { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
  { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
#if NR_BUNDLES == 10
  { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
  { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
#endif
  { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
  { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
  { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
  /* .mfi needs to occur earlier than .mlx, so that we only generate it if
     it matches an L type insn.  Otherwise we'll try to generate L type
     nops.  */
  { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
};
/* Describe a packet of instructions.  Packets consist of two bundles that
   are visible to the hardware in one scheduling window.  */

struct ia64_packet
{
  const struct bundle *t1, *t2;
  /* Precomputed value of the first split issue in this packet if a cycle
     starts at its beginning.  */
  int first_split;
  /* For convenience, the insn types are replicated here so we don't have
     to go through T1 and T2 all the time.  */
  enum attr_type t[6];
};

/* An array containing all possible packets.  */
#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
static struct ia64_packet packets[NR_PACKETS];

/* Map attr_type to a string with the name.  */

static const char *const type_names[] =
{
  "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
};

/* Nonzero if we should insert stop bits into the schedule.  */
int ia64_final_schedule = 0;

static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
static rtx ia64_single_set PARAMS ((rtx));
static int insn_matches_slot PARAMS ((const struct ia64_packet *,
				      enum attr_type, int, rtx));
static void ia64_emit_insn_before PARAMS ((rtx, rtx));
static void maybe_rotate PARAMS ((FILE *));
static void finish_last_head PARAMS ((FILE *, int));
static void rotate_one_bundle PARAMS ((FILE *));
static void rotate_two_bundles PARAMS ((FILE *));
static void nop_cycles_until PARAMS ((int, FILE *));
static void cycle_end_fill_slots PARAMS ((FILE *));
static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
static int get_split PARAMS ((const struct ia64_packet *, int));
static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
				   const struct ia64_packet *, int));
static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
				      rtx *, enum attr_type *, int));
static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
static void dump_current_packet PARAMS ((FILE *));
static void schedule_stop PARAMS ((FILE *));
static rtx gen_nop_type PARAMS ((enum attr_type));
static void ia64_emit_nops PARAMS ((void));

/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (b)
     int b;
{
  return bundle[b].name;
}
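/* Illustrative sketch (not part of the original file): packets pair every
   bundle with every bundle, so packet index I corresponds to bundle
   I / NR_BUNDLES followed by bundle I % NR_BUNDLES -- the same layout that
   ia64_sched_init uses below when it fills the table.  Kept under "#if 0";
   the function name is hypothetical.  */
#if 0
static void
sketch_dump_packet_table ()
{
  int i;

  for (i = 0; i < NR_PACKETS; i++)
    {
      const struct bundle *b1 = &bundle[i / NR_BUNDLES];
      const struct bundle *b2 = &bundle[i % NR_BUNDLES];

      /* E.g. packet 0 is ".mii .mii", packet 1 is ".mii .mmi", ...  */
      fprintf (stderr, "packet %d: %s %s\n", i, b1->name, b2->name);
    }
}
#endif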
/* Compute the slot which will cause a split issue in packet P if the
   current cycle begins at slot BEGIN.  */

static int
itanium_split_issue (p, begin)
     const struct ia64_packet *p;
     int begin;
{
  int type_count[TYPE_S];
  int i;
  int split = 6;

  if (begin < 3)
    {
      /* Always split before and after MMF.  */
      if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
	return 3;
      if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
	return 3;
      /* Always split after MBB and BBB.  */
      if (p->t[1] == TYPE_B)
	return 3;
      /* Split after first bundle in MIB BBB combination.  */
      if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
	return 3;
    }

  memset (type_count, 0, sizeof type_count);
  for (i = begin; i < split; i++)
    {
      enum attr_type t0 = p->t[i];
      /* An MLX bundle reserves the same units as an MFI bundle.  */
      enum attr_type t = (t0 == TYPE_L ? TYPE_F
			  : t0 == TYPE_X ? TYPE_I
			  : t0);

      /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
	 2 integer per cycle.  */
      int max = (t == TYPE_B ? 3 : 2);
      if (type_count[t] == max)
	return i;

      type_count[t]++;
    }
  return split;
}
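/* A worked example, assuming the per-cycle limits described in the comment
   above: an .mii/.mii packet scanned from slot 0 issues M, I, I, M and then
   stalls on slot 4, because that slot would be the third I-unit operation
   in the cycle while only two integer units are available, so the split is
   reported at slot 4.  */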
/* Return the maximum number of instructions a cpu can issue.  */

/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (insn)
     rtx insn;
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
     Although they are not classical single set, the second set is there just
     to protect it from moving past FP-relative stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_epilogue_deallocate_stack:
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}
5288 /* Adjust the cost of a scheduling dependency. Return the new cost of
5289 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5292 ia64_adjust_cost (insn
, link
, dep_insn
, cost
)
5293 rtx insn
, link
, dep_insn
;
5296 enum attr_type dep_type
;
5297 enum attr_itanium_class dep_class
;
5298 enum attr_itanium_class insn_class
;
5299 rtx dep_set
, set
, src
, addr
;
5301 if (GET_CODE (PATTERN (insn
)) == CLOBBER
5302 || GET_CODE (PATTERN (insn
)) == USE
5303 || GET_CODE (PATTERN (dep_insn
)) == CLOBBER
5304 || GET_CODE (PATTERN (dep_insn
)) == USE
5305 /* @@@ Not accurate for indirect calls. */
5306 || GET_CODE (insn
) == CALL_INSN
5307 || ia64_safe_type (insn
) == TYPE_S
)
5310 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
5311 || REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
5314 dep_type
= ia64_safe_type (dep_insn
);
5315 dep_class
= ia64_safe_itanium_class (dep_insn
);
5316 insn_class
= ia64_safe_itanium_class (insn
);
5318 /* Compares that feed a conditional branch can execute in the same
5320 dep_set
= ia64_single_set (dep_insn
);
5321 set
= ia64_single_set (insn
);
5323 if (dep_type
!= TYPE_F
5325 && GET_CODE (SET_DEST (dep_set
)) == REG
5326 && PR_REG (REGNO (SET_DEST (dep_set
)))
5327 && GET_CODE (insn
) == JUMP_INSN
)
5330 if (dep_set
&& GET_CODE (SET_DEST (dep_set
)) == MEM
)
      /* ??? Can't find any information in the documentation about whether
	 ... splits issue.  Assume it doesn't.  */
5340 src
= set
? SET_SRC (set
) : 0;
5344 if (GET_CODE (SET_DEST (set
)) == MEM
)
5345 addr
= XEXP (SET_DEST (set
), 0);
5346 else if (GET_CODE (SET_DEST (set
)) == SUBREG
5347 && GET_CODE (SUBREG_REG (SET_DEST (set
))) == MEM
)
5348 addr
= XEXP (SUBREG_REG (SET_DEST (set
)), 0);
5352 if (GET_CODE (addr
) == UNSPEC
&& XVECLEN (addr
, 0) > 0)
5353 addr
= XVECEXP (addr
, 0, 0);
5354 while (GET_CODE (addr
) == SUBREG
|| GET_CODE (addr
) == ZERO_EXTEND
)
5355 addr
= XEXP (addr
, 0);
5356 if (GET_CODE (addr
) == MEM
)
5357 addr
= XEXP (addr
, 0);
5363 if (addr
&& GET_CODE (addr
) == POST_MODIFY
)
5364 addr
= XEXP (addr
, 0);
5366 set
= ia64_single_set (dep_insn
);
5368 if ((dep_class
== ITANIUM_CLASS_IALU
5369 || dep_class
== ITANIUM_CLASS_ILOG
5370 || dep_class
== ITANIUM_CLASS_LD
)
5371 && (insn_class
== ITANIUM_CLASS_LD
5372 || insn_class
== ITANIUM_CLASS_ST
))
5374 if (! addr
|| ! set
)
5376 /* This isn't completely correct - an IALU that feeds an address has
5377 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5378 otherwise. Unfortunately there's no good way to describe this. */
5379 if (reg_overlap_mentioned_p (SET_DEST (set
), addr
))
5383 if ((dep_class
== ITANIUM_CLASS_IALU
5384 || dep_class
== ITANIUM_CLASS_ILOG
5385 || dep_class
== ITANIUM_CLASS_LD
)
5386 && (insn_class
== ITANIUM_CLASS_MMMUL
5387 || insn_class
== ITANIUM_CLASS_MMSHF
5388 || insn_class
== ITANIUM_CLASS_MMSHFI
))
5391 if (dep_class
== ITANIUM_CLASS_FMAC
5392 && (insn_class
== ITANIUM_CLASS_FMISC
5393 || insn_class
== ITANIUM_CLASS_FCVTFX
5394 || insn_class
== ITANIUM_CLASS_XMPY
))
5397 if ((dep_class
== ITANIUM_CLASS_FMAC
5398 || dep_class
== ITANIUM_CLASS_FMISC
5399 || dep_class
== ITANIUM_CLASS_FCVTFX
5400 || dep_class
== ITANIUM_CLASS_XMPY
)
5401 && insn_class
== ITANIUM_CLASS_STF
)
5404 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5405 but HP engineers say any non-MM operation. */
5406 if ((dep_class
== ITANIUM_CLASS_MMMUL
5407 || dep_class
== ITANIUM_CLASS_MMSHF
5408 || dep_class
== ITANIUM_CLASS_MMSHFI
)
5409 && insn_class
!= ITANIUM_CLASS_MMMUL
5410 && insn_class
!= ITANIUM_CLASS_MMSHF
5411 && insn_class
!= ITANIUM_CLASS_MMSHFI
)
/* Describe the current state of the Itanium pipeline.  */
static struct
{
  /* The first slot that is used in the current cycle.  */
  int first_slot;
  /* The next slot to fill.  */
  int cur;
  /* The packet we have selected for the current issue window.  */
  const struct ia64_packet *packet;
  /* The position of the split issue that occurs due to issue width
     limitations (6 if there's no split issue).  */
  int split;
  /* Record data about the insns scheduled so far in the same issue
     window.  The elements up to but not including FIRST_SLOT belong
     to the previous cycle, the ones starting with FIRST_SLOT belong
     to the current cycle.  */
  enum attr_type types[6];
  rtx insns[6];
  int stopbit[6];
  /* Nonzero if we decided to schedule a stop bit.  */
  int last_was_stop;
} sched_data;

/* Temporary arrays; they have enough elements to hold all insns that
   can be ready at the same time while scheduling of the current block.
   SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
static rtx *sched_ready;
static enum attr_type *sched_types;
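/* Layout note (illustrative): the six TYPES/INSNS/STOPBIT entries above
   describe a two-bundle issue window -- slots 0-2 are the first bundle and
   slots 3-5 the second, matching the t[6] array in struct ia64_packet.
   FIRST_SLOT marks where the current cycle begins inside that window, and
   CUR is the next free slot; the rotate_*_bundle routines below shift the
   arrays by three entries when a bundle leaves the window.  */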
5446 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5450 insn_matches_slot (p
, itype
, slot
, insn
)
5451 const struct ia64_packet
*p
;
5452 enum attr_type itype
;
5456 enum attr_itanium_requires_unit0 u0
;
5457 enum attr_type stype
= p
->t
[slot
];
5461 u0
= ia64_safe_itanium_requires_unit0 (insn
);
5462 if (u0
== ITANIUM_REQUIRES_UNIT0_YES
)
5465 for (i
= sched_data
.first_slot
; i
< slot
; i
++)
5466 if (p
->t
[i
] == stype
5467 || (stype
== TYPE_F
&& p
->t
[i
] == TYPE_L
)
5468 || (stype
== TYPE_I
&& p
->t
[i
] == TYPE_X
))
5471 if (GET_CODE (insn
) == CALL_INSN
)
5473 /* Reject calls in multiway branch packets. We want to limit
5474 the number of multiway branches we generate (since the branch
5475 predictor is limited), and this seems to work fairly well.
5476 (If we didn't do this, we'd have to add another test here to
5477 force calls into the third slot of the bundle.) */
5480 if (p
->t
[1] == TYPE_B
)
5485 if (p
->t
[4] == TYPE_B
)
5493 if (itype
== TYPE_A
)
5494 return stype
== TYPE_M
|| stype
== TYPE_I
;
5498 /* Like emit_insn_before, but skip cycle_display notes.
5499 ??? When cycle display notes are implemented, update this. */
5502 ia64_emit_insn_before (insn
, before
)
5505 emit_insn_before (insn
, before
);
5508 /* When rotating a bundle out of the issue window, insert a bundle selector
5509 insn in front of it. DUMP is the scheduling dump file or NULL. START
5510 is either 0 or 3, depending on whether we want to emit a bundle selector
5511 for the first bundle or the second bundle in the current issue window.
5513 The selector insns are emitted this late because the selected packet can
5514 be changed until parts of it get rotated out. */
5517 finish_last_head (dump
, start
)
5521 const struct ia64_packet
*p
= sched_data
.packet
;
5522 const struct bundle
*b
= start
== 0 ? p
->t1
: p
->t2
;
5523 int bundle_type
= b
- bundle
;
5527 if (! ia64_final_schedule
)
5530 for (i
= start
; sched_data
.insns
[i
] == 0; i
++)
5533 insn
= sched_data
.insns
[i
];
5536 fprintf (dump
, "// Emitting template before %d: %s\n",
5537 INSN_UID (insn
), b
->name
);
5539 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type
)), insn
);
5542 /* We can't schedule more insns this cycle. Fix up the scheduling state
5543 and advance FIRST_SLOT and CUR.
5544 We have to distribute the insns that are currently found between
5545 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5546 far, they are stored successively in the fields starting at FIRST_SLOT;
5547 now they must be moved to the correct slots.
5548 DUMP is the current scheduling dump file, or NULL. */
5551 cycle_end_fill_slots (dump
)
5554 const struct ia64_packet
*packet
= sched_data
.packet
;
5556 enum attr_type tmp_types
[6];
5559 memcpy (tmp_types
, sched_data
.types
, 6 * sizeof (enum attr_type
));
5560 memcpy (tmp_insns
, sched_data
.insns
, 6 * sizeof (rtx
));
5562 for (i
= slot
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
5564 enum attr_type t
= tmp_types
[i
];
5565 if (t
!= ia64_safe_type (tmp_insns
[i
]))
5567 while (! insn_matches_slot (packet
, t
, slot
, tmp_insns
[i
]))
5569 if (slot
> sched_data
.split
)
5572 fprintf (dump
, "// Packet needs %s, have %s\n",
5573 type_names
[packet
->t
[slot
]], type_names
[t
]);
5574 sched_data
.types
[slot
] = packet
->t
[slot
];
5575 sched_data
.insns
[slot
] = 0;
5576 sched_data
.stopbit
[slot
] = 0;
5578 /* ??? TYPE_L instructions always fill up two slots, but we don't
5579 support TYPE_L nops. */
5580 if (packet
->t
[slot
] == TYPE_L
)
5586 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5587 actual slot type later. */
5588 sched_data
.types
[slot
] = packet
->t
[slot
];
5589 sched_data
.insns
[slot
] = tmp_insns
[i
];
5590 sched_data
.stopbit
[slot
] = 0;
5593 /* TYPE_L instructions always fill up two slots. */
5596 sched_data
.types
[slot
] = packet
->t
[slot
];
5597 sched_data
.insns
[slot
] = 0;
5598 sched_data
.stopbit
[slot
] = 0;
5603 /* This isn't right - there's no need to pad out until the forced split;
5604 the CPU will automatically split if an insn isn't ready. */
5606 while (slot
< sched_data
.split
)
5608 sched_data
.types
[slot
] = packet
->t
[slot
];
5609 sched_data
.insns
[slot
] = 0;
5610 sched_data
.stopbit
[slot
] = 0;
5615 sched_data
.first_slot
= sched_data
.cur
= slot
;
5618 /* Bundle rotations, as described in the Itanium optimization manual.
5619 We can rotate either one or both bundles out of the issue window.
5620 DUMP is the current scheduling dump file, or NULL. */
5623 rotate_one_bundle (dump
)
5627 fprintf (dump
, "// Rotating one bundle.\n");
5629 finish_last_head (dump
, 0);
5630 if (sched_data
.cur
> 3)
5632 sched_data
.cur
-= 3;
5633 sched_data
.first_slot
-= 3;
5634 memmove (sched_data
.types
,
5635 sched_data
.types
+ 3,
5636 sched_data
.cur
* sizeof *sched_data
.types
);
5637 memmove (sched_data
.stopbit
,
5638 sched_data
.stopbit
+ 3,
5639 sched_data
.cur
* sizeof *sched_data
.stopbit
);
5640 memmove (sched_data
.insns
,
5641 sched_data
.insns
+ 3,
5642 sched_data
.cur
* sizeof *sched_data
.insns
);
5644 = &packets
[(sched_data
.packet
->t2
- bundle
) * NR_BUNDLES
];
5649 sched_data
.first_slot
= 0;
5654 rotate_two_bundles (dump
)
5658 fprintf (dump
, "// Rotating two bundles.\n");
5660 if (sched_data
.cur
== 0)
5663 finish_last_head (dump
, 0);
5664 if (sched_data
.cur
> 3)
5665 finish_last_head (dump
, 3);
5667 sched_data
.first_slot
= 0;
5670 /* We're beginning a new block. Initialize data structures as necessary. */
5673 ia64_sched_init (dump
, sched_verbose
, max_ready
)
5674 FILE *dump ATTRIBUTE_UNUSED
;
5675 int sched_verbose ATTRIBUTE_UNUSED
;
5678 static int initialized
= 0;
5686 for (i
= b1
= 0; b1
< NR_BUNDLES
; b1
++)
5688 const struct bundle
*t1
= bundle
+ b1
;
5689 for (b2
= 0; b2
< NR_BUNDLES
; b2
++, i
++)
5691 const struct bundle
*t2
= bundle
+ b2
;
5697 for (i
= 0; i
< NR_PACKETS
; i
++)
5700 for (j
= 0; j
< 3; j
++)
5701 packets
[i
].t
[j
] = packets
[i
].t1
->t
[j
];
5702 for (j
= 0; j
< 3; j
++)
5703 packets
[i
].t
[j
+ 3] = packets
[i
].t2
->t
[j
];
5704 packets
[i
].first_split
= itanium_split_issue (packets
+ i
, 0);
5709 init_insn_group_barriers ();
5711 memset (&sched_data
, 0, sizeof sched_data
);
5712 sched_types
= (enum attr_type
*) xmalloc (max_ready
5713 * sizeof (enum attr_type
));
5714 sched_ready
= (rtx
*) xmalloc (max_ready
* sizeof (rtx
));
5717 /* See if the packet P can match the insns we have already scheduled. Return
5718 nonzero if so. In *PSLOT, we store the first slot that is available for
5719 more instructions if we choose this packet.
5720 SPLIT holds the last slot we can use, there's a split issue after it so
5721 scheduling beyond it would cause us to use more than one cycle. */
5724 packet_matches_p (p
, split
, pslot
)
5725 const struct ia64_packet
*p
;
5729 int filled
= sched_data
.cur
;
5730 int first
= sched_data
.first_slot
;
5733 /* First, check if the first of the two bundles must be a specific one (due
5735 if (first
> 0 && sched_data
.stopbit
[0] && p
->t1
->possible_stop
!= 1)
5737 if (first
> 1 && sched_data
.stopbit
[1] && p
->t1
->possible_stop
!= 2)
5740 for (i
= 0; i
< first
; i
++)
5741 if (! insn_matches_slot (p
, sched_data
.types
[i
], i
,
5742 sched_data
.insns
[i
]))
5744 for (i
= slot
= first
; i
< filled
; i
++)
5746 while (slot
< split
)
5748 if (insn_matches_slot (p
, sched_data
.types
[i
], slot
,
5749 sched_data
.insns
[i
]))
5763 /* A frontend for itanium_split_issue. For a packet P and a slot
5764 number FIRST that describes the start of the current clock cycle,
5765 return the slot number of the first split issue. This function
5766 uses the cached number found in P if possible. */
5769 get_split (p
, first
)
5770 const struct ia64_packet
*p
;
5774 return p
->first_split
;
5775 return itanium_split_issue (p
, first
);
5778 /* Given N_READY insns in the array READY, whose types are found in the
5779 corresponding array TYPES, return the insn that is best suited to be
5780 scheduled in slot SLOT of packet P. */
5783 find_best_insn (ready
, types
, n_ready
, p
, slot
)
5785 enum attr_type
*types
;
5787 const struct ia64_packet
*p
;
5792 while (n_ready
-- > 0)
5794 rtx insn
= ready
[n_ready
];
5797 if (best
>= 0 && INSN_PRIORITY (ready
[n_ready
]) < best_pri
)
5799 /* If we have equally good insns, one of which has a stricter
5800 slot requirement, prefer the one with the stricter requirement. */
5801 if (best
>= 0 && types
[n_ready
] == TYPE_A
)
5803 if (insn_matches_slot (p
, types
[n_ready
], slot
, insn
))
5806 best_pri
= INSN_PRIORITY (ready
[best
]);
5808 /* If there's no way we could get a stricter requirement, stop
5810 if (types
[n_ready
] != TYPE_A
5811 && ia64_safe_itanium_requires_unit0 (ready
[n_ready
]))
5819 /* Select the best packet to use given the current scheduler state and the
5821 READY is an array holding N_READY ready insns; TYPES is a corresponding
5822 array that holds their types. Store the best packet in *PPACKET and the
5823 number of insns that can be scheduled in the current cycle in *PBEST. */
5826 find_best_packet (pbest
, ppacket
, ready
, types
, n_ready
)
5828 const struct ia64_packet
**ppacket
;
5830 enum attr_type
*types
;
5833 int first
= sched_data
.first_slot
;
5836 const struct ia64_packet
*best_packet
= NULL
;
5839 for (i
= 0; i
< NR_PACKETS
; i
++)
5841 const struct ia64_packet
*p
= packets
+ i
;
5843 int split
= get_split (p
, first
);
5845 int first_slot
, last_slot
;
5848 if (! packet_matches_p (p
, split
, &first_slot
))
5851 memcpy (sched_ready
, ready
, n_ready
* sizeof (rtx
));
5855 for (slot
= first_slot
; slot
< split
; slot
++)
5859 /* Disallow a degenerate case where the first bundle doesn't
5860 contain anything but NOPs! */
5861 if (first_slot
== 0 && win
== 0 && slot
== 3)
5867 insn_nr
= find_best_insn (sched_ready
, types
, n_ready
, p
, slot
);
5870 sched_ready
[insn_nr
] = 0;
5874 else if (p
->t
[slot
] == TYPE_B
)
5877 /* We must disallow MBB/BBB packets if any of their B slots would be
5878 filled with nops. */
5881 if (p
->t
[1] == TYPE_B
&& (b_nops
|| last_slot
< 2))
5886 if (p
->t
[4] == TYPE_B
&& (b_nops
|| last_slot
< 5))
5891 || (win
== best
&& last_slot
< lowest_end
))
5894 lowest_end
= last_slot
;
5899 *ppacket
= best_packet
;
5902 /* Reorder the ready list so that the insns that can be issued in this cycle
5903 are found in the correct order at the end of the list.
5904 DUMP is the scheduling dump file, or NULL. READY points to the start,
5905 E_READY to the end of the ready list. MAY_FAIL determines what should be
5906 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5907 otherwise we return 0.
5908 Return 1 if any insns can be scheduled in this cycle. */
5911 itanium_reorder (dump
, ready
, e_ready
, may_fail
)
5917 const struct ia64_packet
*best_packet
;
5918 int n_ready
= e_ready
- ready
;
5919 int first
= sched_data
.first_slot
;
5920 int i
, best
, best_split
, filled
;
5922 for (i
= 0; i
< n_ready
; i
++)
5923 sched_types
[i
] = ia64_safe_type (ready
[i
]);
5925 find_best_packet (&best
, &best_packet
, ready
, sched_types
, n_ready
);
5936 fprintf (dump
, "// Selected bundles: %s %s (%d insns)\n",
5937 best_packet
->t1
->name
,
5938 best_packet
->t2
? best_packet
->t2
->name
: NULL
, best
);
5941 best_split
= itanium_split_issue (best_packet
, first
);
5942 packet_matches_p (best_packet
, best_split
, &filled
);
5944 for (i
= filled
; i
< best_split
; i
++)
5948 insn_nr
= find_best_insn (ready
, sched_types
, n_ready
, best_packet
, i
);
5951 rtx insn
= ready
[insn_nr
];
5952 memmove (ready
+ insn_nr
, ready
+ insn_nr
+ 1,
5953 (n_ready
- insn_nr
- 1) * sizeof (rtx
));
5954 memmove (sched_types
+ insn_nr
, sched_types
+ insn_nr
+ 1,
5955 (n_ready
- insn_nr
- 1) * sizeof (enum attr_type
));
5956 ready
[--n_ready
] = insn
;
5960 sched_data
.packet
= best_packet
;
5961 sched_data
.split
= best_split
;
5965 /* Dump information about the current scheduling state to file DUMP. */
5968 dump_current_packet (dump
)
5972 fprintf (dump
, "// %d slots filled:", sched_data
.cur
);
5973 for (i
= 0; i
< sched_data
.first_slot
; i
++)
5975 rtx insn
= sched_data
.insns
[i
];
5976 fprintf (dump
, " %s", type_names
[sched_data
.types
[i
]]);
5978 fprintf (dump
, "/%s", type_names
[ia64_safe_type (insn
)]);
5979 if (sched_data
.stopbit
[i
])
5980 fprintf (dump
, " ;;");
5982 fprintf (dump
, " :::");
5983 for (i
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
5985 rtx insn
= sched_data
.insns
[i
];
5986 enum attr_type t
= ia64_safe_type (insn
);
5987 fprintf (dump
, " (%d) %s", INSN_UID (insn
), type_names
[t
]);
5989 fprintf (dump
, "\n");
5992 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5996 schedule_stop (dump
)
5999 const struct ia64_packet
*best
= sched_data
.packet
;
6004 fprintf (dump
, "// Stop bit, cur = %d.\n", sched_data
.cur
);
6006 if (sched_data
.cur
== 0)
6009 fprintf (dump
, "// At start of bundle, so nothing to do.\n");
6011 rotate_two_bundles (NULL
);
6015 for (i
= -1; i
< NR_PACKETS
; i
++)
6017 /* This is a slight hack to give the current packet the first chance.
6018 This is done to avoid e.g. switching from MIB to MBB bundles. */
6019 const struct ia64_packet
*p
= (i
>= 0 ? packets
+ i
: sched_data
.packet
);
6020 int split
= get_split (p
, sched_data
.first_slot
);
6021 const struct bundle
*compare
;
6024 if (! packet_matches_p (p
, split
, &next
))
6027 compare
= next
> 3 ? p
->t2
: p
->t1
;
6030 if (compare
->possible_stop
)
6031 stoppos
= compare
->possible_stop
;
6035 if (stoppos
< next
|| stoppos
>= best_stop
)
6037 if (compare
->possible_stop
== 0)
6039 stoppos
= (next
> 3 ? 6 : 3);
6041 if (stoppos
< next
|| stoppos
>= best_stop
)
6045 fprintf (dump
, "// switching from %s %s to %s %s (stop at %d)\n",
6046 best
->t1
->name
, best
->t2
->name
, p
->t1
->name
, p
->t2
->name
,
6049 best_stop
= stoppos
;
6053 sched_data
.packet
= best
;
6054 cycle_end_fill_slots (dump
);
6055 while (sched_data
.cur
< best_stop
)
6057 sched_data
.types
[sched_data
.cur
] = best
->t
[sched_data
.cur
];
6058 sched_data
.insns
[sched_data
.cur
] = 0;
6059 sched_data
.stopbit
[sched_data
.cur
] = 0;
6062 sched_data
.stopbit
[sched_data
.cur
- 1] = 1;
6063 sched_data
.first_slot
= best_stop
;
6066 dump_current_packet (dump
);
6069 /* If necessary, perform one or two rotations on the scheduling state.
6070 This should only be called if we are starting a new cycle. */
6076 cycle_end_fill_slots (dump
);
6077 if (sched_data
.cur
== 6)
6078 rotate_two_bundles (dump
);
6079 else if (sched_data
.cur
>= 3)
6080 rotate_one_bundle (dump
);
6081 sched_data
.first_slot
= sched_data
.cur
;
6084 /* The clock cycle when ia64_sched_reorder was last called. */
6085 static int prev_cycle
;
6087 /* The first insn scheduled in the previous cycle. This is the saved
6088 value of sched_data.first_slot. */
6089 static int prev_first
;
6091 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6092 pad out the delay between MM (shifts, etc.) and integer operations. */
6095 nop_cycles_until (clock_var
, dump
)
6099 int prev_clock
= prev_cycle
;
6100 int cycles_left
= clock_var
- prev_clock
;
6101 bool did_stop
= false;
6103 /* Finish the previous cycle; pad it out with NOPs. */
6104 if (sched_data
.cur
== 3)
6106 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6108 maybe_rotate (dump
);
6110 else if (sched_data
.cur
> 0)
6113 int split
= itanium_split_issue (sched_data
.packet
, prev_first
);
6115 if (sched_data
.cur
< 3 && split
> 3)
6121 if (split
> sched_data
.cur
)
6124 for (i
= sched_data
.cur
; i
< split
; i
++)
6126 rtx t
= sched_emit_insn (gen_nop_type (sched_data
.packet
->t
[i
]));
6127 sched_data
.types
[i
] = sched_data
.packet
->t
[i
];
6128 sched_data
.insns
[i
] = t
;
6129 sched_data
.stopbit
[i
] = 0;
6131 sched_data
.cur
= split
;
6134 if (! need_stop
&& sched_data
.cur
> 0 && sched_data
.cur
< 6
6138 for (i
= sched_data
.cur
; i
< 6; i
++)
6140 rtx t
= sched_emit_insn (gen_nop_type (sched_data
.packet
->t
[i
]));
6141 sched_data
.types
[i
] = sched_data
.packet
->t
[i
];
6142 sched_data
.insns
[i
] = t
;
6143 sched_data
.stopbit
[i
] = 0;
6150 if (need_stop
|| sched_data
.cur
== 6)
6152 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6155 maybe_rotate (dump
);
6159 while (cycles_left
> 0)
6161 sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6162 sched_emit_insn (gen_nop_type (TYPE_M
));
6163 sched_emit_insn (gen_nop_type (TYPE_I
));
6164 if (cycles_left
> 1)
6166 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6169 sched_emit_insn (gen_nop_type (TYPE_I
));
6170 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6176 init_insn_group_barriers ();
6179 /* We are about to being issuing insns for this clock cycle.
6180 Override the default sort algorithm to better slot instructions. */
6183 ia64_internal_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
6184 reorder_type
, clock_var
)
6185 FILE *dump ATTRIBUTE_UNUSED
;
6186 int sched_verbose ATTRIBUTE_UNUSED
;
6189 int reorder_type
, clock_var
;
6192 int n_ready
= *pn_ready
;
6193 rtx
*e_ready
= ready
+ n_ready
;
6198 fprintf (dump
, "// ia64_sched_reorder (type %d):\n", reorder_type
);
6199 dump_current_packet (dump
);
  /* Work around the pipeline flush that will occur if the results of
     an MM instruction are accessed before the result is ready.  Intel
     documentation says this only happens with IALU, ISHF, ILOG, LD,
     and ST consumers, but experimental evidence shows that *any* non-MM
     type instruction will incur the flush.  */
6207 if (reorder_type
== 0 && clock_var
> 0 && ia64_final_schedule
)
6209 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6211 rtx insn
= *insnp
, link
;
6212 enum attr_itanium_class t
= ia64_safe_itanium_class (insn
);
6214 if (t
== ITANIUM_CLASS_MMMUL
6215 || t
== ITANIUM_CLASS_MMSHF
6216 || t
== ITANIUM_CLASS_MMSHFI
)
6219 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
6220 if (REG_NOTE_KIND (link
) == 0)
6222 rtx other
= XEXP (link
, 0);
6223 enum attr_itanium_class t0
= ia64_safe_itanium_class (other
);
6224 if (t0
== ITANIUM_CLASS_MMSHF
|| t0
== ITANIUM_CLASS_MMMUL
)
6226 nop_cycles_until (clock_var
, sched_verbose
? dump
: NULL
);
6234 prev_first
= sched_data
.first_slot
;
6235 prev_cycle
= clock_var
;
6237 if (reorder_type
== 0)
6238 maybe_rotate (sched_verbose
? dump
: NULL
);
6240 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6242 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6243 if (insnp
< e_ready
)
6246 enum attr_type t
= ia64_safe_type (insn
);
6247 if (t
== TYPE_UNKNOWN
)
6249 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6250 || asm_noperands (PATTERN (insn
)) >= 0)
6252 rtx lowest
= ready
[n_asms
];
6253 ready
[n_asms
] = insn
;
6259 rtx highest
= ready
[n_ready
- 1];
6260 ready
[n_ready
- 1] = insn
;
6262 if (ia64_final_schedule
&& group_barrier_needed_p (insn
))
6264 schedule_stop (sched_verbose
? dump
: NULL
);
6265 sched_data
.last_was_stop
= 1;
6266 maybe_rotate (sched_verbose
? dump
: NULL
);
6273 if (n_asms
< n_ready
)
6275 /* Some normal insns to process. Skip the asms. */
6279 else if (n_ready
> 0)
6281 /* Only asm insns left. */
6282 if (ia64_final_schedule
&& group_barrier_needed_p (ready
[n_ready
- 1]))
6284 schedule_stop (sched_verbose
? dump
: NULL
);
6285 sched_data
.last_was_stop
= 1;
6286 maybe_rotate (sched_verbose
? dump
: NULL
);
6288 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
6292 if (ia64_final_schedule
)
6294 int nr_need_stop
= 0;
6296 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
6297 if (safe_group_barrier_needed_p (*insnp
))
6300 /* Schedule a stop bit if
6301 - all insns require a stop bit, or
6302 - we are starting a new cycle and _any_ insns require a stop bit.
6303 The reason for the latter is that if our schedule is accurate, then
6304 the additional stop won't decrease performance at this point (since
6305 there's a split issue at this point anyway), but it gives us more
6306 freedom when scheduling the currently ready insns. */
6307 if ((reorder_type
== 0 && nr_need_stop
)
6308 || (reorder_type
== 1 && n_ready
== nr_need_stop
))
6310 schedule_stop (sched_verbose
? dump
: NULL
);
6311 sched_data
.last_was_stop
= 1;
6312 maybe_rotate (sched_verbose
? dump
: NULL
);
6313 if (reorder_type
== 1)
6320 /* Move down everything that needs a stop bit, preserving relative
6322 while (insnp
-- > ready
+ deleted
)
6323 while (insnp
>= ready
+ deleted
)
6326 if (! safe_group_barrier_needed_p (insn
))
6328 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
6334 if (deleted
!= nr_need_stop
)
6339 return itanium_reorder (sched_verbose
? dump
: NULL
,
6340 ready
, e_ready
, reorder_type
== 1);
6344 ia64_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
6351 return ia64_internal_sched_reorder (dump
, sched_verbose
, ready
,
6352 pn_ready
, 0, clock_var
);
6355 /* Like ia64_sched_reorder, but called after issuing each insn.
6356 Override the default sort algorithm to better slot instructions. */
6359 ia64_sched_reorder2 (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
6360 FILE *dump ATTRIBUTE_UNUSED
;
6361 int sched_verbose ATTRIBUTE_UNUSED
;
6366 if (sched_data
.last_was_stop
)
6369 /* Detect one special case and try to optimize it.
6370 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6371 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6372 if (sched_data
.first_slot
== 1
6373 && sched_data
.stopbit
[0]
6374 && ((sched_data
.cur
== 4
6375 && (sched_data
.types
[1] == TYPE_M
|| sched_data
.types
[1] == TYPE_A
)
6376 && (sched_data
.types
[2] == TYPE_I
|| sched_data
.types
[2] == TYPE_A
)
6377 && (sched_data
.types
[3] != TYPE_M
&& sched_data
.types
[3] != TYPE_A
))
6378 || (sched_data
.cur
== 3
6379 && (sched_data
.types
[1] == TYPE_M
6380 || sched_data
.types
[1] == TYPE_A
)
6381 && (sched_data
.types
[2] != TYPE_M
6382 && sched_data
.types
[2] != TYPE_I
6383 && sched_data
.types
[2] != TYPE_A
))))
6387 rtx stop
= sched_data
.insns
[1];
6389 /* Search backward for the stop bit that must be there. */
6394 stop
= PREV_INSN (stop
);
6395 if (GET_CODE (stop
) != INSN
)
6397 insn_code
= recog_memoized (stop
);
6399 /* Ignore .pred.rel.mutex.
6401 ??? Update this to ignore cycle display notes too
6402 ??? once those are implemented */
6403 if (insn_code
== CODE_FOR_pred_rel_mutex
6404 || insn_code
== CODE_FOR_prologue_use
)
6407 if (insn_code
== CODE_FOR_insn_group_barrier
)
6412 /* Adjust the stop bit's slot selector. */
6413 if (INTVAL (XVECEXP (PATTERN (stop
), 0, 0)) != 1)
6415 XVECEXP (PATTERN (stop
), 0, 0) = GEN_INT (3);
6417 sched_data
.stopbit
[0] = 0;
6418 sched_data
.stopbit
[2] = 1;
6420 sched_data
.types
[5] = sched_data
.types
[3];
6421 sched_data
.types
[4] = sched_data
.types
[2];
6422 sched_data
.types
[3] = sched_data
.types
[1];
6423 sched_data
.insns
[5] = sched_data
.insns
[3];
6424 sched_data
.insns
[4] = sched_data
.insns
[2];
6425 sched_data
.insns
[3] = sched_data
.insns
[1];
6426 sched_data
.stopbit
[5] = sched_data
.stopbit
[4] = sched_data
.stopbit
[3] = 0;
6427 sched_data
.cur
+= 2;
6428 sched_data
.first_slot
= 3;
6429 for (i
= 0; i
< NR_PACKETS
; i
++)
6431 const struct ia64_packet
*p
= packets
+ i
;
6432 if (p
->t
[0] == TYPE_M
&& p
->t
[1] == TYPE_F
&& p
->t
[2] == TYPE_B
)
6434 sched_data
.packet
= p
;
6438 rotate_one_bundle (sched_verbose
? dump
: NULL
);
6441 for (i
= 0; i
< NR_PACKETS
; i
++)
6443 const struct ia64_packet
*p
= packets
+ i
;
6444 int split
= get_split (p
, sched_data
.first_slot
);
6447 /* Disallow multiway branches here. */
6448 if (p
->t
[1] == TYPE_B
)
6451 if (packet_matches_p (p
, split
, &next
) && next
< best
)
6454 sched_data
.packet
= p
;
6455 sched_data
.split
= split
;
6464 int more
= ia64_internal_sched_reorder (dump
, sched_verbose
,
6469 /* Did we schedule a stop? If so, finish this cycle. */
6470 if (sched_data
.cur
== sched_data
.first_slot
)
6475 fprintf (dump
, "// Can't issue more this cycle; updating type array.\n");
6477 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
6479 dump_current_packet (dump
);
6483 /* We are about to issue INSN. Return the number of insns left on the
6484 ready queue that can be issued this cycle. */
6487 ia64_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
6491 int can_issue_more ATTRIBUTE_UNUSED
;
6493 enum attr_type t
= ia64_safe_type (insn
);
6495 if (sched_data
.last_was_stop
)
6497 int t
= sched_data
.first_slot
;
6500 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t
)), insn
);
6501 init_insn_group_barriers ();
6502 sched_data
.last_was_stop
= 0;
6505 if (t
== TYPE_UNKNOWN
)
6508 fprintf (dump
, "// Ignoring type %s\n", type_names
[t
]);
6509 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6510 || asm_noperands (PATTERN (insn
)) >= 0)
6512 /* This must be some kind of asm. Clear the scheduling state. */
6513 rotate_two_bundles (sched_verbose
? dump
: NULL
);
6514 if (ia64_final_schedule
)
6515 group_barrier_needed_p (insn
);
6520 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6521 important state info. Don't delete this test. */
6522 if (ia64_final_schedule
6523 && group_barrier_needed_p (insn
))
6526 sched_data
.stopbit
[sched_data
.cur
] = 0;
6527 sched_data
.insns
[sched_data
.cur
] = insn
;
6528 sched_data
.types
[sched_data
.cur
] = t
;
6532 fprintf (dump
, "// Scheduling insn %d of type %s\n",
6533 INSN_UID (insn
), type_names
[t
]);
6535 if (GET_CODE (insn
) == CALL_INSN
&& ia64_final_schedule
)
6537 schedule_stop (sched_verbose
? dump
: NULL
);
6538 sched_data
.last_was_stop
= 1;
6544 /* Free data allocated by ia64_sched_init. */
6547 ia64_sched_finish (dump
, sched_verbose
)
6552 fprintf (dump
, "// Finishing schedule.\n");
6553 rotate_two_bundles (NULL
);
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

emit_predicate_relation_info ()
{
  for (i = n_basic_blocks - 1; i >= 0; --i)
    {
      basic_block bb = BASIC_BLOCK (i);
      rtx head = bb->head;

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
	continue;
      if (GET_CODE (NEXT_INSN (head)) == NOTE
	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
	head = NEXT_INSN (head);

      for (r = PR_REG (0); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
	  {
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == bb->end)
	      bb->end = n;
	  }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  for (i = n_basic_blocks - 1; i >= 0; --i)
    {
      basic_block bb = BASIC_BLOCK (i);
      rtx insn = bb->head;

      while (1)
	{
	  if (GET_CODE (insn) == CALL_INSN
	      && GET_CODE (PATTERN (insn)) == COND_EXEC
	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	    {
	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	      if (bb->head == insn)
		bb->head = b;
	      if (bb->end == insn)
		bb->end = a;
	    }

	  if (insn == bb->end)
	    break;
	  insn = NEXT_INSN (insn);
	}
    }
}
/* Generate a NOP instruction of type T.  We will never generate L type
   nops.  */

  switch (t)
    {
    case TYPE_M: return gen_nop_m ();
    case TYPE_I: return gen_nop_i ();
    case TYPE_B: return gen_nop_b ();
    case TYPE_F: return gen_nop_f ();
    case TYPE_X: return gen_nop_x ();
    }
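
/* Background note (not from the original source): each IA-64 slot type
   needs a NOP encoded for its own execution unit, which is why a separate
   generator exists per type above; in assembly these appear as
   "nop.m 0", "nop.i 0", "nop.b 0", "nop.f 0", with a corresponding long
   form for the X unit.  */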
/* After the last scheduling pass, fill in NOPs.  It's easier to do this
   here than while scheduling.  */

  const struct bundle *b = 0;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
	continue;

      if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
	  || GET_CODE (insn) == CODE_LABEL)
	{
	  while (bundle_pos < 3)
	    {
	      emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
	      bundle_pos++;
	    }
	  if (GET_CODE (insn) != CODE_LABEL)
	    b = bundle + INTVAL (XVECEXP (pat, 0, 0));
	}
      else if (GET_CODE (pat) == UNSPEC_VOLATILE
	       && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
	{
	  int t = INTVAL (XVECEXP (pat, 0, 0));
	  while (bundle_pos < t)
	    {
	      emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
	      bundle_pos++;
	    }
	}

      if (bundle_pos == 3)
	b = 0;

      if (b && INSN_P (insn))
	{
	  t = ia64_safe_type (insn);
	  if (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
	    {
	      while (bundle_pos < 3)
		{
		  emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
		  bundle_pos++;
		}
	      continue;
	    }

	  if (t == TYPE_UNKNOWN)
	    continue;
	  while (bundle_pos < 3)
	    {
	      if (t == b->t[bundle_pos]
		  || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
				      || b->t[bundle_pos] == TYPE_I)))
		break;

	      emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
	      bundle_pos++;
	    }
	}
    }
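
/* Illustrative example (not from the original source): if an .mfi bundle
   only received a memory op before a stop, the loop above pads the
   remaining slots so that the final assembly looks something like

	{ .mfi
	  ld8 r14 = [r32]
	  nop.f 0
	  nop.i 0 ;;
	}

   where the register numbers are purely hypothetical.  */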
/* Perform machine dependent operations on the rtl chain INSNS.  */

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn (get_max_uid ());

  /* If optimizing, we'll have split before scheduling.  */
  split_all_insns (0);

  update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,

  if (ia64_flag_schedule_insns2)
    {
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;
      schedule_ebbs (rtl_dump_file);
      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);

      /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
	 place as they were during scheduling.  */
      emit_insn_group_barriers (rtl_dump_file, insns);
    }
  else
    emit_all_insn_group_barriers (rtl_dump_file, insns);

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      insn = get_last_insn ();
      if (! INSN_P (insn))
	insn = prev_active_insn (insn);
      if (GET_CODE (insn) == INSN
	  && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	insn = prev_active_insn (insn);
      if (GET_CODE (insn) == CALL_INSN)
	{
	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	  emit_insn (gen_break_f ());
	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	}
    }

  emit_predicate_relation_info ();
/* Return true if REGNO is used by the epilogue.  */

ia64_epilogue_uses (regno)
{
      /* When a function makes a call through a function descriptor, we
	 will write a (potentially) new value to "gp".  After returning
	 from such a call, we need to make sure the function restores the
	 original gp-value, even if the function itself does not use the
	 gp anymore.  */
      return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
	 input registers are marked as live at all function exits.  This
	 prevents the register allocator from using the input registers,
	 which in turn makes it possible to restart a system call after
	 an interrupt without having to save/restore the input registers.
	 This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
			       TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

      /* Conditional return patterns can't represent the use of `b0' as
	 the return address, so we force the value live this way.  */

      /* Likewise for ar.pfs, which is used by br.ret.  */
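
/* Illustrative example (not from the original source): the attribute
   checked above is attached by declaring a function as, e.g.,

	extern long sys_foo (long a, long b)
	  __attribute__ ((syscall_linkage));

   where sys_foo is a hypothetical name; all eight input registers then
   stay live at every exit of that function.  */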
/* Return true if REGNO is used by the frame unwinder.  */

ia64_eh_uses (regno)
{
  if (! reload_completed)
    return 0;

  if (current_frame_info.reg_save_b0
      && regno == current_frame_info.reg_save_b0)
    return 1;
  if (current_frame_info.reg_save_pr
      && regno == current_frame_info.reg_save_pr)
    return 1;
  if (current_frame_info.reg_save_ar_pfs
      && regno == current_frame_info.reg_save_ar_pfs)
    return 1;
  if (current_frame_info.reg_save_ar_unat
      && regno == current_frame_info.reg_save_ar_unat)
    return 1;
  if (current_frame_info.reg_save_ar_lc
      && regno == current_frame_info.reg_save_ar_lc)
    return 1;

  return 0;
}
/* For ia64, SYMBOL_REF_FLAG set means that it is a function.

   We add @ to the name if this goes in small data/bss.  We can only put
   a variable in small data/bss if it is defined in this module or a module
   that we are statically linked with.  We can't check the second condition,
   but TREE_STATIC gives us the first one.  */

/* ??? If we had IPA, we could check the second condition.  We could support
   programmer added section attributes if the variable is not defined in this
   module.  */

/* ??? See the v850 port for a cleaner way to do this.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

ia64_in_small_data_p (exp)
{
  if (TARGET_NO_SDATA)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".sdata") == 0
	  || strcmp (section, ".sbss") == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
	return true;
    }

  return false;
}
ia64_encode_section_info (decl, first)
     int first ATTRIBUTE_UNUSED;
{
  const char *symbol_str;
  bool is_local, is_small;

  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
      return;
    }

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) != VAR_DECL
      || GET_CODE (DECL_RTL (decl)) != MEM
      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
    return;

  symbol = XEXP (DECL_RTL (decl), 0);
  symbol_str = XSTR (symbol, 0);

  /* A variable is considered "local" if it is defined by this module.  */
  if (MODULE_LOCAL_P (decl))
    is_local = true;
  /* Otherwise, variables defined outside this object may not be local.  */
  else if (DECL_EXTERNAL (decl))
    is_local = false;
  /* Linkonce and weak data are never local.  */
  else if (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
    is_local = false;
  /* Static variables are always local.  */
  else if (! TREE_PUBLIC (decl))
    is_local = true;
  /* If PIC, then assume that any global name can be overridden by
     symbols resolved from other modules.  */
  /* Uninitialized COMMON variable may be unified with symbols
     resolved from other modules.  */
  else if (DECL_COMMON (decl)
	   && (DECL_INITIAL (decl) == NULL
	       || DECL_INITIAL (decl) == error_mark_node))
    is_local = false;
  /* Otherwise we're left with initialized (or non-common) global data
     which is of necessity defined locally.  */
  else
    is_local = true;

  /* Determine if DECL will wind up in .sdata/.sbss.  */
  is_small = ia64_in_small_data_p (decl);

  /* Finally, encode this into the symbol string.  */
  if (is_local && is_small)
    {
      if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
	return;

      len = strlen (symbol_str) + 1;
      newstr = alloca (len + 1);
      newstr[0] = SDATA_NAME_FLAG_CHAR;
      memcpy (newstr + 1, symbol_str, len);

      XSTR (symbol, 0) = ggc_alloc_string (newstr, len);
    }
  /* This decl is marked as being in small data/bss but it shouldn't
     be; one likely explanation for this is that the decl has been
     moved into a different section from the one it was in when
     targetm.encode_section_info was first called.  Remove the '@'.  */
  else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
    XSTR (symbol, 0) = ggc_strdup (symbol_str + 1);
}
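
/* Illustrative note (not from the original source): after this pass a
   small, locally defined variable "counter" has its SYMBOL_REF string
   rewritten to "@counter".  The '@' is only an internal marker meaning
   "addressable via gp"; it is stripped again before the name is written
   out to the assembler.  */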
/* Output assembly directives for prologue regions.  */

/* The current basic block number.  */

static int block_num;

/* True if we need a copy_state command at the start of the next block.  */

static int need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

process_epilogue ()
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (block_num != n_basic_blocks - 1)
    {
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = 1;
    }

  fprintf (asm_out_file, "\t.restore sp\n");
}
/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

process_set (asm_out_file, pat)
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == UNSPECV_ALLOC
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
	 shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
	abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
	       ia64_dbx_register_number (dest_regno));
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);
	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
	    {
	      if (INTVAL (op1) < 0)
		{
		  fputs ("\t.fframe ", asm_out_file);
		  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
			   -INTVAL (op1));
		  fputc ('\n', asm_out_file);
		}
	      else
		process_epilogue ();
	    }
	}
      else if (GET_CODE (src) == REG
	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
	process_epilogue ();
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

	  /* Saving return address pointer.  */
	  if (dest_regno != current_frame_info.reg_save_b0)
	    abort ();
	  fprintf (asm_out_file, "\t.save rp, r%d\n",
		   ia64_dbx_register_number (dest_regno));

	  if (dest_regno != current_frame_info.reg_save_pr)
	    abort ();
	  fprintf (asm_out_file, "\t.save pr, r%d\n",
		   ia64_dbx_register_number (dest_regno));

	case AR_UNAT_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_unat)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		   ia64_dbx_register_number (dest_regno));

	  if (dest_regno != current_frame_info.reg_save_ar_lc)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		   ia64_dbx_register_number (dest_regno));

	case STACK_POINTER_REGNUM:
	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
	      || ! frame_pointer_needed)
	    abort ();
	  fprintf (asm_out_file, "\t.vframe r%d\n",
		   ia64_dbx_register_number (dest_regno));

	  /* Everything else should indicate being stored to memory.  */
    }

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      if (GET_CODE (XEXP (dest, 0)) == REG)
	{
	  base = XEXP (dest, 0);
	}
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
	{
	  base = XEXP (XEXP (dest, 0), 0);
	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
	}

      if (base == hard_frame_pointer_rtx)
	{
	  saveop = ".savepsp";
	}
      else if (base == stack_pointer_rtx)
	saveop = ".savesp";

      src_regno = REGNO (src);

	  if (current_frame_info.reg_save_b0 != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);

	  if (current_frame_info.reg_save_pr != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);

	  if (current_frame_info.reg_save_ar_lc != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);

	  if (current_frame_info.reg_save_ar_pfs != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);

	case AR_UNAT_REGNUM:
	  if (current_frame_info.reg_save_ar_unat != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);

	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
		   1 << (src_regno - GR_REG (4)));

	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
		   1 << (src_regno - BR_REG (1)));

	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
		   1 << (src_regno - FR_REG (2)));

	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		   1 << (src_regno - FR_REG (12)));
    }
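
/* Illustrative output (not from the original source): a typical prologue
   that allocates a frame and saves ar.pfs and rp into general registers
   would be annotated by this function roughly as

	.fframe 16
	.save ar.pfs, r35
	.save rp, r34

   with the frame size and register numbers depending on the function.  */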
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

process_for_unwind_directive (asm_out_file, insn)
{
  if (flag_unwind_tables
      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      if (GET_CODE (insn) == NOTE
	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  block_num = NOTE_BASIC_BLOCK (insn)->index;

	  /* Restore unwind state from immediately before the epilogue.  */
	  if (need_copy_state)
	    {
	      fprintf (asm_out_file, "\t.body\n");
	      fprintf (asm_out_file, "\t.copy_state 1\n");
	      need_copy_state = 0;
	    }
	}

      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
	return;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
	pat = XEXP (pat, 0);
      else
	pat = PATTERN (insn);

      switch (GET_CODE (pat))
	{
	case SET:
	  process_set (asm_out_file, pat);
	  break;

	case PARALLEL:
	  {
	    int limit = XVECLEN (pat, 0);
	    for (par_index = 0; par_index < limit; par_index++)
	      {
		rtx x = XVECEXP (pat, 0, par_index);
		if (GET_CODE (x) == SET)
		  process_set (asm_out_file, x);
	      }
	  }
	  break;
	}
    }
}
ia64_init_builtins ()
{
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);
  tree endlink = void_list_node;

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));

  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_integer_type_node,
						 tree_cons (NULL_TREE,
							    long_integer_type_node,
							    endlink))));

  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node, endlink)));

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE, long_integer_type_node,
						 endlink)));

  /* __sync_lock_release_si */
  tree void_ftype_psi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
						      endlink));

  /* __sync_lock_release_di */
  tree void_ftype_pdi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
						      endlink));

#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
	       IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
	       IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
	       IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
	       build_function_type (ptr_type_node, endlink),
	       IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
	       build_function_type (void_type_node, endlink),
	       IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_NAND_AND_FETCH_DI);
}
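
/* Usage sketch (not from the original source), with a hypothetical
   variable "lock":

	static int lock;

	while (__sync_lock_test_and_set_si (&lock, 1) != 0)
	  continue;
	critical_section ();
	__sync_lock_release_si (&lock);

   The _si variants operate on int-sized objects, the _di variants on
   long-sized ones.  */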
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     enum machine_mode mode;
{
  rtx ret, label, tmp, ccv, insn, mem, value;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
	insn = gen_fetchadd_acq_si (ret, mem, value);
      else
	insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     enum machine_mode mode;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
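
/* Note (not from the original source): this expansion mirrors
   ia64_expand_fetch_and_op; the difference is only which register is
   handed back, the freshly computed result RET here versus the OLD
   snapshot taken before the operation there.  */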
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
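
/* Usage sketch (not from the original source), with hypothetical names:

	if (__sync_bool_compare_and_swap_si (&word, expected, desired))
	  take_fast_path ();

   __sync_val_compare_and_swap_si instead returns the value that was in
   memory before the attempt, whether or not it matched.  */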
/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
{
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
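
/* Note (not from the original source): the IA-64 xchg instruction always
   carries acquire semantics, so the single insn emitted here is already a
   sufficient barrier when the builtin is used to take a lock.  */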
/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     rtx target ATTRIBUTE_UNUSED;
{
  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
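
/* Note (not from the original source): no explicit fence is emitted here;
   the store of zero goes through a volatile MEM, and the port relies on
   that to produce the ordered release store (the `stsz.rel [ptr] = r0'
   form named in the comment above).  */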
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
/* For the HP-UX IA64 aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

ia64_hpux_function_arg_padding (mode, type)
     enum machine_mode mode;
{
  /* Exception to normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
     hardwired to be true.  */

  return((mode == BLKmode
	  ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	     && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
	  : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
	 ? downward : upward);
}
/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

ia64_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     unsigned HOST_WIDE_INT align;
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}
/* It is illegal to have relocations in shared segments on AIX.
   Pretend flag_pic is always set.  */

ia64_aix_select_section (exp, reloc, align)
     unsigned HOST_WIDE_INT align;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  default_elf_select_section (exp, reloc, align);
  flag_pic = save_pic;
}

ia64_aix_unique_section (decl, reloc)
{
  int save_pic = flag_pic;
  flag_pic = 1;
  default_unique_section (decl, reloc);
  flag_pic = save_pic;
}

ia64_aix_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     unsigned HOST_WIDE_INT align;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}