/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-attr.h"
42 #include "basic-block.h"
44 #include "sched-int.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  Nonzero while ASM_OUTPUT_LABEL is emitting.  */
int ia64_asm_output_label = 0;
/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def *ia64_compare_op0;
struct rtx_def *ia64_compare_op1;
/* Register names for ia64_expand_prologue.  Names of the 96 stacked
   registers r32..r127, indexed from zero.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  Names of the
   eight register-stack input registers.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
/* ??? These strings could be shared with REGISTER_NAMES.  Names of the
   eighty register-stack local registers.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
/* ??? These strings could be shared with REGISTER_NAMES.  Names of the
   eight register-stack output registers.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* String used with the -mfixed-range= option; NULL until the option
   is seen.  */
const char *ia64_fixed_range_string;
/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;
/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  (Comment continuation was lost in extraction — presumably
   "sections"; confirm against original.)  */
unsigned int ia64_section_threshold;
103 static int find_gr_spill
PARAMS ((int));
104 static int next_scratch_gr_reg
PARAMS ((void));
105 static void mark_reg_gr_used_mask
PARAMS ((rtx
, void *));
106 static void ia64_compute_frame_size
PARAMS ((HOST_WIDE_INT
));
107 static void setup_spill_pointers
PARAMS ((int, rtx
, HOST_WIDE_INT
));
108 static void finish_spill_pointers
PARAMS ((void));
109 static rtx spill_restore_mem
PARAMS ((rtx
, HOST_WIDE_INT
));
110 static void do_spill
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
, rtx
));
111 static void do_restore
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
));
112 static rtx gen_movdi_x
PARAMS ((rtx
, rtx
, rtx
));
113 static rtx gen_fr_spill_x
PARAMS ((rtx
, rtx
, rtx
));
114 static rtx gen_fr_restore_x
PARAMS ((rtx
, rtx
, rtx
));
116 static enum machine_mode hfa_element_mode
PARAMS ((tree
, int));
117 static void fix_range
PARAMS ((const char *));
118 static void ia64_add_gc_roots
PARAMS ((void));
119 static void ia64_init_machine_status
PARAMS ((struct function
*));
120 static void ia64_mark_machine_status
PARAMS ((struct function
*));
121 static void ia64_free_machine_status
PARAMS ((struct function
*));
122 static void emit_insn_group_barriers
PARAMS ((FILE *, rtx
));
123 static void emit_all_insn_group_barriers
PARAMS ((FILE *, rtx
));
124 static void emit_predicate_relation_info
PARAMS ((void));
125 static void process_epilogue
PARAMS ((void));
126 static int process_set
PARAMS ((FILE *, rtx
));
128 static rtx ia64_expand_fetch_and_op
PARAMS ((optab
, enum machine_mode
,
130 static rtx ia64_expand_op_and_fetch
PARAMS ((optab
, enum machine_mode
,
132 static rtx ia64_expand_compare_and_swap
PARAMS ((enum machine_mode
, int,
134 static rtx ia64_expand_lock_test_and_set
PARAMS ((enum machine_mode
,
136 static rtx ia64_expand_lock_release
PARAMS ((enum machine_mode
, tree
, rtx
));
138 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
141 call_operand (op
, mode
)
143 enum machine_mode mode
;
145 if (mode
!= GET_MODE (op
))
148 return (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == REG
149 || (GET_CODE (op
) == SUBREG
&& GET_CODE (XEXP (op
, 0)) == REG
));
152 /* Return 1 if OP refers to a symbol in the sdata section. */
155 sdata_symbolic_operand (op
, mode
)
157 enum machine_mode mode ATTRIBUTE_UNUSED
;
159 switch (GET_CODE (op
))
162 if (GET_CODE (XEXP (op
, 0)) != PLUS
163 || GET_CODE (XEXP (XEXP (op
, 0), 0)) != SYMBOL_REF
)
165 op
= XEXP (XEXP (op
, 0), 0);
169 if (CONSTANT_POOL_ADDRESS_P (op
))
170 return GET_MODE_SIZE (get_pool_mode (op
)) <= ia64_section_threshold
;
172 return XSTR (op
, 0)[0] == SDATA_NAME_FLAG_CHAR
;
181 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
184 got_symbolic_operand (op
, mode
)
186 enum machine_mode mode ATTRIBUTE_UNUSED
;
188 switch (GET_CODE (op
))
192 if (GET_CODE (op
) != PLUS
)
194 if (GET_CODE (XEXP (op
, 0)) != SYMBOL_REF
)
197 if (GET_CODE (op
) != CONST_INT
)
202 /* Ok if we're not using GOT entries at all. */
203 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
206 /* "Ok" while emitting rtl, since otherwise we won't be provided
207 with the entire offset during emission, which makes it very
208 hard to split the offset into high and low parts. */
209 if (rtx_equal_function_value_matters
)
212 /* Force the low 14 bits of the constant to zero so that we do not
213 use up so many GOT entries. */
214 return (INTVAL (op
) & 0x3fff) == 0;
226 /* Return 1 if OP refers to a symbol. */
229 symbolic_operand (op
, mode
)
231 enum machine_mode mode ATTRIBUTE_UNUSED
;
233 switch (GET_CODE (op
))
246 /* Return 1 if OP refers to a function. */
249 function_operand (op
, mode
)
251 enum machine_mode mode ATTRIBUTE_UNUSED
;
253 if (GET_CODE (op
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (op
))
259 /* Return 1 if OP is setjmp or a similar function. */
261 /* ??? This is an unsatisfying solution. Should rethink. */
264 setjmp_operand (op
, mode
)
266 enum machine_mode mode ATTRIBUTE_UNUSED
;
271 if (GET_CODE (op
) != SYMBOL_REF
)
276 /* The following code is borrowed from special_function_p in calls.c. */
278 /* Disregard prefix _, __ or __x. */
281 if (name
[1] == '_' && name
[2] == 'x')
283 else if (name
[1] == '_')
293 && (! strcmp (name
, "setjmp")
294 || ! strcmp (name
, "setjmp_syscall")))
296 && ! strcmp (name
, "sigsetjmp"))
298 && ! strcmp (name
, "savectx")));
300 else if ((name
[0] == 'q' && name
[1] == 's'
301 && ! strcmp (name
, "qsetjmp"))
302 || (name
[0] == 'v' && name
[1] == 'f'
303 && ! strcmp (name
, "vfork")))
309 /* Return 1 if OP is a general operand, but when pic exclude symbolic
312 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
313 from PREDICATE_CODES. */
316 move_operand (op
, mode
)
318 enum machine_mode mode
;
320 if (! TARGET_NO_PIC
&& symbolic_operand (op
, mode
))
323 return general_operand (op
, mode
);
326 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
329 gr_register_operand (op
, mode
)
331 enum machine_mode mode
;
333 if (! register_operand (op
, mode
))
335 if (GET_CODE (op
) == SUBREG
)
336 op
= SUBREG_REG (op
);
337 if (GET_CODE (op
) == REG
)
339 unsigned int regno
= REGNO (op
);
340 if (regno
< FIRST_PSEUDO_REGISTER
)
341 return GENERAL_REGNO_P (regno
);
346 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
349 fr_register_operand (op
, mode
)
351 enum machine_mode mode
;
353 if (! register_operand (op
, mode
))
355 if (GET_CODE (op
) == SUBREG
)
356 op
= SUBREG_REG (op
);
357 if (GET_CODE (op
) == REG
)
359 unsigned int regno
= REGNO (op
);
360 if (regno
< FIRST_PSEUDO_REGISTER
)
361 return FR_REGNO_P (regno
);
366 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
369 grfr_register_operand (op
, mode
)
371 enum machine_mode mode
;
373 if (! register_operand (op
, mode
))
375 if (GET_CODE (op
) == SUBREG
)
376 op
= SUBREG_REG (op
);
377 if (GET_CODE (op
) == REG
)
379 unsigned int regno
= REGNO (op
);
380 if (regno
< FIRST_PSEUDO_REGISTER
)
381 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
386 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
389 gr_nonimmediate_operand (op
, mode
)
391 enum machine_mode mode
;
393 if (! nonimmediate_operand (op
, mode
))
395 if (GET_CODE (op
) == SUBREG
)
396 op
= SUBREG_REG (op
);
397 if (GET_CODE (op
) == REG
)
399 unsigned int regno
= REGNO (op
);
400 if (regno
< FIRST_PSEUDO_REGISTER
)
401 return GENERAL_REGNO_P (regno
);
406 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
409 fr_nonimmediate_operand (op
, mode
)
411 enum machine_mode mode
;
413 if (! nonimmediate_operand (op
, mode
))
415 if (GET_CODE (op
) == SUBREG
)
416 op
= SUBREG_REG (op
);
417 if (GET_CODE (op
) == REG
)
419 unsigned int regno
= REGNO (op
);
420 if (regno
< FIRST_PSEUDO_REGISTER
)
421 return FR_REGNO_P (regno
);
426 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
429 grfr_nonimmediate_operand (op
, mode
)
431 enum machine_mode mode
;
433 if (! nonimmediate_operand (op
, mode
))
435 if (GET_CODE (op
) == SUBREG
)
436 op
= SUBREG_REG (op
);
437 if (GET_CODE (op
) == REG
)
439 unsigned int regno
= REGNO (op
);
440 if (regno
< FIRST_PSEUDO_REGISTER
)
441 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
446 /* Return 1 if OP is a GR register operand, or zero. */
449 gr_reg_or_0_operand (op
, mode
)
451 enum machine_mode mode
;
453 return (op
== const0_rtx
|| gr_register_operand (op
, mode
));
456 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
459 gr_reg_or_5bit_operand (op
, mode
)
461 enum machine_mode mode
;
463 return ((GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 32)
464 || GET_CODE (op
) == CONSTANT_P_RTX
465 || gr_register_operand (op
, mode
));
468 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
471 gr_reg_or_6bit_operand (op
, mode
)
473 enum machine_mode mode
;
475 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
476 || GET_CODE (op
) == CONSTANT_P_RTX
477 || gr_register_operand (op
, mode
));
480 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
483 gr_reg_or_8bit_operand (op
, mode
)
485 enum machine_mode mode
;
487 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
488 || GET_CODE (op
) == CONSTANT_P_RTX
489 || gr_register_operand (op
, mode
));
492 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
495 grfr_reg_or_8bit_operand (op
, mode
)
497 enum machine_mode mode
;
499 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
500 || GET_CODE (op
) == CONSTANT_P_RTX
501 || grfr_register_operand (op
, mode
));
504 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
508 gr_reg_or_8bit_adjusted_operand (op
, mode
)
510 enum machine_mode mode
;
512 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_L (INTVAL (op
)))
513 || GET_CODE (op
) == CONSTANT_P_RTX
514 || gr_register_operand (op
, mode
));
517 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
518 immediate and an 8 bit adjusted immediate operand. This is necessary
519 because when we emit a compare, we don't know what the condition will be,
520 so we need the union of the immediates accepted by GT and LT. */
523 gr_reg_or_8bit_and_adjusted_operand (op
, mode
)
525 enum machine_mode mode
;
527 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
))
528 && CONST_OK_FOR_L (INTVAL (op
)))
529 || GET_CODE (op
) == CONSTANT_P_RTX
530 || gr_register_operand (op
, mode
));
533 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
536 gr_reg_or_14bit_operand (op
, mode
)
538 enum machine_mode mode
;
540 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_I (INTVAL (op
)))
541 || GET_CODE (op
) == CONSTANT_P_RTX
542 || gr_register_operand (op
, mode
));
545 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
548 gr_reg_or_22bit_operand (op
, mode
)
550 enum machine_mode mode
;
552 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_J (INTVAL (op
)))
553 || GET_CODE (op
) == CONSTANT_P_RTX
554 || gr_register_operand (op
, mode
));
557 /* Return 1 if OP is a 6 bit immediate operand. */
560 shift_count_operand (op
, mode
)
562 enum machine_mode mode ATTRIBUTE_UNUSED
;
564 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
565 || GET_CODE (op
) == CONSTANT_P_RTX
);
568 /* Return 1 if OP is a 5 bit immediate operand. */
571 shift_32bit_count_operand (op
, mode
)
573 enum machine_mode mode ATTRIBUTE_UNUSED
;
575 return ((GET_CODE (op
) == CONST_INT
576 && (INTVAL (op
) >= 0 && INTVAL (op
) < 32))
577 || GET_CODE (op
) == CONSTANT_P_RTX
);
580 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
583 shladd_operand (op
, mode
)
585 enum machine_mode mode ATTRIBUTE_UNUSED
;
587 return (GET_CODE (op
) == CONST_INT
588 && (INTVAL (op
) == 2 || INTVAL (op
) == 4
589 || INTVAL (op
) == 8 || INTVAL (op
) == 16));
592 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
595 fetchadd_operand (op
, mode
)
597 enum machine_mode mode ATTRIBUTE_UNUSED
;
599 return (GET_CODE (op
) == CONST_INT
600 && (INTVAL (op
) == -16 || INTVAL (op
) == -8 ||
601 INTVAL (op
) == -4 || INTVAL (op
) == -1 ||
602 INTVAL (op
) == 1 || INTVAL (op
) == 4 ||
603 INTVAL (op
) == 8 || INTVAL (op
) == 16));
606 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
609 fr_reg_or_fp01_operand (op
, mode
)
611 enum machine_mode mode
;
613 return ((GET_CODE (op
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (op
))
614 || fr_register_operand (op
, mode
));
617 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
618 POST_MODIFY with a REG as displacement. */
621 destination_operand (op
, mode
)
623 enum machine_mode mode
;
625 if (! nonimmediate_operand (op
, mode
))
627 if (GET_CODE (op
) == MEM
628 && GET_CODE (XEXP (op
, 0)) == POST_MODIFY
629 && GET_CODE (XEXP (XEXP (XEXP (op
, 0), 1), 1)) == REG
)
634 /* Like memory_operand, but don't allow post-increments. */
637 not_postinc_memory_operand (op
, mode
)
639 enum machine_mode mode
;
641 return (memory_operand (op
, mode
)
642 && GET_RTX_CLASS (GET_CODE (XEXP (op
, 0))) != 'a');
645 /* Return 1 if this is a comparison operator, which accepts an normal 8-bit
646 signed immediate operand. */
649 normal_comparison_operator (op
, mode
)
651 enum machine_mode mode
;
653 enum rtx_code code
= GET_CODE (op
);
654 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
655 && (code
== EQ
|| code
== NE
656 || code
== GT
|| code
== LE
|| code
== GTU
|| code
== LEU
));
659 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
660 signed immediate operand. */
663 adjusted_comparison_operator (op
, mode
)
665 enum machine_mode mode
;
667 enum rtx_code code
= GET_CODE (op
);
668 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
669 && (code
== LT
|| code
== GE
|| code
== LTU
|| code
== GEU
));
672 /* Return 1 if this is a signed inequality operator. */
675 signed_inequality_operator (op
, mode
)
677 enum machine_mode mode
;
679 enum rtx_code code
= GET_CODE (op
);
680 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
681 && (code
== GE
|| code
== GT
682 || code
== LE
|| code
== LT
));
685 /* Return 1 if this operator is valid for predication. */
688 predicate_operator (op
, mode
)
690 enum machine_mode mode
;
692 enum rtx_code code
= GET_CODE (op
);
693 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
694 && (code
== EQ
|| code
== NE
));
697 /* Return 1 if this is the ar.lc register. */
700 ar_lc_reg_operand (op
, mode
)
702 enum machine_mode mode
;
704 return (GET_MODE (op
) == DImode
705 && (mode
== DImode
|| mode
== VOIDmode
)
706 && GET_CODE (op
) == REG
707 && REGNO (op
) == AR_LC_REGNUM
);
710 /* Return 1 if this is the ar.ccv register. */
713 ar_ccv_reg_operand (op
, mode
)
715 enum machine_mode mode
;
717 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
718 && GET_CODE (op
) == REG
719 && REGNO (op
) == AR_CCV_REGNUM
);
722 /* Like general_operand, but don't allow (mem (addressof)). */
725 general_tfmode_operand (op
, mode
)
727 enum machine_mode mode
;
729 if (! general_operand (op
, mode
))
731 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
739 destination_tfmode_operand (op
, mode
)
741 enum machine_mode mode
;
743 if (! destination_operand (op
, mode
))
745 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
753 tfreg_or_fp01_operand (op
, mode
)
755 enum machine_mode mode
;
757 if (GET_CODE (op
) == SUBREG
)
759 return fr_reg_or_fp01_operand (op
, mode
);
762 /* Return 1 if the operands of a move are ok. */
765 ia64_move_ok (dst
, src
)
768 /* If we're under init_recog_no_volatile, we'll not be able to use
769 memory_operand. So check the code directly and don't worry about
770 the validity of the underlying address, which should have been
771 checked elsewhere anyway. */
772 if (GET_CODE (dst
) != MEM
)
774 if (GET_CODE (src
) == MEM
)
776 if (register_operand (src
, VOIDmode
))
779 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
780 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
781 return src
== const0_rtx
;
783 return GET_CODE (src
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (src
);
786 /* Check if OP is a mask suitible for use with SHIFT in a dep.z instruction.
787 Return the length of the field, or <= 0 on failure. */
790 ia64_depz_field_mask (rop
, rshift
)
793 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
794 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
796 /* Get rid of the zero bits we're shifting in. */
799 /* We must now have a solid block of 1's at bit 0. */
800 return exact_log2 (op
+ 1);
803 /* Expand a symbolic constant load. */
804 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
807 ia64_expand_load_address (dest
, src
, scratch
)
808 rtx dest
, src
, scratch
;
812 /* The destination could be a MEM during initial rtl generation,
813 which isn't a valid destination for the PIC load address patterns. */
814 if (! register_operand (dest
, DImode
))
815 temp
= gen_reg_rtx (DImode
);
820 emit_insn (gen_load_gprel64 (temp
, src
));
821 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (src
))
822 emit_insn (gen_load_fptr (temp
, src
));
823 else if (sdata_symbolic_operand (src
, DImode
))
824 emit_insn (gen_load_gprel (temp
, src
));
825 else if (GET_CODE (src
) == CONST
826 && GET_CODE (XEXP (src
, 0)) == PLUS
827 && GET_CODE (XEXP (XEXP (src
, 0), 1)) == CONST_INT
828 && (INTVAL (XEXP (XEXP (src
, 0), 1)) & 0x1fff) != 0)
830 rtx subtarget
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
831 rtx sym
= XEXP (XEXP (src
, 0), 0);
832 HOST_WIDE_INT ofs
, hi
, lo
;
834 /* Split the offset into a sign extended 14-bit low part
835 and a complementary high part. */
836 ofs
= INTVAL (XEXP (XEXP (src
, 0), 1));
837 lo
= ((ofs
& 0x3fff) ^ 0x2000) - 0x2000;
841 scratch
= no_new_pseudos
? subtarget
: gen_reg_rtx (DImode
);
843 emit_insn (gen_load_symptr (subtarget
, plus_constant (sym
, hi
),
845 emit_insn (gen_adddi3 (temp
, subtarget
, GEN_INT (lo
)));
851 scratch
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
853 insn
= emit_insn (gen_load_symptr (temp
, src
, scratch
));
854 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_EQUAL
, src
, REG_NOTES (insn
));
858 emit_move_insn (dest
, temp
);
862 ia64_gp_save_reg (setjmp_p
)
865 rtx save
= cfun
->machine
->ia64_gp_save
;
869 /* We can't save GP in a pseudo if we are calling setjmp, because
870 pseudos won't be restored by longjmp. For now, we save it in r4. */
871 /* ??? It would be more efficient to save this directly into a stack
872 slot. Unfortunately, the stack slot address gets cse'd across
873 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
876 /* ??? Get the barf bag, Virginia. We've got to replace this thing
877 in place, since this rtx is used in exception handling receivers.
878 Moreover, we must get this rtx out of regno_reg_rtx or reload
879 will do the wrong thing. */
880 unsigned int old_regno
= REGNO (save
);
881 if (setjmp_p
&& old_regno
!= GR_REG (4))
883 REGNO (save
) = GR_REG (4);
884 regno_reg_rtx
[old_regno
] = gen_rtx_raw_REG (DImode
, old_regno
);
890 save
= gen_rtx_REG (DImode
, GR_REG (4));
892 save
= gen_rtx_REG (DImode
, LOC_REG (0));
894 save
= gen_reg_rtx (DImode
);
895 cfun
->machine
->ia64_gp_save
= save
;
901 /* Split a post-reload TImode reference into two DImode components. */
904 ia64_split_timode (out
, in
, scratch
)
908 switch (GET_CODE (in
))
911 out
[0] = gen_rtx_REG (DImode
, REGNO (in
));
912 out
[1] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
917 rtx base
= XEXP (in
, 0);
919 switch (GET_CODE (base
))
922 out
[0] = change_address (in
, DImode
, NULL_RTX
);
925 base
= XEXP (base
, 0);
926 out
[0] = change_address (in
, DImode
, NULL_RTX
);
929 /* Since we're changing the mode, we need to change to POST_MODIFY
930 as well to preserve the size of the increment. Either that or
931 do the update in two steps, but we've already got this scratch
932 register handy so let's use it. */
934 base
= XEXP (base
, 0);
935 out
[0] = change_address (in
, DImode
,
936 gen_rtx_POST_MODIFY (Pmode
, base
,plus_constant (base
, 16)));
939 base
= XEXP (base
, 0);
940 out
[0] = change_address (in
, DImode
,
941 gen_rtx_POST_MODIFY (Pmode
, base
,plus_constant (base
, -16)));
947 if (scratch
== NULL_RTX
)
949 out
[1] = change_address (in
, DImode
, scratch
);
950 return gen_adddi3 (scratch
, base
, GEN_INT (8));
955 split_double (in
, &out
[0], &out
[1]);
963 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
964 through memory plus an extra GR scratch register. Except that you can
965 either get the first from SECONDARY_MEMORY_NEEDED or the second from
966 SECONDARY_RELOAD_CLASS, but not both.
968 We got into problems in the first place by allowing a construct like
969 (subreg:TF (reg:TI)), which we got from a union containing a long double.
970 This solution attempts to prevent this situation from ocurring. When
971 we see something like the above, we spill the inner register to memory. */
974 spill_tfmode_operand (in
, force
)
978 if (GET_CODE (in
) == SUBREG
979 && GET_MODE (SUBREG_REG (in
)) == TImode
980 && GET_CODE (SUBREG_REG (in
)) == REG
)
982 rtx mem
= gen_mem_addressof (SUBREG_REG (in
), NULL_TREE
);
983 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
985 else if (force
&& GET_CODE (in
) == REG
)
987 rtx mem
= gen_mem_addressof (in
, NULL_TREE
);
988 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
990 else if (GET_CODE (in
) == MEM
991 && GET_CODE (XEXP (in
, 0)) == ADDRESSOF
)
993 return change_address (in
, TFmode
, copy_to_reg (XEXP (in
, 0)));
999 /* Emit comparison instruction if necessary, returning the expression
1000 that holds the compare result in the proper mode. */
1003 ia64_expand_compare (code
, mode
)
1005 enum machine_mode mode
;
1007 rtx op0
= ia64_compare_op0
, op1
= ia64_compare_op1
;
1010 /* If we have a BImode input, then we already have a compare result, and
1011 do not need to emit another comparison. */
1012 if (GET_MODE (op0
) == BImode
)
1014 if ((code
== NE
|| code
== EQ
) && op1
== const0_rtx
)
1021 cmp
= gen_reg_rtx (BImode
);
1022 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1023 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
)));
1027 return gen_rtx_fmt_ee (code
, mode
, cmp
, const0_rtx
);
1030 /* Emit the appropriate sequence for a call. */
1033 ia64_expand_call (retval
, addr
, nextarg
, sibcall_p
)
1039 rtx insn
, b0
, gp_save
, narg_rtx
;
1042 addr
= XEXP (addr
, 0);
1043 b0
= gen_rtx_REG (DImode
, R_BR (0));
1047 else if (IN_REGNO_P (REGNO (nextarg
)))
1048 narg
= REGNO (nextarg
) - IN_REG (0);
1050 narg
= REGNO (nextarg
) - OUT_REG (0);
1051 narg_rtx
= GEN_INT (narg
);
1053 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
1056 insn
= gen_sibcall_nopic (addr
, narg_rtx
, b0
);
1058 insn
= gen_call_nopic (addr
, narg_rtx
, b0
);
1060 insn
= gen_call_value_nopic (retval
, addr
, narg_rtx
, b0
);
1061 emit_call_insn (insn
);
1068 gp_save
= ia64_gp_save_reg (setjmp_operand (addr
, VOIDmode
));
1070 /* If this is an indirect call, then we have the address of a descriptor. */
1071 if (! symbolic_operand (addr
, VOIDmode
))
1076 emit_move_insn (gp_save
, pic_offset_table_rtx
);
1078 dest
= force_reg (DImode
, gen_rtx_MEM (DImode
, addr
));
1079 emit_move_insn (pic_offset_table_rtx
,
1080 gen_rtx_MEM (DImode
, plus_constant (addr
, 8)));
1083 insn
= gen_sibcall_pic (dest
, narg_rtx
, b0
);
1085 insn
= gen_call_pic (dest
, narg_rtx
, b0
);
1087 insn
= gen_call_value_pic (retval
, dest
, narg_rtx
, b0
);
1088 emit_call_insn (insn
);
1091 emit_move_insn (pic_offset_table_rtx
, gp_save
);
1093 else if (TARGET_CONST_GP
)
1096 insn
= gen_sibcall_nopic (addr
, narg_rtx
, b0
);
1098 insn
= gen_call_nopic (addr
, narg_rtx
, b0
);
1100 insn
= gen_call_value_nopic (retval
, addr
, narg_rtx
, b0
);
1101 emit_call_insn (insn
);
1106 emit_call_insn (gen_sibcall_pic (addr
, narg_rtx
, b0
));
1109 emit_move_insn (gp_save
, pic_offset_table_rtx
);
1112 insn
= gen_call_pic (addr
, narg_rtx
, b0
);
1114 insn
= gen_call_value_pic (retval
, addr
, narg_rtx
, b0
);
1115 emit_call_insn (insn
);
1117 emit_move_insn (pic_offset_table_rtx
, gp_save
);
1122 /* Begin the assembly file. */
1125 emit_safe_across_calls (f
)
1128 unsigned int rs
, re
;
1135 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
1139 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
1143 fputs ("\t.pred.safe_across_calls ", f
);
1149 fprintf (f
, "p%u", rs
);
1151 fprintf (f
, "p%u-p%u", rs
, re
- 1);
1159 /* Structure to be filled in by ia64_compute_frame_size with register
1160 save masks and offsets for the current function. */
1162 struct ia64_frame_info
1164 HOST_WIDE_INT total_size
; /* size of the stack frame, not including
1165 the caller's scratch area. */
1166 HOST_WIDE_INT spill_cfa_off
; /* top of the reg spill area from the cfa. */
1167 HOST_WIDE_INT spill_size
; /* size of the gr/br/fr spill area. */
1168 HOST_WIDE_INT extra_spill_size
; /* size of spill area for others. */
1169 HARD_REG_SET mask
; /* mask of saved registers. */
1170 unsigned int gr_used_mask
; /* mask of registers in use as gr spill
1171 registers or long-term scratches. */
1172 int n_spilled
; /* number of spilled registers. */
1173 int reg_fp
; /* register for fp. */
1174 int reg_save_b0
; /* save register for b0. */
1175 int reg_save_pr
; /* save register for prs. */
1176 int reg_save_ar_pfs
; /* save register for ar.pfs. */
1177 int reg_save_ar_unat
; /* save register for ar.unat. */
1178 int reg_save_ar_lc
; /* save register for ar.lc. */
1179 int n_input_regs
; /* number of input registers used. */
1180 int n_local_regs
; /* number of local registers used. */
1181 int n_output_regs
; /* number of output registers used. */
1182 int n_rotate_regs
; /* number of rotating registers used. */
1184 char need_regstk
; /* true if a .regstk directive needed. */
1185 char initialized
; /* true if the data is finalized. */
1188 /* Current frame information calculated by ia64_compute_frame_size. */
1189 static struct ia64_frame_info current_frame_info
;
1191 /* Helper function for ia64_compute_frame_size: find an appropriate general
1192 register to spill some special register to. SPECIAL_SPILL_MASK contains
1193 bits in GR0 to GR31 that have already been allocated by this routine.
1194 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1197 find_gr_spill (try_locals
)
1202 /* If this is a leaf function, first try an otherwise unused
1203 call-clobbered register. */
1204 if (current_function_is_leaf
)
1206 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1207 if (! regs_ever_live
[regno
]
1208 && call_used_regs
[regno
]
1209 && ! fixed_regs
[regno
]
1210 && ! global_regs
[regno
]
1211 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1213 current_frame_info
.gr_used_mask
|= 1 << regno
;
1220 regno
= current_frame_info
.n_local_regs
;
1221 /* If there is a frame pointer, then we can't use loc79, because
1222 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1223 reg_name switching code in ia64_expand_prologue. */
1224 if (regno
< (80 - frame_pointer_needed
))
1226 current_frame_info
.n_local_regs
= regno
+ 1;
1227 return LOC_REG (0) + regno
;
1231 /* Failed to find a general register to spill to. Must use stack. */
1235 /* In order to make for nice schedules, we try to allocate every temporary
1236 to a different register. We must of course stay away from call-saved,
1237 fixed, and global registers. We must also stay away from registers
1238 allocated in current_frame_info.gr_used_mask, since those include regs
1239 used all through the prologue.
1241 Any register allocated here must be used immediately. The idea is to
1242 aid scheduling, not to solve data flow problems. */
1244 static int last_scratch_gr_reg
;
1247 next_scratch_gr_reg ()
1251 for (i
= 0; i
< 32; ++i
)
1253 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
1254 if (call_used_regs
[regno
]
1255 && ! fixed_regs
[regno
]
1256 && ! global_regs
[regno
]
1257 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1259 last_scratch_gr_reg
= regno
;
1264 /* There must be _something_ available. */
1268 /* Helper function for ia64_compute_frame_size, called through
1269 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1272 mark_reg_gr_used_mask (reg
, data
)
1274 void *data ATTRIBUTE_UNUSED
;
1276 unsigned int regno
= REGNO (reg
);
1278 current_frame_info
.gr_used_mask
|= 1 << regno
;
1281 /* Returns the number of bytes offset between the frame pointer and the stack
1282 pointer for the current function. SIZE is the number of bytes of space
1283 needed for local variables. */
1286 ia64_compute_frame_size (size
)
1289 HOST_WIDE_INT total_size
;
1290 HOST_WIDE_INT spill_size
= 0;
1291 HOST_WIDE_INT extra_spill_size
= 0;
1292 HOST_WIDE_INT pretend_args_size
;
1295 int spilled_gr_p
= 0;
1296 int spilled_fr_p
= 0;
1300 if (current_frame_info
.initialized
)
1303 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
1304 CLEAR_HARD_REG_SET (mask
);
1306 /* Don't allocate scratches to the return register. */
1307 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
1309 /* Don't allocate scratches to the EH scratch registers. */
1310 if (cfun
->machine
->ia64_eh_epilogue_sp
)
1311 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
1312 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
1313 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
1315 /* Find the size of the register stack frame. We have only 80 local
1316 registers, because we reserve 8 for the inputs and 8 for the
1319 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1320 since we'll be adjusting that down later. */
1321 regno
= LOC_REG (78) + ! frame_pointer_needed
;
1322 for (; regno
>= LOC_REG (0); regno
--)
1323 if (regs_ever_live
[regno
])
1325 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
1327 /* For functions marked with the syscall_linkage attribute, we must mark
1328 all eight input registers as in use, so that locals aren't visible to
1331 if (cfun
->machine
->n_varargs
> 0
1332 || lookup_attribute ("syscall_linkage",
1333 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
1334 current_frame_info
.n_input_regs
= 8;
1337 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
1338 if (regs_ever_live
[regno
])
1340 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
1343 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
1344 if (regs_ever_live
[regno
])
1346 i
= regno
- OUT_REG (0) + 1;
1348 /* When -p profiling, we need one output register for the mcount argument.
1349 Likwise for -a profiling for the bb_init_func argument. For -ax
1350 profiling, we need two output registers for the two bb_init_trace_func
1352 if (profile_flag
|| profile_block_flag
== 1)
1354 else if (profile_block_flag
== 2)
1356 current_frame_info
.n_output_regs
= i
;
1358 /* ??? No rotating register support yet. */
1359 current_frame_info
.n_rotate_regs
= 0;
1361 /* Discover which registers need spilling, and how much room that
1362 will take. Begin with floating point and general registers,
1363 which will always wind up on the stack. */
1365 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
1366 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1368 SET_HARD_REG_BIT (mask
, regno
);
1374 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1375 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1377 SET_HARD_REG_BIT (mask
, regno
);
1383 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
1384 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1386 SET_HARD_REG_BIT (mask
, regno
);
1391 /* Now come all special registers that might get saved in other
1392 general registers. */
1394 if (frame_pointer_needed
)
1396 current_frame_info
.reg_fp
= find_gr_spill (1);
1397 /* If we did not get a register, then we take LOC79. This is guaranteed
1398 to be free, even if regs_ever_live is already set, because this is
1399 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1400 as we don't count loc79 above. */
1401 if (current_frame_info
.reg_fp
== 0)
1403 current_frame_info
.reg_fp
= LOC_REG (79);
1404 current_frame_info
.n_local_regs
++;
1408 if (! current_function_is_leaf
)
1410 /* Emit a save of BR0 if we call other functions. Do this even
1411 if this function doesn't return, as EH depends on this to be
1412 able to unwind the stack. */
1413 SET_HARD_REG_BIT (mask
, BR_REG (0));
1415 current_frame_info
.reg_save_b0
= find_gr_spill (1);
1416 if (current_frame_info
.reg_save_b0
== 0)
1422 /* Similarly for ar.pfs. */
1423 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
1424 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
1425 if (current_frame_info
.reg_save_ar_pfs
== 0)
1427 extra_spill_size
+= 8;
1433 if (regs_ever_live
[BR_REG (0)] && ! call_used_regs
[BR_REG (0)])
1435 SET_HARD_REG_BIT (mask
, BR_REG (0));
1441 /* Unwind descriptor hackery: things are most efficient if we allocate
1442 consecutive GR save registers for RP, PFS, FP in that order. However,
1443 it is absolutely critical that FP get the only hard register that's
1444 guaranteed to be free, so we allocated it first. If all three did
1445 happen to be allocated hard regs, and are consecutive, rearrange them
1446 into the preferred order now. */
1447 if (current_frame_info
.reg_fp
!= 0
1448 && current_frame_info
.reg_save_b0
== current_frame_info
.reg_fp
+ 1
1449 && current_frame_info
.reg_save_ar_pfs
== current_frame_info
.reg_fp
+ 2)
1451 current_frame_info
.reg_save_b0
= current_frame_info
.reg_fp
;
1452 current_frame_info
.reg_save_ar_pfs
= current_frame_info
.reg_fp
+ 1;
1453 current_frame_info
.reg_fp
= current_frame_info
.reg_fp
+ 2;
1456 /* See if we need to store the predicate register block. */
1457 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1458 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1460 if (regno
<= PR_REG (63))
1462 SET_HARD_REG_BIT (mask
, PR_REG (0));
1463 current_frame_info
.reg_save_pr
= find_gr_spill (1);
1464 if (current_frame_info
.reg_save_pr
== 0)
1466 extra_spill_size
+= 8;
1470 /* ??? Mark them all as used so that register renaming and such
1471 are free to use them. */
1472 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1473 regs_ever_live
[regno
] = 1;
1476 /* If we're forced to use st8.spill, we're forced to save and restore
1478 if (spilled_gr_p
|| cfun
->machine
->n_varargs
)
1480 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
1481 current_frame_info
.reg_save_ar_unat
= find_gr_spill (spill_size
== 0);
1482 if (current_frame_info
.reg_save_ar_unat
== 0)
1484 extra_spill_size
+= 8;
1489 if (regs_ever_live
[AR_LC_REGNUM
])
1491 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
1492 current_frame_info
.reg_save_ar_lc
= find_gr_spill (spill_size
== 0);
1493 if (current_frame_info
.reg_save_ar_lc
== 0)
1495 extra_spill_size
+= 8;
1500 /* If we have an odd number of words of pretend arguments written to
1501 the stack, then the FR save area will be unaligned. We round the
1502 size of this area up to keep things 16 byte aligned. */
1504 pretend_args_size
= IA64_STACK_ALIGN (current_function_pretend_args_size
);
1506 pretend_args_size
= current_function_pretend_args_size
;
1508 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
1509 + current_function_outgoing_args_size
);
1510 total_size
= IA64_STACK_ALIGN (total_size
);
1512 /* We always use the 16-byte scratch area provided by the caller, but
1513 if we are a leaf function, there's no one to which we need to provide
1515 if (current_function_is_leaf
)
1516 total_size
= MAX (0, total_size
- 16);
1518 current_frame_info
.total_size
= total_size
;
1519 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
1520 current_frame_info
.spill_size
= spill_size
;
1521 current_frame_info
.extra_spill_size
= extra_spill_size
;
1522 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
1523 current_frame_info
.n_spilled
= n_spilled
;
1524 current_frame_info
.initialized
= reload_completed
;
1527 /* Compute the initial difference between the specified pair of registers. */
1530 ia64_initial_elimination_offset (from
, to
)
1533 HOST_WIDE_INT offset
;
1535 ia64_compute_frame_size (get_frame_size ());
1538 case FRAME_POINTER_REGNUM
:
1539 if (to
== HARD_FRAME_POINTER_REGNUM
)
1541 if (current_function_is_leaf
)
1542 offset
= -current_frame_info
.total_size
;
1544 offset
= -(current_frame_info
.total_size
1545 - current_function_outgoing_args_size
- 16);
1547 else if (to
== STACK_POINTER_REGNUM
)
1549 if (current_function_is_leaf
)
1552 offset
= 16 + current_function_outgoing_args_size
;
1558 case ARG_POINTER_REGNUM
:
1559 /* Arguments start above the 16 byte save area, unless stdarg
1560 in which case we store through the 16 byte save area. */
1561 if (to
== HARD_FRAME_POINTER_REGNUM
)
1562 offset
= 16 - current_function_pretend_args_size
;
1563 else if (to
== STACK_POINTER_REGNUM
)
1564 offset
= (current_frame_info
.total_size
1565 + 16 - current_function_pretend_args_size
);
1570 case RETURN_ADDRESS_POINTER_REGNUM
:
1581 /* If there are more than a trivial number of register spills, we use
1582 two interleaved iterators so that we can get two memory references
1585 In order to simplify things in the prologue and epilogue expanders,
1586 we use helper functions to fix up the memory references after the
1587 fact with the appropriate offsets to a POST_MODIFY memory mode.
1588 The following data structure tracks the state of the two iterators
1589 while insns are being emitted. */
1591 struct spill_fill_data
1593 rtx init_after
; /* point at which to emit intializations */
1594 rtx init_reg
[2]; /* initial base register */
1595 rtx iter_reg
[2]; /* the iterator registers */
1596 rtx
*prev_addr
[2]; /* address of last memory use */
1597 HOST_WIDE_INT prev_off
[2]; /* last offset */
1598 int n_iter
; /* number of iterators in use */
1599 int next_iter
; /* next iterator to use */
1600 unsigned int save_gr_used_mask
;
1603 static struct spill_fill_data spill_fill_data
;
1606 setup_spill_pointers (n_spills
, init_reg
, cfa_off
)
1609 HOST_WIDE_INT cfa_off
;
1613 spill_fill_data
.init_after
= get_last_insn ();
1614 spill_fill_data
.init_reg
[0] = init_reg
;
1615 spill_fill_data
.init_reg
[1] = init_reg
;
1616 spill_fill_data
.prev_addr
[0] = NULL
;
1617 spill_fill_data
.prev_addr
[1] = NULL
;
1618 spill_fill_data
.prev_off
[0] = cfa_off
;
1619 spill_fill_data
.prev_off
[1] = cfa_off
;
1620 spill_fill_data
.next_iter
= 0;
1621 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
1623 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
1624 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
1626 int regno
= next_scratch_gr_reg ();
1627 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
1628 current_frame_info
.gr_used_mask
|= 1 << regno
;
1633 finish_spill_pointers ()
1635 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
1639 spill_restore_mem (reg
, cfa_off
)
1641 HOST_WIDE_INT cfa_off
;
1643 int iter
= spill_fill_data
.next_iter
;
1644 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
1645 rtx disp_rtx
= GEN_INT (disp
);
1648 if (spill_fill_data
.prev_addr
[iter
])
1650 if (CONST_OK_FOR_N (disp
))
1651 *spill_fill_data
.prev_addr
[iter
]
1652 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
1653 gen_rtx_PLUS (DImode
,
1654 spill_fill_data
.iter_reg
[iter
],
1658 /* ??? Could use register post_modify for loads. */
1659 if (! CONST_OK_FOR_I (disp
))
1661 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1662 emit_move_insn (tmp
, disp_rtx
);
1665 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1666 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
1669 /* Micro-optimization: if we've created a frame pointer, it's at
1670 CFA 0, which may allow the real iterator to be initialized lower,
1671 slightly increasing parallelism. Also, if there are few saves
1672 it may eliminate the iterator entirely. */
1674 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
1675 && frame_pointer_needed
)
1677 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
1678 MEM_ALIAS_SET (mem
) = get_varargs_alias_set ();
1686 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
1687 spill_fill_data
.init_reg
[iter
]);
1692 if (! CONST_OK_FOR_I (disp
))
1694 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1695 emit_move_insn (tmp
, disp_rtx
);
1699 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1700 spill_fill_data
.init_reg
[iter
],
1703 seq
= gen_sequence ();
1707 /* Careful for being the first insn in a sequence. */
1708 if (spill_fill_data
.init_after
)
1709 spill_fill_data
.init_after
1710 = emit_insn_after (seq
, spill_fill_data
.init_after
);
1713 rtx first
= get_insns ();
1715 spill_fill_data
.init_after
1716 = emit_insn_before (seq
, first
);
1718 spill_fill_data
.init_after
= emit_insn (seq
);
1722 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
1724 /* ??? Not all of the spills are for varargs, but some of them are.
1725 The rest of the spills belong in an alias set of their own. But
1726 it doesn't actually hurt to include them here. */
1727 MEM_ALIAS_SET (mem
) = get_varargs_alias_set ();
1729 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
1730 spill_fill_data
.prev_off
[iter
] = cfa_off
;
1732 if (++iter
>= spill_fill_data
.n_iter
)
1734 spill_fill_data
.next_iter
= iter
;
1740 do_spill (move_fn
, reg
, cfa_off
, frame_reg
)
1741 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
1743 HOST_WIDE_INT cfa_off
;
1747 mem
= spill_restore_mem (reg
, cfa_off
);
1748 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
1755 RTX_FRAME_RELATED_P (insn
) = 1;
1757 /* Don't even pretend that the unwind code can intuit its way
1758 through a pair of interleaved post_modify iterators. Just
1759 provide the correct answer. */
1761 if (frame_pointer_needed
)
1763 base
= hard_frame_pointer_rtx
;
1768 base
= stack_pointer_rtx
;
1769 off
= current_frame_info
.total_size
- cfa_off
;
1773 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1774 gen_rtx_SET (VOIDmode
,
1775 gen_rtx_MEM (GET_MODE (reg
),
1776 plus_constant (base
, off
)),
1783 do_restore (move_fn
, reg
, cfa_off
)
1784 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
1786 HOST_WIDE_INT cfa_off
;
1788 emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
1789 GEN_INT (cfa_off
)));
1792 /* Wrapper functions that discards the CONST_INT spill offset. These
1793 exist so that we can give gr_spill/gr_fill the offset they need and
1794 use a consistant function interface. */
1797 gen_movdi_x (dest
, src
, offset
)
1799 rtx offset ATTRIBUTE_UNUSED
;
1801 return gen_movdi (dest
, src
);
1805 gen_fr_spill_x (dest
, src
, offset
)
1807 rtx offset ATTRIBUTE_UNUSED
;
1809 return gen_fr_spill (dest
, src
);
1813 gen_fr_restore_x (dest
, src
, offset
)
1815 rtx offset ATTRIBUTE_UNUSED
;
1817 return gen_fr_restore (dest
, src
);
1820 /* Called after register allocation to add any instructions needed for the
1821 prologue. Using a prologue insn is favored compared to putting all of the
1822 instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
1823 to intermix instructions with the saves of the caller saved registers. In
1824 some cases, it might be necessary to emit a barrier instruction as the last
1825 insn to prevent such scheduling.
1827 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1828 so that the debug info generation code can handle them properly.
1830 The register save area is layed out like so:
1832 [ varargs spill area ]
1833 [ fr register spill area ]
1834 [ br register spill area ]
1835 [ ar register spill area ]
1836 [ pr register spill area ]
1837 [ gr register spill area ] */
1839 /* ??? Get inefficient code when the frame size is larger than can fit in an
1840 adds instruction. */
1843 ia64_expand_prologue ()
1845 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
1846 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
1849 ia64_compute_frame_size (get_frame_size ());
1850 last_scratch_gr_reg
= 15;
1852 /* If there is no epilogue, then we don't need some prologue insns.
1853 We need to avoid emitting the dead prologue insns, because flow
1854 will complain about them. */
1859 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
1860 if ((e
->flags
& EDGE_FAKE
) == 0
1861 && (e
->flags
& EDGE_FALLTHRU
) != 0)
1863 epilogue_p
= (e
!= NULL
);
1868 /* Set the local, input, and output register names. We need to do this
1869 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1870 half. If we use in/loc/out register names, then we get assembler errors
1871 in crtn.S because there is no alloc insn or regstk directive in there. */
1872 if (! TARGET_REG_NAMES
)
1874 int inputs
= current_frame_info
.n_input_regs
;
1875 int locals
= current_frame_info
.n_local_regs
;
1876 int outputs
= current_frame_info
.n_output_regs
;
1878 for (i
= 0; i
< inputs
; i
++)
1879 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
1880 for (i
= 0; i
< locals
; i
++)
1881 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
1882 for (i
= 0; i
< outputs
; i
++)
1883 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
1886 /* Set the frame pointer register name. The regnum is logically loc79,
1887 but of course we'll not have allocated that many locals. Rather than
1888 worrying about renumbering the existing rtxs, we adjust the name. */
1889 /* ??? This code means that we can never use one local register when
1890 there is a frame pointer. loc79 gets wasted in this case, as it is
1891 renamed to a register that will never be used. See also the try_locals
1892 code in find_gr_spill. */
1893 if (current_frame_info
.reg_fp
)
1895 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
1896 reg_names
[HARD_FRAME_POINTER_REGNUM
]
1897 = reg_names
[current_frame_info
.reg_fp
];
1898 reg_names
[current_frame_info
.reg_fp
] = tmp
;
1901 /* Fix up the return address placeholder. */
1902 /* ??? We can fail if __builtin_return_address is used, and we didn't
1903 allocate a register in which to save b0. I can't think of a way to
1904 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
1905 then be sure that I got the right one. Further, reload doesn't seem
1906 to care if an eliminable register isn't used, and "eliminates" it
1908 if (regs_ever_live
[RETURN_ADDRESS_POINTER_REGNUM
]
1909 && current_frame_info
.reg_save_b0
!= 0)
1910 XINT (return_address_pointer_rtx
, 0) = current_frame_info
.reg_save_b0
;
1912 /* We don't need an alloc instruction if we've used no outputs or locals. */
1913 if (current_frame_info
.n_local_regs
== 0
1914 && current_frame_info
.n_output_regs
== 0
1915 && current_frame_info
.n_input_regs
<= current_function_args_info
.words
)
1917 /* If there is no alloc, but there are input registers used, then we
1918 need a .regstk directive. */
1919 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
1920 ar_pfs_save_reg
= NULL_RTX
;
1924 current_frame_info
.need_regstk
= 0;
1926 if (current_frame_info
.reg_save_ar_pfs
)
1927 regno
= current_frame_info
.reg_save_ar_pfs
;
1929 regno
= next_scratch_gr_reg ();
1930 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
1932 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
1933 GEN_INT (current_frame_info
.n_input_regs
),
1934 GEN_INT (current_frame_info
.n_local_regs
),
1935 GEN_INT (current_frame_info
.n_output_regs
),
1936 GEN_INT (current_frame_info
.n_rotate_regs
)));
1937 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_pfs
!= 0);
1940 /* Set up frame pointer, stack pointer, and spill iterators. */
1942 n_varargs
= cfun
->machine
->n_varargs
;
1943 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
1944 stack_pointer_rtx
, 0);
1946 if (frame_pointer_needed
)
1948 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
1949 RTX_FRAME_RELATED_P (insn
) = 1;
1952 if (current_frame_info
.total_size
!= 0)
1954 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
1957 if (CONST_OK_FOR_I (- current_frame_info
.total_size
))
1958 offset
= frame_size_rtx
;
1961 regno
= next_scratch_gr_reg ();
1962 offset
= gen_rtx_REG (DImode
, regno
);
1963 emit_move_insn (offset
, frame_size_rtx
);
1966 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
1967 stack_pointer_rtx
, offset
));
1969 if (! frame_pointer_needed
)
1971 RTX_FRAME_RELATED_P (insn
) = 1;
1972 if (GET_CODE (offset
) != CONST_INT
)
1975 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1976 gen_rtx_SET (VOIDmode
,
1978 gen_rtx_PLUS (DImode
,
1985 /* ??? At this point we must generate a magic insn that appears to
1986 modify the stack pointer, the frame pointer, and all spill
1987 iterators. This would allow the most scheduling freedom. For
1988 now, just hard stop. */
1989 emit_insn (gen_blockage ());
1992 /* Must copy out ar.unat before doing any integer spills. */
1993 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
1995 if (current_frame_info
.reg_save_ar_unat
)
1997 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2000 alt_regno
= next_scratch_gr_reg ();
2001 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2002 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2005 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2006 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
2007 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_unat
!= 0);
2009 /* Even if we're not going to generate an epilogue, we still
2010 need to save the register so that EH works. */
2011 if (! epilogue_p
&& current_frame_info
.reg_save_ar_unat
)
2012 emit_insn (gen_rtx_USE (VOIDmode
, ar_unat_save_reg
));
2015 ar_unat_save_reg
= NULL_RTX
;
2017 /* Spill all varargs registers. Do this before spilling any GR registers,
2018 since we want the UNAT bits for the GR registers to override the UNAT
2019 bits from varargs, which we don't care about. */
2022 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
2024 reg
= gen_rtx_REG (DImode
, regno
);
2025 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
2028 /* Locate the bottom of the register save area. */
2029 cfa_off
= (current_frame_info
.spill_cfa_off
2030 + current_frame_info
.spill_size
2031 + current_frame_info
.extra_spill_size
);
2033 /* Save the predicate register block either in a register or in memory. */
2034 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2036 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2037 if (current_frame_info
.reg_save_pr
!= 0)
2039 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2040 insn
= emit_move_insn (alt_reg
, reg
);
2042 /* ??? Denote pr spill/fill by a DImode move that modifies all
2043 64 hard registers. */
2044 RTX_FRAME_RELATED_P (insn
) = 1;
2046 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2047 gen_rtx_SET (VOIDmode
, alt_reg
, reg
),
2050 /* Even if we're not going to generate an epilogue, we still
2051 need to save the register so that EH works. */
2053 emit_insn (gen_rtx_USE (VOIDmode
, alt_reg
));
2057 alt_regno
= next_scratch_gr_reg ();
2058 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2059 insn
= emit_move_insn (alt_reg
, reg
);
2060 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2065 /* Handle AR regs in numerical order. All of them get special handling. */
2066 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
2067 && current_frame_info
.reg_save_ar_unat
== 0)
2069 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2070 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
2074 /* The alloc insn already copied ar.pfs into a general register. The
2075 only thing we have to do now is copy that register to a stack slot
2076 if we'd not allocated a local register for the job. */
2077 if (current_frame_info
.reg_save_ar_pfs
== 0
2078 && ! current_function_is_leaf
)
2080 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2081 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
2085 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2087 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2088 if (current_frame_info
.reg_save_ar_lc
!= 0)
2090 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2091 insn
= emit_move_insn (alt_reg
, reg
);
2092 RTX_FRAME_RELATED_P (insn
) = 1;
2094 /* Even if we're not going to generate an epilogue, we still
2095 need to save the register so that EH works. */
2097 emit_insn (gen_rtx_USE (VOIDmode
, alt_reg
));
2101 alt_regno
= next_scratch_gr_reg ();
2102 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2103 emit_move_insn (alt_reg
, reg
);
2104 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2109 /* We should now be at the base of the gr/br/fr spill area. */
2110 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2111 + current_frame_info
.spill_size
))
2114 /* Spill all general registers. */
2115 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2116 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2118 reg
= gen_rtx_REG (DImode
, regno
);
2119 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
2123 /* Handle BR0 specially -- it may be getting stored permanently in
2124 some GR register. */
2125 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2127 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2128 if (current_frame_info
.reg_save_b0
!= 0)
2130 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2131 insn
= emit_move_insn (alt_reg
, reg
);
2132 RTX_FRAME_RELATED_P (insn
) = 1;
2134 /* Even if we're not going to generate an epilogue, we still
2135 need to save the register so that EH works. */
2137 emit_insn (gen_rtx_USE (VOIDmode
, alt_reg
));
2141 alt_regno
= next_scratch_gr_reg ();
2142 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2143 emit_move_insn (alt_reg
, reg
);
2144 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2149 /* Spill the rest of the BR registers. */
2150 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2151 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2153 alt_regno
= next_scratch_gr_reg ();
2154 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2155 reg
= gen_rtx_REG (DImode
, regno
);
2156 emit_move_insn (alt_reg
, reg
);
2157 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2161 /* Align the frame and spill all FR registers. */
2162 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2163 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2167 reg
= gen_rtx_REG (TFmode
, regno
);
2168 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
2172 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2175 finish_spill_pointers ();
2178 /* Called after register allocation to add any instructions needed for the
2179 epilogue. Using a epilogue insn is favored compared to putting all of the
2180 instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
2181 to intermix instructions with the saves of the caller saved registers. In
2182 some cases, it might be necessary to emit a barrier instruction as the last
2183 insn to prevent such scheduling. */
2186 ia64_expand_epilogue (sibcall_p
)
2189 rtx insn
, reg
, alt_reg
, ar_unat_save_reg
;
2190 int regno
, alt_regno
, cfa_off
;
2192 ia64_compute_frame_size (get_frame_size ());
2194 /* If there is a frame pointer, then we use it instead of the stack
2195 pointer, so that the stack pointer does not need to be valid when
2196 the epilogue starts. See EXIT_IGNORE_STACK. */
2197 if (frame_pointer_needed
)
2198 setup_spill_pointers (current_frame_info
.n_spilled
,
2199 hard_frame_pointer_rtx
, 0);
2201 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
2202 current_frame_info
.total_size
);
2204 if (current_frame_info
.total_size
!= 0)
2206 /* ??? At this point we must generate a magic insn that appears to
2207 modify the spill iterators and the frame pointer. This would
2208 allow the most scheduling freedom. For now, just hard stop. */
2209 emit_insn (gen_blockage ());
2212 /* Locate the bottom of the register save area. */
2213 cfa_off
= (current_frame_info
.spill_cfa_off
2214 + current_frame_info
.spill_size
2215 + current_frame_info
.extra_spill_size
);
2217 /* Restore the predicate registers. */
2218 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2220 if (current_frame_info
.reg_save_pr
!= 0)
2221 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2224 alt_regno
= next_scratch_gr_reg ();
2225 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2226 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2229 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2230 emit_move_insn (reg
, alt_reg
);
2233 /* Restore the application registers. */
2235 /* Load the saved unat from the stack, but do not restore it until
2236 after the GRs have been restored. */
2237 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2239 if (current_frame_info
.reg_save_ar_unat
!= 0)
2241 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2244 alt_regno
= next_scratch_gr_reg ();
2245 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2246 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2247 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
2252 ar_unat_save_reg
= NULL_RTX
;
2254 if (current_frame_info
.reg_save_ar_pfs
!= 0)
2256 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_pfs
);
2257 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2258 emit_move_insn (reg
, alt_reg
);
2260 else if (! current_function_is_leaf
)
2262 alt_regno
= next_scratch_gr_reg ();
2263 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2264 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2266 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2267 emit_move_insn (reg
, alt_reg
);
2270 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2272 if (current_frame_info
.reg_save_ar_lc
!= 0)
2273 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2276 alt_regno
= next_scratch_gr_reg ();
2277 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2278 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2281 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2282 emit_move_insn (reg
, alt_reg
);
2285 /* We should now be at the base of the gr/br/fr spill area. */
2286 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2287 + current_frame_info
.spill_size
))
2290 /* Restore all general registers. */
2291 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2292 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2294 reg
= gen_rtx_REG (DImode
, regno
);
2295 do_restore (gen_gr_restore
, reg
, cfa_off
);
2299 /* Restore the branch registers. Handle B0 specially, as it may
2300 have gotten stored in some GR register. */
2301 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2303 if (current_frame_info
.reg_save_b0
!= 0)
2304 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2307 alt_regno
= next_scratch_gr_reg ();
2308 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2309 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2312 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2313 emit_move_insn (reg
, alt_reg
);
2316 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2317 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2319 alt_regno
= next_scratch_gr_reg ();
2320 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2321 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2323 reg
= gen_rtx_REG (DImode
, regno
);
2324 emit_move_insn (reg
, alt_reg
);
2327 /* Restore floating point registers. */
2328 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2329 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2333 reg
= gen_rtx_REG (TFmode
, regno
);
2334 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
2338 /* Restore ar.unat for real. */
2339 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2341 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2342 emit_move_insn (reg
, ar_unat_save_reg
);
2345 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2348 finish_spill_pointers ();
2350 if (current_frame_info
.total_size
|| cfun
->machine
->ia64_eh_epilogue_sp
)
2352 /* ??? At this point we must generate a magic insn that appears to
2353 modify the spill iterators, the stack pointer, and the frame
2354 pointer. This would allow the most scheduling freedom. For now,
2356 emit_insn (gen_blockage ());
2359 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2360 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
2361 else if (frame_pointer_needed
)
2363 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
2364 RTX_FRAME_RELATED_P (insn
) = 1;
2366 else if (current_frame_info
.total_size
)
2368 rtx offset
, frame_size_rtx
;
2370 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
2371 if (CONST_OK_FOR_I (current_frame_info
.total_size
))
2372 offset
= frame_size_rtx
;
2375 regno
= next_scratch_gr_reg ();
2376 offset
= gen_rtx_REG (DImode
, regno
);
2377 emit_move_insn (offset
, frame_size_rtx
);
2380 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
2383 RTX_FRAME_RELATED_P (insn
) = 1;
2384 if (GET_CODE (offset
) != CONST_INT
)
2387 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2388 gen_rtx_SET (VOIDmode
,
2390 gen_rtx_PLUS (DImode
,
2397 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2398 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
2401 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
2403 /* We must emit an alloc to force the input registers to become output
2404 registers. Otherwise, if the callee tries to pass its parameters
2405 through to another call without an intervening alloc, then these
2407 /* ??? We don't need to preserve all input registers. We only need to
2408 preserve those input registers used as arguments to the sibling call.
2409 It is unclear how to compute that number here. */
2410 emit_insn (gen_alloc (gen_rtx_REG (DImode
, GR_REG (2)),
2411 GEN_INT (0), GEN_INT (0),
2412 GEN_INT (current_frame_info
.n_input_regs
),
2416 /* Return 1 if br.ret can do all the work required to return from a
   function -- i.e. after reload, with no frame pointer, the frame needs
   no teardown at all: no stack adjustment, no spills, and none of
   b0/pr/ar.pfs/ar.unat/ar.lc were saved.
   NOTE(review): this is a lossy extraction fragment.  The function
   header (return type, braces) and the fall-through "return 0;" path
   (orig. lines 2417-2421, 2423, 2425, 2433-2435) are not visible.  */
2420 ia64_direct_return ()
2422 if (reload_completed
&& ! frame_pointer_needed
)
/* Refresh the frame layout so the field checks below see current data.  */
2424 ia64_compute_frame_size (get_frame_size ());
/* Direct return is possible only when absolutely nothing was saved.  */
2426 return (current_frame_info
.total_size
== 0
2427 && current_frame_info
.n_spilled
== 0
2428 && current_frame_info
.reg_save_b0
== 0
2429 && current_frame_info
.reg_save_pr
== 0
2430 && current_frame_info
.reg_save_ar_pfs
== 0
2431 && current_frame_info
.reg_save_ar_unat
== 0
2432 && current_frame_info
.reg_save_ar_lc
== 0);
2438 ia64_hard_regno_rename_ok (from
, to
)
2442 /* Don't clobber any of the registers we reserved for the prologue. */
2443 if (to
== current_frame_info
.reg_fp
2444 || to
== current_frame_info
.reg_save_b0
2445 || to
== current_frame_info
.reg_save_pr
2446 || to
== current_frame_info
.reg_save_ar_pfs
2447 || to
== current_frame_info
.reg_save_ar_unat
2448 || to
== current_frame_info
.reg_save_ar_lc
)
2451 if (from
== current_frame_info
.reg_fp
2452 || from
== current_frame_info
.reg_save_b0
2453 || from
== current_frame_info
.reg_save_pr
2454 || from
== current_frame_info
.reg_save_ar_pfs
2455 || from
== current_frame_info
.reg_save_ar_unat
2456 || from
== current_frame_info
.reg_save_ar_lc
)
2459 /* Don't use output registers outside the register frame. */
2460 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
2463 /* Retain even/oddness on predicate register pairs. */
2464 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
2465 return (from
& 1) == (to
& 1);
2467 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2468 if (from
== GR_REG (4) && current_function_calls_setjmp
)
2474 /* Emit the function prologue. */
2477 ia64_function_prologue (file
, size
)
2479 int size ATTRIBUTE_UNUSED
;
2481 int mask
, grsave
, grsave_prev
;
2483 if (current_frame_info
.need_regstk
)
2484 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
2485 current_frame_info
.n_input_regs
,
2486 current_frame_info
.n_local_regs
,
2487 current_frame_info
.n_output_regs
,
2488 current_frame_info
.n_rotate_regs
);
2490 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2493 /* Emit the .prologue directive. */
2496 grsave
= grsave_prev
= 0;
2497 if (current_frame_info
.reg_save_b0
!= 0)
2500 grsave
= grsave_prev
= current_frame_info
.reg_save_b0
;
2502 if (current_frame_info
.reg_save_ar_pfs
!= 0
2503 && (grsave_prev
== 0
2504 || current_frame_info
.reg_save_ar_pfs
== grsave_prev
+ 1))
2507 if (grsave_prev
== 0)
2508 grsave
= current_frame_info
.reg_save_ar_pfs
;
2509 grsave_prev
= current_frame_info
.reg_save_ar_pfs
;
2511 if (current_frame_info
.reg_fp
!= 0
2512 && (grsave_prev
== 0
2513 || current_frame_info
.reg_fp
== grsave_prev
+ 1))
2516 if (grsave_prev
== 0)
2517 grsave
= HARD_FRAME_POINTER_REGNUM
;
2518 grsave_prev
= current_frame_info
.reg_fp
;
2520 if (current_frame_info
.reg_save_pr
!= 0
2521 && (grsave_prev
== 0
2522 || current_frame_info
.reg_save_pr
== grsave_prev
+ 1))
2525 if (grsave_prev
== 0)
2526 grsave
= current_frame_info
.reg_save_pr
;
2530 fprintf (file
, "\t.prologue %d, %d\n", mask
,
2531 ia64_dbx_register_number (grsave
));
2533 fputs ("\t.prologue\n", file
);
2535 /* Emit a .spill directive, if necessary, to relocate the base of
2536 the register spill area. */
2537 if (current_frame_info
.spill_cfa_off
!= -16)
2538 fprintf (file
, "\t.spill %ld\n",
2539 (long) (current_frame_info
.spill_cfa_off
2540 + current_frame_info
.spill_size
));
2543 /* Emit the .body directive at the scheduled end of the prologue.
   NOTE(review): extraction fragment -- the statement(s) guarded by the
   test below (orig. lines 2550-2551, presumably an early return when no
   unwind info is wanted) were dropped, so the exact control flow around
   the fputs cannot be confirmed from this view.  */
2546 ia64_output_end_prologue (file
)
/* Unwind directives are only needed when unwind tables or non-SJLJ
   exception handling are in use.  */
2549 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2552 fputs ("\t.body\n", file
);
2555 /* Emit the function epilogue. */
2558 ia64_function_epilogue (file
, size
)
2559 FILE *file ATTRIBUTE_UNUSED
;
2560 int size ATTRIBUTE_UNUSED
;
2564 /* Reset from the function's potential modifications. */
2565 XINT (return_address_pointer_rtx
, 0) = RETURN_ADDRESS_POINTER_REGNUM
;
2567 if (current_frame_info
.reg_fp
)
2569 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2570 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2571 = reg_names
[current_frame_info
.reg_fp
];
2572 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2574 if (! TARGET_REG_NAMES
)
2576 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
2577 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
2578 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
2579 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
2580 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
2581 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
2584 current_frame_info
.initialized
= 0;
/* Map hard register number REGNO to the register number used in debug
   info.  Stacked registers (in/loc/out) are renumbered into a dense
   range starting at 32, and the frame-pointer renaming performed by the
   prologue is mirrored so debug info refers to the register actually
   holding the frame pointer.
   NOTE(review): extraction fragment -- return type, braces, and the
   final fall-through return (orig. ~2609-2610) are not visible.  */
2588 ia64_dbx_register_number (regno
)
2591 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2592 from its home at loc79 to something inside the register frame. We
2593 must perform the same renumbering here for the debug info. */
2594 if (current_frame_info
.reg_fp
)
/* Swap HARD_FRAME_POINTER_REGNUM and reg_fp, in both directions.  */
2596 if (regno
== HARD_FRAME_POINTER_REGNUM
)
2597 regno
= current_frame_info
.reg_fp
;
2598 else if (regno
== current_frame_info
.reg_fp
)
2599 regno
= HARD_FRAME_POINTER_REGNUM
;
/* Input registers come first in the 32-based debug numbering...  */
2602 if (IN_REGNO_P (regno
))
2603 return 32 + regno
- IN_REG (0);
/* ...then local registers...  */
2604 else if (LOC_REGNO_P (regno
))
2605 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
/* ...then output registers.  */
2606 else if (OUT_REGNO_P (regno
))
2607 return (32 + current_frame_info
.n_input_regs
2608 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
/* Fill in the 4-word IA-64 trampoline at ADDR for a nested function:
     word 0: address of the shared __ia64_trampoline stub (fake fn addr),
     word 1: ADDR + 16 (data the stub uses, per the comment below),
     word 2: the real target function descriptor FNADDR,
     word 3: the static chain.
   ADDR_REG walks through the four words, advanced by 8 each step.
   NOTE(review): extraction fragment -- return type, parameter order
   declaration braces, and blank lines (orig. 2613, 2616, 2618, ...)
   were dropped.  */
2614 ia64_initialize_trampoline (addr
, fnaddr
, static_chain
)
2615 rtx addr
, fnaddr
, static_chain
;
2617 rtx addr_reg
, eight
= GEN_INT (8);
2619 /* Load up our iterator. */
2620 addr_reg
= gen_reg_rtx (Pmode
);
2621 emit_move_insn (addr_reg
, addr
);
2623 /* The first two words are the fake descriptor:
2624 __ia64_trampoline, ADDR+16. */
2625 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
2626 gen_rtx_SYMBOL_REF (Pmode
, "__ia64_trampoline"));
2627 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2629 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
2630 copy_to_reg (plus_constant (addr
, 16)));
2631 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2633 /* The third word is the target descriptor. */
2634 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), fnaddr
);
2635 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2637 /* The fourth word is the static chain. */
2638 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), static_chain
);
2641 /* Do any needed setup for a variadic function. CUM has not been updated
2642 for the last named argument which has type TYPE and mode MODE.
2644 We generate the actual spill instructions during prologue generation. */
2647 ia64_setup_incoming_varargs (cum
, int_mode
, type
, pretend_size
, second_time
)
2648 CUMULATIVE_ARGS cum
;
2652 int second_time ATTRIBUTE_UNUSED
;
2654 /* If this is a stdarg function, then skip the current argument. */
2655 if (! current_function_varargs
)
2656 ia64_function_arg_advance (&cum
, int_mode
, type
, 1);
2658 if (cum
.words
< MAX_ARGUMENT_SLOTS
)
2660 int n
= MAX_ARGUMENT_SLOTS
- cum
.words
;
2661 *pretend_size
= n
* UNITS_PER_WORD
;
2662 cfun
->machine
->n_varargs
= n
;
2666 /* Check whether TYPE is a homogeneous floating point aggregate. If
2667 it is, return the mode of the floating point type that appears
2668 in all leafs. If it is not, return VOIDmode.
2670 An aggregate is a homogeneous floating point aggregate is if all
2671 fields/elements in it have the same floating point type (e.g,
2672 SFmode). 128-bit quad-precision floats are excluded. */
2674 static enum machine_mode
2675 hfa_element_mode (type
, nested
)
2679 enum machine_mode element_mode
= VOIDmode
;
2680 enum machine_mode mode
;
2681 enum tree_code code
= TREE_CODE (type
);
2682 int know_element_mode
= 0;
2687 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
2688 case BOOLEAN_TYPE
: case CHAR_TYPE
: case POINTER_TYPE
:
2689 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
2690 case FILE_TYPE
: case SET_TYPE
: case LANG_TYPE
:
2694 /* Fortran complex types are supposed to be HFAs, so we need to handle
2695 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2698 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
)
2699 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type
))
2700 * BITS_PER_UNIT
, MODE_FLOAT
, 0);
2705 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2706 mode if this is contained within an aggregate. */
2708 return TYPE_MODE (type
);
2713 return TYPE_MODE (TREE_TYPE (type
));
2717 case QUAL_UNION_TYPE
:
2718 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
2720 if (TREE_CODE (t
) != FIELD_DECL
)
2723 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
2724 if (know_element_mode
)
2726 if (mode
!= element_mode
)
2729 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
2733 know_element_mode
= 1;
2734 element_mode
= mode
;
2737 return element_mode
;
2740 /* If we reach here, we probably have some front-end specific type
2741 that the backend doesn't know about. This can happen via the
2742 aggregate_value_p call in init_function_start. All we can do is
2743 ignore unknown tree types. */
2750 /* Return rtx for register where argument is passed, or zero if it is passed
2753 /* ??? 128-bit quad-precision floats are always passed in general
2757 ia64_function_arg (cum
, mode
, type
, named
, incoming
)
2758 CUMULATIVE_ARGS
*cum
;
2759 enum machine_mode mode
;
2764 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
2765 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
2766 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
2769 enum machine_mode hfa_mode
= VOIDmode
;
2771 /* Integer and float arguments larger than 8 bytes start at the next even
2772 boundary. Aggregates larger than 8 bytes start at the next even boundary
2773 if the aggregate has 16 byte alignment. Net effect is that types with
2774 alignment greater than 8 start at the next even boundary. */
2775 /* ??? The ABI does not specify how to handle aggregates with alignment from
2776 9 to 15 bytes, or greater than 16. We handle them all as if they had
2777 16 byte alignment. Such aggregates can occur only if gcc extensions are
2779 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
2781 && (cum
->words
& 1))
2784 /* If all argument slots are used, then it must go on the stack. */
2785 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
2788 /* Check for and handle homogeneous FP aggregates. */
2790 hfa_mode
= hfa_element_mode (type
, 0);
2792 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2793 and unprototyped hfas are passed specially. */
2794 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
2798 int fp_regs
= cum
->fp_regs
;
2799 int int_regs
= cum
->words
+ offset
;
2800 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
2804 /* If prototyped, pass it in FR regs then GR regs.
2805 If not prototyped, pass it in both FR and GR regs.
2807 If this is an SFmode aggregate, then it is possible to run out of
2808 FR regs while GR regs are still left. In that case, we pass the
2809 remaining part in the GR regs. */
2811 /* Fill the FP regs. We do this always. We stop if we reach the end
2812 of the argument, the last FP register, or the last argument slot. */
2814 byte_size
= ((mode
== BLKmode
)
2815 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
2816 args_byte_size
= int_regs
* UNITS_PER_WORD
;
2818 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
2819 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
2821 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
2822 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
2826 args_byte_size
+= hfa_size
;
2830 /* If no prototype, then the whole thing must go in GR regs. */
2831 if (! cum
->prototype
)
2833 /* If this is an SFmode aggregate, then we might have some left over
2834 that needs to go in GR regs. */
2835 else if (byte_size
!= offset
)
2836 int_regs
+= offset
/ UNITS_PER_WORD
;
2838 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2840 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
2842 enum machine_mode gr_mode
= DImode
;
2844 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2845 then this goes in a GR reg left adjusted/little endian, right
2846 adjusted/big endian. */
2847 /* ??? Currently this is handled wrong, because 4-byte hunks are
2848 always right adjusted/little endian. */
2851 /* If we have an even 4 byte hunk because the aggregate is a
2852 multiple of 4 bytes in size, then this goes in a GR reg right
2853 adjusted/little endian. */
2854 else if (byte_size
- offset
== 4)
2857 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
2858 gen_rtx_REG (gr_mode
, (basereg
2861 offset
+= GET_MODE_SIZE (gr_mode
);
2865 /* If we ended up using just one location, just return that one loc. */
2867 return XEXP (loc
[0], 0);
2869 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
2872 /* Integral and aggregates go in general registers. If we have run out of
2873 FR registers, then FP values must also go in general registers. This can
2874 happen when we have a SFmode HFA. */
2875 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
2876 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
2878 /* If there is a prototype, then FP values go in a FR register when
2879 named, and in a GR registeer when unnamed. */
2880 else if (cum
->prototype
)
2883 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
2885 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
2887 /* If there is no prototype, then FP values go in both FR and GR
2891 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
2892 gen_rtx_REG (mode
, (FR_ARG_FIRST
2895 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
2897 (basereg
+ cum
->words
2901 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
2905 /* Return number of words, at the beginning of the argument, that must be
2906 put in registers. 0 is the argument is entirely in registers or entirely
2910 ia64_function_arg_partial_nregs (cum
, mode
, type
, named
)
2911 CUMULATIVE_ARGS
*cum
;
2912 enum machine_mode mode
;
2914 int named ATTRIBUTE_UNUSED
;
2916 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
2917 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
2921 /* Arguments with alignment larger than 8 bytes start at the next even
2923 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
2925 && (cum
->words
& 1))
2928 /* If all argument slots are used, then it must go on the stack. */
2929 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
2932 /* It doesn't matter whether the argument goes in FR or GR regs. If
2933 it fits within the 8 argument slots, then it goes entirely in
2934 registers. If it extends past the last argument slot, then the rest
2935 goes on the stack. */
2937 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
2940 return MAX_ARGUMENT_SLOTS
- cum
->words
- offset
;
2943 /* Update CUM to point after this argument. This is patterned after
2944 ia64_function_arg. */
2947 ia64_function_arg_advance (cum
, mode
, type
, named
)
2948 CUMULATIVE_ARGS
*cum
;
2949 enum machine_mode mode
;
2953 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
2954 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
2957 enum machine_mode hfa_mode
= VOIDmode
;
2959 /* If all arg slots are already full, then there is nothing to do. */
2960 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
2963 /* Arguments with alignment larger than 8 bytes start at the next even
2965 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
2967 && (cum
->words
& 1))
2970 cum
->words
+= words
+ offset
;
2972 /* Check for and handle homogeneous FP aggregates. */
2974 hfa_mode
= hfa_element_mode (type
, 0);
2976 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2977 and unprototyped hfas are passed specially. */
2978 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
2980 int fp_regs
= cum
->fp_regs
;
2981 /* This is the original value of cum->words + offset. */
2982 int int_regs
= cum
->words
- words
;
2983 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
2987 /* If prototyped, pass it in FR regs then GR regs.
2988 If not prototyped, pass it in both FR and GR regs.
2990 If this is an SFmode aggregate, then it is possible to run out of
2991 FR regs while GR regs are still left. In that case, we pass the
2992 remaining part in the GR regs. */
2994 /* Fill the FP regs. We do this always. We stop if we reach the end
2995 of the argument, the last FP register, or the last argument slot. */
2997 byte_size
= ((mode
== BLKmode
)
2998 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
2999 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3001 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3002 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
3005 args_byte_size
+= hfa_size
;
3009 cum
->fp_regs
= fp_regs
;
3012 /* Integral and aggregates go in general registers. If we have run out of
3013 FR registers, then FP values must also go in general registers. This can
3014 happen when we have a SFmode HFA. */
3015 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
3018 /* If there is a prototype, then FP values go in a FR register when
3019 named, and in a GR registeer when unnamed. */
3020 else if (cum
->prototype
)
3025 /* ??? Complex types should not reach here. */
3026 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3028 /* If there is no prototype, then FP values go in both FR and GR
3031 /* ??? Complex types should not reach here. */
3032 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3037 /* Implement va_start.
   NOTE(review): extraction fragment -- the declarations and
   intermediate logic (orig. 3038-3047, 3049-3052, 3054) were dropped;
   only the final offset computation and the call into the generic
   expander are visible below.  */
3040 ia64_va_start (stdarg_p
, valist
, nextarg
)
/* Number of argument words consumed by the named arguments.  */
3048 arg_words
= current_function_args_info
.words
;
/* If every register argument slot was used by named args, back
   NEXTARG up one word.  (Why exactly one word depends on the dropped
   context above -- confirm against the full source.)  */
3053 ofs
= (arg_words
>= MAX_ARGUMENT_SLOTS
? -UNITS_PER_WORD
: 0);
3055 nextarg
= plus_constant (nextarg
, ofs
);
3056 std_expand_builtin_va_start (1, valist
, nextarg
);
3059 /* Implement va_arg. */
3062 ia64_va_arg (valist
, type
)
3067 /* Arguments with alignment larger than 8 bytes start at the next even
3069 if (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3071 t
= build (PLUS_EXPR
, TREE_TYPE (valist
), valist
,
3072 build_int_2 (2 * UNITS_PER_WORD
- 1, 0));
3073 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
3074 build_int_2 (-2 * UNITS_PER_WORD
, -1));
3075 t
= build (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
3076 TREE_SIDE_EFFECTS (t
) = 1;
3077 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3080 return std_expand_builtin_va_arg (valist
, type
);
3083 /* Return 1 if function return value returned in memory. Return 0 if it is
   in a register.  HFAs of more than MAX_ARGUMENT_SLOTS elements, and
   non-HFA values larger than UNITS_PER_WORD * MAX_INT_RETURN_SLOTS
   bytes, go in memory.
   NOTE(review): extraction fragment -- the return type, braces, and
   all of the actual "return 0;"/"return 1;" statements (orig. 3104,
   3106-3110, 3112-3114) were dropped; only the classification tests
   remain visible.  */
3087 ia64_return_in_memory (valtype
)
3090 enum machine_mode mode
;
3091 enum machine_mode hfa_mode
;
/* Size of the returned value in bytes; BLKmode values need the
   type-level size query.  */
3094 mode
= TYPE_MODE (valtype
);
3095 byte_size
= ((mode
== BLKmode
)
3096 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
3098 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3100 hfa_mode
= hfa_element_mode (valtype
, 0);
3101 if (hfa_mode
!= VOIDmode
)
3103 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
/* Too many HFA elements to fit the FP argument registers.  */
3105 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
/* Non-HFA: too large for the integer return register slots.  */
3111 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
3117 /* Return rtx for register that holds the function return value. */
3120 ia64_function_value (valtype
, func
)
3122 tree func ATTRIBUTE_UNUSED
;
3124 enum machine_mode mode
;
3125 enum machine_mode hfa_mode
;
3127 mode
= TYPE_MODE (valtype
);
3128 hfa_mode
= hfa_element_mode (valtype
, 0);
3130 if (hfa_mode
!= VOIDmode
)
3138 hfa_size
= GET_MODE_SIZE (hfa_mode
);
3139 byte_size
= ((mode
== BLKmode
)
3140 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
3142 for (i
= 0; offset
< byte_size
; i
++)
3144 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3145 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
3151 return XEXP (loc
[0], 0);
3153 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3155 else if (FLOAT_TYPE_P (valtype
))
3156 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
3158 return gen_rtx_REG (mode
, GR_RET_FIRST
);
3161 /* Print a memory address as an operand to reference that memory location. */
3163 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3164 also call this from ia64_print_operand for memory addresses. */
/* Both parameters carry ATTRIBUTE_UNUSED, so the (dropped) body
   evidently uses neither -- presumably an empty stub; confirm against
   the full source.  NOTE(review): return type and body braces were
   dropped by extraction.  */
3167 ia64_print_operand_address (stream
, address
)
3168 FILE * stream ATTRIBUTE_UNUSED
;
3169 rtx address ATTRIBUTE_UNUSED
;
3173 /* Print an operand to a assembler instruction.
3174 C Swap and print a comparison operator.
3175 D Print an FP comparison operator.
3176 E Print 32 - constant, for SImode shifts as extract.
3177 e Print 64 - constant, for DImode rotates.
3178 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3179 a floating point register emitted normally.
3180 I Invert a predicate register by adding 1.
3181 J Select the proper predicate register for a condition.
3182 j Select the inverse predicate register for a condition.
3183 O Append .acq for volatile load.
3184 P Postincrement of a MEM.
3185 Q Append .rel for volatile store.
3186 S Shift amount for shladd instruction.
3187 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3188 for Intel assembler.
3189 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3190 for Intel assembler.
3191 r Print register name, or constant 0 as r0. HP compatibility for
3194 ia64_print_operand (file
, x
, code
)
3204 /* Handled below. */
3209 enum rtx_code c
= swap_condition (GET_CODE (x
));
3210 fputs (GET_RTX_NAME (c
), file
);
3215 switch (GET_CODE (x
))
3227 str
= GET_RTX_NAME (GET_CODE (x
));
3234 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
3238 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
3242 if (x
== CONST0_RTX (GET_MODE (x
)))
3243 str
= reg_names
[FR_REG (0)];
3244 else if (x
== CONST1_RTX (GET_MODE (x
)))
3245 str
= reg_names
[FR_REG (1)];
3246 else if (GET_CODE (x
) == REG
)
3247 str
= reg_names
[REGNO (x
)];
3254 fputs (reg_names
[REGNO (x
) + 1], file
);
3260 unsigned int regno
= REGNO (XEXP (x
, 0));
3261 if (GET_CODE (x
) == EQ
)
3265 fputs (reg_names
[regno
], file
);
3270 if (MEM_VOLATILE_P (x
))
3271 fputs(".acq", file
);
3276 HOST_WIDE_INT value
;
3278 switch (GET_CODE (XEXP (x
, 0)))
3284 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
3285 if (GET_CODE (x
) == CONST_INT
)
3287 else if (GET_CODE (x
) == REG
)
3289 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
3297 value
= GET_MODE_SIZE (GET_MODE (x
));
3301 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
3307 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, value
);
3312 if (MEM_VOLATILE_P (x
))
3313 fputs(".rel", file
);
3317 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
3321 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3323 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
3329 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3331 const char *prefix
= "0x";
3332 if (INTVAL (x
) & 0x80000000)
3334 fprintf (file
, "0xffffffff");
3337 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
3343 /* If this operand is the constant zero, write it as register zero.
3344 Any register, zero, or CONST_INT value is OK here. */
3345 if (GET_CODE (x
) == REG
)
3346 fputs (reg_names
[REGNO (x
)], file
);
3347 else if (x
== CONST0_RTX (GET_MODE (x
)))
3349 else if (GET_CODE (x
) == CONST_INT
)
3350 output_addr_const (file
, x
);
3352 output_operand_lossage ("invalid %%r value");
3359 /* For conditional branches, returns or calls, substitute
3360 sptk, dptk, dpnt, or spnt for %s. */
3361 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
3364 int pred_val
= INTVAL (XEXP (x
, 0));
3366 /* Guess top and bottom 10% statically predicted. */
3367 if (pred_val
< REG_BR_PROB_BASE
/ 50)
3369 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
3371 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98)
3376 else if (GET_CODE (current_output_insn
) == CALL_INSN
)
3381 fputs (which
, file
);
3386 x
= current_insn_predicate
;
3389 unsigned int regno
= REGNO (XEXP (x
, 0));
3390 if (GET_CODE (x
) == EQ
)
3392 fprintf (file
, "(%s) ", reg_names
[regno
]);
3397 output_operand_lossage ("ia64_print_operand: unknown code");
3401 switch (GET_CODE (x
))
3403 /* This happens for the spill/restore instructions. */
3408 /* ... fall through ... */
3411 fputs (reg_names
[REGNO (x
)], file
);
3416 rtx addr
= XEXP (x
, 0);
3417 if (GET_RTX_CLASS (GET_CODE (addr
)) == 'a')
3418 addr
= XEXP (addr
, 0);
3419 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
3424 output_addr_const (file
, x
);
3431 /* Calculate the cost of moving data from a register in class FROM to
   a register in class TO.  Classifies both classes into hard
   (BR/AR_M/AR_I), general, FP, and predicate groups, then prices each
   combination.
   NOTE(review): extraction fragment -- the return type, braces, the
   declarations of from_gr/to_gr/from_fr/to_fr/from_pr/to_pr
   (orig. 3439-3442), and every returned cost constant (orig. 3453,
   3455-3456, 3459, 3462-3463, 3470-3473) were dropped; only the
   classification and the branch conditions are visible.  */
3435 ia64_register_move_cost (from
, to
)
3436 enum reg_class from
, to
;
3438 int from_hard
, to_hard
;
/* "Hard" here means the branch and application registers, which can
   only be reached through a GR intermediary.  */
3443 from_hard
= (from
== BR_REGS
|| from
== AR_M_REGS
|| from
== AR_I_REGS
);
3444 to_hard
= (to
== BR_REGS
|| to
== AR_M_REGS
|| to
== AR_I_REGS
);
3445 from_gr
= (from
== GENERAL_REGS
);
3446 to_gr
= (to
== GENERAL_REGS
);
3447 from_fr
= (from
== FR_REGS
);
3448 to_fr
= (to
== FR_REGS
);
3449 from_pr
= (from
== PR_REGS
);
3450 to_pr
= (to
== PR_REGS
);
/* hard <-> hard is the most expensive case (cost dropped in
   extraction); hard <-> anything-but-GR is also penalized.  */
3452 if (from_hard
&& to_hard
)
3454 else if ((from_hard
&& !to_gr
) || (!from_gr
&& to_hard
))
3457 /* Moving between PR registers takes two insns. */
3458 else if (from_pr
&& to_pr
)
3460 /* Moving between PR and anything but GR is impossible. */
3461 else if ((from_pr
&& !to_gr
) || (!from_gr
&& to_pr
))
3464 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3465 secondary memory reloads for TFmode moves. Unfortunately, we don't
3466 have the mode here, so we can't check that. */
3467 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3468 to avoid spectacularly poor register class preferencing for TFmode. */
3469 else if (from_fr
!= to_fr
)
3475 /* This function returns the register class required for a secondary
3476 register when copying between one of the registers in CLASS, and X,
3477 using MODE. A return value of NO_REGS means that no secondary register
3481 ia64_secondary_reload_class (class, mode
, x
)
3482 enum reg_class
class;
3483 enum machine_mode mode ATTRIBUTE_UNUSED
;
3488 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
3489 regno
= true_regnum (x
);
3494 /* ??? This is required because of a bad gcse/cse/global interaction.
3495 We end up with two pseudos with overlapping lifetimes both of which
3496 are equiv to the same constant, and both which need to be in BR_REGS.
3497 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3498 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3499 This seems to be a cse bug. cse_basic_block_end changes depending
3500 on the path length, which means the qty_first_reg check in
3501 make_regs_eqv can give different answers at different times. */
3502 /* ??? At some point I'll probably need a reload_indi pattern to handle
3504 if (BR_REGNO_P (regno
))
3507 /* This is needed if a pseudo used as a call_operand gets spilled to a
3509 if (GET_CODE (x
) == MEM
)
3514 /* This can happen when a paradoxical subreg is an operand to the
3516 /* ??? This shouldn't be necessary after instruction scheduling is
3517 enabled, because paradoxical subregs are not accepted by
3518 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3519 stop the paradoxical subreg stupidity in the *_operand functions
3521 if (GET_CODE (x
) == MEM
3522 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
3523 || GET_MODE (x
) == QImode
))
3526 /* This can happen because of the ior/and/etc patterns that accept FP
3527 registers as operands. If the third operand is a constant, then it
3528 needs to be reloaded into a FP register. */
3529 if (GET_CODE (x
) == CONST_INT
)
3532 /* This can happen because of register elimination in a muldi3 insn.
3533 E.g. `26107 * (unsigned long)&u'. */
3534 if (GET_CODE (x
) == PLUS
)
3539 /* ??? This happens if we cse/gcse a BImode value across a call,
3540 and the function has a nonlocal goto. This is because global
3541 does not allocate call crossing pseudos to hard registers when
3542 current_function_has_nonlocal_goto is true. This is relatively
3543 common for C++ programs that use exceptions. To reproduce,
3544 return NO_REGS and compile libstdc++. */
3545 if (GET_CODE (x
) == MEM
)
3548 /* This can happen when we take a BImode subreg of a DImode value,
3549 and that DImode value winds up in some non-GR register. */
3550 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
3555 /* Since we have no offsettable memory addresses, we need a temporary
3556 to hold the address of the second word. */
3569 /* Emit text to declare externally defined variables and functions, because
3570 the Intel assembler does not support undefined externals. */
3573 ia64_asm_output_external (file
, decl
, name
)
3578 int save_referenced
;
3580 /* GNU as does not need anything here. */
3584 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3585 the linker when we do this, so we need to be careful not to do this for
3586 builtin functions which have no library equivalent. Unfortunately, we
3587 can't tell here whether or not a function will actually be called by
3588 expand_expr, so we pull in library functions even if we may not need
3590 if (! strcmp (name
, "__builtin_next_arg")
3591 || ! strcmp (name
, "alloca")
3592 || ! strcmp (name
, "__builtin_constant_p")
3593 || ! strcmp (name
, "__builtin_args_info"))
3596 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3598 save_referenced
= TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
));
3599 if (TREE_CODE (decl
) == FUNCTION_DECL
)
3601 fprintf (file
, "%s", TYPE_ASM_OP
);
3602 assemble_name (file
, name
);
3604 fprintf (file
, TYPE_OPERAND_FMT
, "function");
3607 ASM_GLOBALIZE_LABEL (file
, name
);
3608 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)) = save_referenced
;
3611 /* Parse the -mfixed-range= option string.
   NOTE(review): extraction fragment -- the return type, braces, the
   loop over comma-separated ranges, the error-path control flow after
   each warning, and the declarations of i/first/last (orig. 3616-3617,
   3619, 3625, 3629-3631, 3633-3639, 3641-3643, 3645-3650, 3652-3661,
   3663-3665, 3668-3676) were dropped; the visible lines show one pass
   of the parse.  */
3614 fix_range (const_str
)
3615 const char *const_str
;
3618 char *str
, *dash
, *comma
;
3620 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3621 REG2 are either register names or register numbers. The effect
3622 of this option is to mark the registers in the range from REG1 to
3623 REG2 as ``fixed'' so they won't be used by the compiler. This is
3624 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
/* Work on a writable copy, since the string is chopped in place.  */
3626 i
= strlen (const_str
);
3627 str
= (char *) alloca (i
+ 1);
3628 memcpy (str
, const_str
, i
+ 1);
/* Each range must contain a dash...  */
3632 dash
= strchr (str
, '-');
3635 warning ("value of -mfixed-range must have form REG1-REG2");
/* ...and may be followed by a comma introducing the next range.  */
3640 comma
= strchr (dash
+ 1, ',');
/* Decode both endpoints; decode_reg_name reports failures which are
   warned about (subsequent control flow dropped by extraction).  */
3644 first
= decode_reg_name (str
);
3647 warning ("unknown register name: %s", str
);
3651 last
= decode_reg_name (dash
+ 1);
3654 warning ("unknown register name: %s", dash
+ 1);
3662 warning ("%s-%s is an empty range", str
, dash
+ 1);
/* Mark the whole range fixed and call-used so the allocator never
   touches it.  */
3666 for (i
= first
; i
<= last
; ++i
)
3667 fixed_regs
[i
] = call_used_regs
[i
] = 1;
3677 /* Called to register all of our global variables with the garbage
   collector (tail of this comment and the function braces were dropped
   by extraction).  */
3681 ia64_add_gc_roots ()
/* The pending compare operands are rtx globals and must be kept alive
   across collections.  */
3683 ggc_add_rtx_root (&ia64_compare_op0
, 1);
3684 ggc_add_rtx_root (&ia64_compare_op1
, 1);
3688 ia64_init_machine_status (p
)
3692 (struct machine_function
*) xcalloc (1, sizeof (struct machine_function
));
3696 ia64_mark_machine_status (p
)
3699 struct machine_function
*machine
= p
->machine
;
3703 ggc_mark_rtx (machine
->ia64_eh_epilogue_sp
);
3704 ggc_mark_rtx (machine
->ia64_eh_epilogue_bsp
);
3705 ggc_mark_rtx (machine
->ia64_gp_save
);
3710 ia64_free_machine_status (p
)
3717 /* Handle TARGET_OPTIONS switches. */
3720 ia64_override_options ()
3722 if (TARGET_AUTO_PIC
)
3723 target_flags
|= MASK_CONST_GP
;
3725 if (TARGET_INLINE_DIV_LAT
&& TARGET_INLINE_DIV_THR
)
3727 warning ("cannot optimize division for both latency and throughput");
3728 target_flags
&= ~MASK_INLINE_DIV_THR
;
3731 if (ia64_fixed_range_string
)
3732 fix_range (ia64_fixed_range_string
);
3734 ia64_flag_schedule_insns2
= flag_schedule_insns_after_reload
;
3735 flag_schedule_insns_after_reload
= 0;
3737 ia64_section_threshold
= g_switch_set
? g_switch_value
: IA64_DEFAULT_GVALUE
;
3739 init_machine_status
= ia64_init_machine_status
;
3740 mark_machine_status
= ia64_mark_machine_status
;
3741 free_machine_status
= ia64_free_machine_status
;
3743 ia64_add_gc_roots ();
3746 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0
PARAMS((rtx
));
3747 static enum attr_itanium_class ia64_safe_itanium_class
PARAMS((rtx
));
3748 static enum attr_type ia64_safe_type
PARAMS((rtx
));
3750 static enum attr_itanium_requires_unit0
3751 ia64_safe_itanium_requires_unit0 (insn
)
3754 if (recog_memoized (insn
) >= 0)
3755 return get_attr_itanium_requires_unit0 (insn
);
3757 return ITANIUM_REQUIRES_UNIT0_NO
;
3760 static enum attr_itanium_class
3761 ia64_safe_itanium_class (insn
)
3764 if (recog_memoized (insn
) >= 0)
3765 return get_attr_itanium_class (insn
);
3767 return ITANIUM_CLASS_UNKNOWN
;
3770 static enum attr_type
3771 ia64_safe_type (insn
)
3774 if (recog_memoized (insn
) >= 0)
3775 return get_attr_type (insn
);
3777 return TYPE_UNKNOWN
;
3780 /* The following collection of routines emit instruction group stop bits as
3781 necessary to avoid dependencies. */
3783 /* Need to track some additional registers as far as serialization is
3784 concerned so we can properly handle br.call and br.ret. We could
3785 make these registers visible to gcc, but since these registers are
3786 never explicitly used in gcc generated code, it seems wasteful to
3787 do so (plus it would make the call and return patterns needlessly
3789 #define REG_GP (GR_REG (1))
3790 #define REG_RP (BR_REG (0))
3791 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3792 /* This is used for volatile asms which may require a stop bit immediately
3793 before and after them. */
3794 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
3795 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3796 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
3798 /* For each register, we keep track of how it has been written in the
3799 current instruction group.
3801 If a register is written unconditionally (no qualifying predicate),
3802 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3804 If a register is written if its qualifying predicate P is true, we
3805 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3806 may be written again by the complement of P (P^1) and when this happens,
3807 WRITE_COUNT gets set to 2.
3809 The result of this is that whenever an insn attempts to write a register
3810 whose WRITE_COUNT is two, we need to issue a insn group barrier first.
3812 If a predicate register is written by a floating-point insn, we set
3813 WRITTEN_BY_FP to true.
3815 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3816 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
/* Per-register write-tracking state for the current instruction
   group; see the block comment above for the WRITE_COUNT/FIRST_PRED
   protocol.  */

struct reg_write_state
{
  unsigned int write_count : 2;	/* 0 = unwritten, 1 = predicated, 2 = done.  */
  unsigned int first_pred : 16;	/* Predicate reg of the first write.  */
  unsigned int written_by_fp : 1;
  unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;
};
3827 /* Cumulative info for the current instruction group. */
3828 struct reg_write_state rws_sum
[NUM_REGS
];
3829 /* Info for the current instruction. This gets copied to rws_sum after a
3830 stop bit is emitted. */
3831 struct reg_write_state rws_insn
[NUM_REGS
];
3833 /* Indicates whether this is the first instruction after a stop bit,
3834 in which case we don't need another stop bit. Without this, we hit
3835 the abort in ia64_variable_issue when scheduling an alloc. */
3836 static int first_instruction
;
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */

struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};
3850 static void rws_update
PARAMS ((struct reg_write_state
*, int,
3851 struct reg_flags
, int));
3852 static int rws_access_regno
PARAMS ((int, struct reg_flags
, int));
3853 static int rws_access_reg
PARAMS ((rtx
, struct reg_flags
, int));
3854 static void update_set_flags
PARAMS ((rtx
, struct reg_flags
*, int *, rtx
*));
3855 static int set_src_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int, rtx
));
3856 static int rtx_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int));
3857 static void init_insn_group_barriers
PARAMS ((void));
3858 static int group_barrier_needed_p
PARAMS ((rtx
));
3859 static int safe_group_barrier_needed_p
PARAMS ((rtx
));
3861 /* Update *RWS for REGNO, which is being written by the current instruction,
3862 with predicate PRED, and associated register flags in FLAGS. */
3865 rws_update (rws
, regno
, flags
, pred
)
3866 struct reg_write_state
*rws
;
3868 struct reg_flags flags
;
3871 rws
[regno
].write_count
+= pred
? 1 : 2;
3872 rws
[regno
].written_by_fp
|= flags
.is_fp
;
3873 /* ??? Not tracking and/or across differing predicates. */
3874 rws
[regno
].written_by_and
= flags
.is_and
;
3875 rws
[regno
].written_by_or
= flags
.is_or
;
3876 rws
[regno
].first_pred
= pred
;
3879 /* Handle an access to register REGNO of type FLAGS using predicate register
3880 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3881 a dependency with an earlier instruction in the same group. */
3884 rws_access_regno (regno
, flags
, pred
)
3886 struct reg_flags flags
;
3889 int need_barrier
= 0;
3891 if (regno
>= NUM_REGS
)
3894 if (! PR_REGNO_P (regno
))
3895 flags
.is_and
= flags
.is_or
= 0;
3901 /* One insn writes same reg multiple times? */
3902 if (rws_insn
[regno
].write_count
> 0)
3905 /* Update info for current instruction. */
3906 rws_update (rws_insn
, regno
, flags
, pred
);
3907 write_count
= rws_sum
[regno
].write_count
;
3909 switch (write_count
)
3912 /* The register has not been written yet. */
3913 rws_update (rws_sum
, regno
, flags
, pred
);
3917 /* The register has been written via a predicate. If this is
3918 not a complementary predicate, then we need a barrier. */
3919 /* ??? This assumes that P and P+1 are always complementary
3920 predicates for P even. */
3921 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
3923 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
3925 else if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
3927 rws_update (rws_sum
, regno
, flags
, pred
);
3931 /* The register has been unconditionally written already. We
3933 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
3935 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
3939 rws_sum
[regno
].written_by_and
= flags
.is_and
;
3940 rws_sum
[regno
].written_by_or
= flags
.is_or
;
3949 if (flags
.is_branch
)
3951 /* Branches have several RAW exceptions that allow to avoid
3954 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== AR_PFS_REGNUM
)
3955 /* RAW dependencies on branch regs are permissible as long
3956 as the writer is a non-branch instruction. Since we
3957 never generate code that uses a branch register written
3958 by a branch instruction, handling this case is
3962 if (REGNO_REG_CLASS (regno
) == PR_REGS
3963 && ! rws_sum
[regno
].written_by_fp
)
3964 /* The predicates of a branch are available within the
3965 same insn group as long as the predicate was written by
3966 something other than a floating-point instruction. */
3970 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
3972 if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
3975 switch (rws_sum
[regno
].write_count
)
3978 /* The register has not been written yet. */
3982 /* The register has been written via a predicate. If this is
3983 not a complementary predicate, then we need a barrier. */
3984 /* ??? This assumes that P and P+1 are always complementary
3985 predicates for P even. */
3986 if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
3991 /* The register has been unconditionally written already. We
4001 return need_barrier
;
4005 rws_access_reg (reg
, flags
, pred
)
4007 struct reg_flags flags
;
4010 int regno
= REGNO (reg
);
4011 int n
= HARD_REGNO_NREGS (REGNO (reg
), GET_MODE (reg
));
4014 return rws_access_regno (regno
, flags
, pred
);
4017 int need_barrier
= 0;
4019 need_barrier
|= rws_access_regno (regno
+ n
, flags
, pred
);
4020 return need_barrier
;
4024 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4025 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4028 update_set_flags (x
, pflags
, ppred
, pcond
)
4030 struct reg_flags
*pflags
;
4034 rtx src
= SET_SRC (x
);
4038 switch (GET_CODE (src
))
4044 if (SET_DEST (x
) == pc_rtx
)
4045 /* X is a conditional branch. */
4049 int is_complemented
= 0;
4051 /* X is a conditional move. */
4052 rtx cond
= XEXP (src
, 0);
4053 if (GET_CODE (cond
) == EQ
)
4054 is_complemented
= 1;
4055 cond
= XEXP (cond
, 0);
4056 if (GET_CODE (cond
) != REG
4057 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4060 if (XEXP (src
, 1) == SET_DEST (x
)
4061 || XEXP (src
, 2) == SET_DEST (x
))
4063 /* X is a conditional move that conditionally writes the
4066 /* We need another complement in this case. */
4067 if (XEXP (src
, 1) == SET_DEST (x
))
4068 is_complemented
= ! is_complemented
;
4070 *ppred
= REGNO (cond
);
4071 if (is_complemented
)
4075 /* ??? If this is a conditional write to the dest, then this
4076 instruction does not actually read one source. This probably
4077 doesn't matter, because that source is also the dest. */
4078 /* ??? Multiple writes to predicate registers are allowed
4079 if they are all AND type compares, or if they are all OR
4080 type compares. We do not generate such instructions
4083 /* ... fall through ... */
4086 if (GET_RTX_CLASS (GET_CODE (src
)) == '<'
4087 && GET_MODE_CLASS (GET_MODE (XEXP (src
, 0))) == MODE_FLOAT
)
4088 /* Set pflags->is_fp to 1 so that we know we're dealing
4089 with a floating point comparison when processing the
4090 destination of the SET. */
4093 /* Discover if this is a parallel comparison. We only handle
4094 and.orcm and or.andcm at present, since we must retain a
4095 strict inverse on the predicate pair. */
4096 else if (GET_CODE (src
) == AND
)
4098 else if (GET_CODE (src
) == IOR
)
4105 /* Subroutine of rtx_needs_barrier; this function determines whether the
4106 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4107 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4111 set_src_needs_barrier (x
, flags
, pred
, cond
)
4113 struct reg_flags flags
;
4117 int need_barrier
= 0;
4119 rtx src
= SET_SRC (x
);
4121 if (GET_CODE (src
) == CALL
)
4122 /* We don't need to worry about the result registers that
4123 get written by subroutine call. */
4124 return rtx_needs_barrier (src
, flags
, pred
);
4125 else if (SET_DEST (x
) == pc_rtx
)
4127 /* X is a conditional branch. */
4128 /* ??? This seems redundant, as the caller sets this bit for
4130 flags
.is_branch
= 1;
4131 return rtx_needs_barrier (src
, flags
, pred
);
4134 need_barrier
= rtx_needs_barrier (src
, flags
, pred
);
4136 /* This instruction unconditionally uses a predicate register. */
4138 need_barrier
|= rws_access_reg (cond
, flags
, 0);
4141 if (GET_CODE (dst
) == ZERO_EXTRACT
)
4143 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
4144 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
4145 dst
= XEXP (dst
, 0);
4147 return need_barrier
;
4150 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4151 Return 1 is this access creates a dependency with an earlier instruction
4152 in the same group. */
4155 rtx_needs_barrier (x
, flags
, pred
)
4157 struct reg_flags flags
;
4161 int is_complemented
= 0;
4162 int need_barrier
= 0;
4163 const char *format_ptr
;
4164 struct reg_flags new_flags
;
4172 switch (GET_CODE (x
))
4175 update_set_flags (x
, &new_flags
, &pred
, &cond
);
4176 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
, cond
);
4177 if (GET_CODE (SET_SRC (x
)) != CALL
)
4179 new_flags
.is_write
= 1;
4180 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
4185 new_flags
.is_write
= 0;
4186 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4188 /* Avoid multiple register writes, in case this is a pattern with
4189 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4190 if (! flags
.is_sibcall
&& ! rws_insn
[REG_AR_CFM
].write_count
)
4192 new_flags
.is_write
= 1;
4193 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
4194 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
4195 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4200 /* X is a predicated instruction. */
4202 cond
= COND_EXEC_TEST (x
);
4205 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
4207 if (GET_CODE (cond
) == EQ
)
4208 is_complemented
= 1;
4209 cond
= XEXP (cond
, 0);
4210 if (GET_CODE (cond
) != REG
4211 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4213 pred
= REGNO (cond
);
4214 if (is_complemented
)
4217 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
4218 return need_barrier
;
4222 /* Clobber & use are for earlier compiler-phases only. */
4227 /* We always emit stop bits for traditional asms. We emit stop bits
4228 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4229 if (GET_CODE (x
) != ASM_OPERANDS
4230 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
4232 /* Avoid writing the register multiple times if we have multiple
4233 asm outputs. This avoids an abort in rws_access_reg. */
4234 if (! rws_insn
[REG_VOLATILE
].write_count
)
4236 new_flags
.is_write
= 1;
4237 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
4242 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4243 We can not just fall through here since then we would be confused
4244 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4245 traditional asms unlike their normal usage. */
4247 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
4248 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
4253 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4255 rtx pat
= XVECEXP (x
, 0, i
);
4256 if (GET_CODE (pat
) == SET
)
4258 update_set_flags (pat
, &new_flags
, &pred
, &cond
);
4259 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
, cond
);
4261 else if (GET_CODE (pat
) == USE
4262 || GET_CODE (pat
) == CALL
4263 || GET_CODE (pat
) == ASM_OPERANDS
)
4264 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4265 else if (GET_CODE (pat
) != CLOBBER
&& GET_CODE (pat
) != RETURN
)
4268 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4270 rtx pat
= XVECEXP (x
, 0, i
);
4271 if (GET_CODE (pat
) == SET
)
4273 if (GET_CODE (SET_SRC (pat
)) != CALL
)
4275 new_flags
.is_write
= 1;
4276 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
4280 else if (GET_CODE (pat
) == CLOBBER
)
4281 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4289 if (REGNO (x
) == AR_UNAT_REGNUM
)
4291 for (i
= 0; i
< 64; ++i
)
4292 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
4295 need_barrier
= rws_access_reg (x
, flags
, pred
);
4299 /* Find the regs used in memory address computation. */
4300 new_flags
.is_write
= 0;
4301 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4304 case CONST_INT
: case CONST_DOUBLE
:
4305 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
4308 /* Operators with side-effects. */
4309 case POST_INC
: case POST_DEC
:
4310 if (GET_CODE (XEXP (x
, 0)) != REG
)
4313 new_flags
.is_write
= 0;
4314 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4315 new_flags
.is_write
= 1;
4316 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4320 if (GET_CODE (XEXP (x
, 0)) != REG
)
4323 new_flags
.is_write
= 0;
4324 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4325 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4326 new_flags
.is_write
= 1;
4327 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4330 /* Handle common unary and binary ops for efficiency. */
4331 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
4332 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
4333 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
4334 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
4335 case NE
: case EQ
: case GE
: case GT
: case LE
:
4336 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
4337 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4338 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4341 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
4342 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
4343 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
4344 case SQRT
: case FFS
:
4345 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
4349 switch (XINT (x
, 1))
4351 case 1: /* st8.spill */
4352 case 2: /* ld8.fill */
4354 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
4355 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
4357 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4358 new_flags
.is_write
= (XINT (x
, 1) == 1);
4359 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
4364 case 3: /* stf.spill */
4365 case 4: /* ldf.spill */
4366 case 8: /* popcnt */
4367 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4370 case 7: /* pred_rel_mutex */
4371 case 9: /* pic call */
4373 case 19: /* fetchadd_acq */
4374 case 20: /* mov = ar.bsp */
4375 case 21: /* flushrs */
4376 case 22: /* bundle selector */
4377 case 23: /* cycle display */
4380 case 5: /* recip_approx */
4381 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4382 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4385 case 13: /* cmpxchg_acq */
4386 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4387 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
4395 case UNSPEC_VOLATILE
:
4396 switch (XINT (x
, 1))
4399 /* Alloc must always be the first instruction of a group.
4400 We force this by always returning true. */
4401 /* ??? We might get better scheduling if we explicitly check for
4402 input/local/output register dependencies, and modify the
4403 scheduler so that alloc is always reordered to the start of
4404 the current group. We could then eliminate all of the
4405 first_instruction code. */
4406 rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4408 new_flags
.is_write
= 1;
4409 rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4412 case 1: /* blockage */
4413 case 2: /* insn group barrier */
4416 case 5: /* set_bsp */
4420 case 7: /* pred.rel.mutex */
4421 case 8: /* safe_across_calls all */
4422 case 9: /* safe_across_calls normal */
4431 new_flags
.is_write
= 0;
4432 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
4433 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4435 new_flags
.is_write
= 1;
4436 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4437 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4441 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
4442 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4443 switch (format_ptr
[i
])
4445 case '0': /* unused field */
4446 case 'i': /* integer */
4447 case 'n': /* note */
4448 case 'w': /* wide integer */
4449 case 's': /* pointer to string */
4450 case 'S': /* optional pointer to string */
4454 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
4459 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
4460 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
4469 return need_barrier
;
4472 /* Clear out the state for group_barrier_needed_p at the start of a
4473 sequence of insns. */
4476 init_insn_group_barriers ()
4478 memset (rws_sum
, 0, sizeof (rws_sum
));
4479 first_instruction
= 1;
4482 /* Given the current state, recorded by previous calls to this function,
4483 determine whether a group barrier (a stop bit) is necessary before INSN.
4484 Return nonzero if so. */
4487 group_barrier_needed_p (insn
)
4491 int need_barrier
= 0;
4492 struct reg_flags flags
;
4494 memset (&flags
, 0, sizeof (flags
));
4495 switch (GET_CODE (insn
))
4501 /* A barrier doesn't imply an instruction group boundary. */
4505 memset (rws_insn
, 0, sizeof (rws_insn
));
4509 flags
.is_branch
= 1;
4510 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
4511 memset (rws_insn
, 0, sizeof (rws_insn
));
4512 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
4516 flags
.is_branch
= 1;
4520 if (GET_CODE (PATTERN (insn
)) == USE
4521 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
4522 /* Don't care about USE and CLOBBER "insns"---those are used to
4523 indicate to the optimizer that it shouldn't get rid of
4524 certain operations. */
4527 pat
= PATTERN (insn
);
4529 /* Ug. Hack hacks hacked elsewhere. */
4530 switch (recog_memoized (insn
))
4532 /* We play dependency tricks with the epilogue in order
4533 to get proper schedules. Undo this for dv analysis. */
4534 case CODE_FOR_epilogue_deallocate_stack
:
4535 pat
= XVECEXP (pat
, 0, 0);
4538 /* The pattern we use for br.cloop confuses the code above.
4539 The second element of the vector is representative. */
4540 case CODE_FOR_doloop_end_internal
:
4541 pat
= XVECEXP (pat
, 0, 1);
4544 /* Doesn't generate code. */
4545 case CODE_FOR_pred_rel_mutex
:
4552 memset (rws_insn
, 0, sizeof (rws_insn
));
4553 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
4555 /* Check to see if the previous instruction was a volatile
4558 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
4565 if (first_instruction
)
4568 first_instruction
= 0;
4571 return need_barrier
;
4574 /* Like group_barrier_needed_p, but do not clobber the current state. */
4577 safe_group_barrier_needed_p (insn
)
4580 struct reg_write_state rws_saved
[NUM_REGS
];
4581 int saved_first_instruction
;
4584 memcpy (rws_saved
, rws_sum
, NUM_REGS
* sizeof *rws_saved
);
4585 saved_first_instruction
= first_instruction
;
4587 t
= group_barrier_needed_p (insn
);
4589 memcpy (rws_sum
, rws_saved
, NUM_REGS
* sizeof *rws_saved
);
4590 first_instruction
= saved_first_instruction
;
4595 /* INSNS is an chain of instructions. Scan the chain, and insert stop bits
4596 as necessary to eliminate dependendencies. This function assumes that
4597 a final instruction scheduling pass has been run which has already
4598 inserted most of the necessary stop bits. This function only inserts
4599 new ones at basic block boundaries, since these are invisible to the
4603 emit_insn_group_barriers (dump
, insns
)
4609 int insns_since_last_label
= 0;
4611 init_insn_group_barriers ();
4613 for (insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
4615 if (GET_CODE (insn
) == CODE_LABEL
)
4617 if (insns_since_last_label
)
4619 insns_since_last_label
= 0;
4621 else if (GET_CODE (insn
) == NOTE
4622 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
4624 if (insns_since_last_label
)
4626 insns_since_last_label
= 0;
4628 else if (GET_CODE (insn
) == INSN
4629 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
4630 && XINT (PATTERN (insn
), 1) == 2)
4632 init_insn_group_barriers ();
4635 else if (INSN_P (insn
))
4637 insns_since_last_label
= 1;
4639 if (group_barrier_needed_p (insn
))
4644 fprintf (dump
, "Emitting stop before label %d\n",
4645 INSN_UID (last_label
));
4646 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
4649 init_insn_group_barriers ();
4657 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4658 This function has to emit all necessary group barriers. */
4661 emit_all_insn_group_barriers (dump
, insns
)
4662 FILE *dump ATTRIBUTE_UNUSED
;
4667 init_insn_group_barriers ();
4669 for (insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
4671 if (GET_CODE (insn
) == INSN
4672 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
4673 && XINT (PATTERN (insn
), 1) == 2)
4674 init_insn_group_barriers ();
4675 else if (INSN_P (insn
))
4677 if (group_barrier_needed_p (insn
))
4679 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
4680 init_insn_group_barriers ();
4681 group_barrier_needed_p (insn
);
4687 static int errata_find_address_regs
PARAMS ((rtx
*, void *));
4688 static void errata_emit_nops
PARAMS ((rtx
));
4689 static void fixup_errata
PARAMS ((void));
4691 /* This structure is used to track some details about the previous insns
4692 groups so we can determine if it may be necessary to insert NOPs to
4693 workaround hardware errata. */
4696 HARD_REG_SET p_reg_set
;
4697 HARD_REG_SET gr_reg_conditionally_set
;
4700 /* Index into the last_group array. */
4701 static int group_idx
;
4703 /* Called through for_each_rtx; determines if a hard register that was
4704 conditionally set in the previous group is used as an address register.
4705 It ensures that for_each_rtx returns 1 in that case. */
4707 errata_find_address_regs (xp
, data
)
4709 void *data ATTRIBUTE_UNUSED
;
4712 if (GET_CODE (x
) != MEM
)
4715 if (GET_CODE (x
) == POST_MODIFY
)
4717 if (GET_CODE (x
) == REG
)
4719 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
4720 if (TEST_HARD_REG_BIT (prev_group
->gr_reg_conditionally_set
,
4728 /* Called for each insn; this function keeps track of the state in
4729 last_group and emits additional NOPs if necessary to work around
4730 an Itanium A/B step erratum. */
4732 errata_emit_nops (insn
)
4735 struct group
*this_group
= last_group
+ group_idx
;
4736 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
4737 rtx pat
= PATTERN (insn
);
4738 rtx cond
= GET_CODE (pat
) == COND_EXEC
? COND_EXEC_TEST (pat
) : 0;
4739 rtx real_pat
= cond
? COND_EXEC_CODE (pat
) : pat
;
4740 enum attr_type type
;
4743 if (GET_CODE (real_pat
) == USE
4744 || GET_CODE (real_pat
) == CLOBBER
4745 || GET_CODE (real_pat
) == ASM_INPUT
4746 || GET_CODE (real_pat
) == ADDR_VEC
4747 || GET_CODE (real_pat
) == ADDR_DIFF_VEC
4748 || asm_noperands (PATTERN (insn
)) >= 0)
4751 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4754 if (GET_CODE (set
) == PARALLEL
)
4757 set
= XVECEXP (real_pat
, 0, 0);
4758 for (i
= 1; i
< XVECLEN (real_pat
, 0); i
++)
4759 if (GET_CODE (XVECEXP (real_pat
, 0, i
)) != USE
4760 && GET_CODE (XVECEXP (real_pat
, 0, i
)) != CLOBBER
)
4767 if (set
&& GET_CODE (set
) != SET
)
4770 type
= get_attr_type (insn
);
4773 && set
&& REG_P (SET_DEST (set
)) && PR_REGNO_P (REGNO (SET_DEST (set
))))
4774 SET_HARD_REG_BIT (this_group
->p_reg_set
, REGNO (SET_DEST (set
)));
4776 if ((type
== TYPE_M
|| type
== TYPE_A
) && cond
&& set
4777 && REG_P (SET_DEST (set
))
4778 && GET_CODE (SET_SRC (set
)) != PLUS
4779 && GET_CODE (SET_SRC (set
)) != MINUS
4780 && (GET_CODE (SET_SRC (set
)) != ASHIFT
4781 || !shladd_operand (XEXP (SET_SRC (set
), 1), VOIDmode
))
4782 && (GET_CODE (SET_SRC (set
)) != MEM
4783 || GET_CODE (XEXP (SET_SRC (set
), 0)) != POST_MODIFY
)
4784 && GENERAL_REGNO_P (REGNO (SET_DEST (set
))))
4786 if (GET_RTX_CLASS (GET_CODE (cond
)) != '<'
4787 || ! REG_P (XEXP (cond
, 0)))
4790 if (TEST_HARD_REG_BIT (prev_group
->p_reg_set
, REGNO (XEXP (cond
, 0))))
4791 SET_HARD_REG_BIT (this_group
->gr_reg_conditionally_set
, REGNO (SET_DEST (set
)));
4793 if (for_each_rtx (&real_pat
, errata_find_address_regs
, NULL
))
4795 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
4796 emit_insn_before (gen_nop (), insn
);
4797 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
4799 memset (last_group
, 0, sizeof last_group
);
4803 /* Emit extra nops if they are required to work around hardware errata. */
4810 if (! TARGET_B_STEP
)
4814 memset (last_group
, 0, sizeof last_group
);
4816 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
4821 if (ia64_safe_type (insn
) == TYPE_S
)
4824 memset (last_group
+ group_idx
, 0, sizeof last_group
[group_idx
]);
4827 errata_emit_nops (insn
);
4831 /* Instruction scheduling support. */
4832 /* Describe one bundle. */
4836 /* Zero if there's no possibility of a stop in this bundle other than
4837 at the end, otherwise the position of the optional stop bit. */
4839 /* The types of the three slots. */
4840 enum attr_type t
[3];
4841 /* The pseudo op to be emitted into the assembler output. */
4845 #define NR_BUNDLES 10
4847 /* A list of all available bundles. */
4849 static const struct bundle bundle
[NR_BUNDLES
] =
4851 { 2, { TYPE_M
, TYPE_I
, TYPE_I
}, ".mii" },
4852 { 1, { TYPE_M
, TYPE_M
, TYPE_I
}, ".mmi" },
4853 { 0, { TYPE_M
, TYPE_F
, TYPE_I
}, ".mfi" },
4854 { 0, { TYPE_M
, TYPE_M
, TYPE_F
}, ".mmf" },
4855 #if NR_BUNDLES == 10
4856 { 0, { TYPE_B
, TYPE_B
, TYPE_B
}, ".bbb" },
4857 { 0, { TYPE_M
, TYPE_B
, TYPE_B
}, ".mbb" },
4859 { 0, { TYPE_M
, TYPE_I
, TYPE_B
}, ".mib" },
4860 { 0, { TYPE_M
, TYPE_M
, TYPE_B
}, ".mmb" },
4861 { 0, { TYPE_M
, TYPE_F
, TYPE_B
}, ".mfb" },
4862 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
4863 it matches an L type insn. Otherwise we'll try to generate L type
4865 { 0, { TYPE_M
, TYPE_L
, TYPE_X
}, ".mlx" }
4868 /* Describe a packet of instructions. Packets consist of two bundles that
4869 are visible to the hardware in one scheduling window. */
4873 const struct bundle
*t1
, *t2
;
4874 /* Precomputed value of the first split issue in this packet if a cycle
4875 starts at its beginning. */
4877 /* For convenience, the insn types are replicated here so we don't have
4878 to go through T1 and T2 all the time. */
4879 enum attr_type t
[6];
4882 /* An array containing all possible packets. */
4883 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
4884 static struct ia64_packet packets
[NR_PACKETS
];
4886 /* Map attr_type to a string with the name. */
4888 static const char *type_names
[] =
4890 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
4893 /* Nonzero if we should insert stop bits into the schedule. */
4894 int ia64_final_schedule
= 0;
4896 static int itanium_split_issue
PARAMS ((const struct ia64_packet
*, int));
4897 static rtx ia64_single_set
PARAMS ((rtx
));
4898 static int insn_matches_slot
PARAMS ((const struct ia64_packet
*, enum attr_type
, int, rtx
));
4899 static void ia64_emit_insn_before
PARAMS ((rtx
, rtx
));
4900 static void maybe_rotate
PARAMS ((FILE *));
4901 static void finish_last_head
PARAMS ((FILE *, int));
4902 static void rotate_one_bundle
PARAMS ((FILE *));
4903 static void rotate_two_bundles
PARAMS ((FILE *));
4904 static void nop_cycles_until
PARAMS ((int, FILE *));
4905 static void cycle_end_fill_slots
PARAMS ((FILE *));
4906 static int packet_matches_p
PARAMS ((const struct ia64_packet
*, int, int *));
4907 static int get_split
PARAMS ((const struct ia64_packet
*, int));
4908 static int find_best_insn
PARAMS ((rtx
*, enum attr_type
*, int,
4909 const struct ia64_packet
*, int));
4910 static void find_best_packet
PARAMS ((int *, const struct ia64_packet
**,
4911 rtx
*, enum attr_type
*, int));
4912 static int itanium_reorder
PARAMS ((FILE *, rtx
*, rtx
*, int));
4913 static void dump_current_packet
PARAMS ((FILE *));
4914 static void schedule_stop
PARAMS ((FILE *));
4915 static rtx gen_nop_type
PARAMS ((enum attr_type
));
4916 static void ia64_emit_nops
PARAMS ((void));
4918 /* Map a bundle number to its pseudo-op. */
4924 return bundle
[b
].name
;
4927 /* Compute the slot which will cause a split issue in packet P if the
4928 current cycle begins at slot BEGIN. */
4931 itanium_split_issue (p
, begin
)
4932 const struct ia64_packet
*p
;
4935 int type_count
[TYPE_S
];
4941 /* Always split before and after MMF. */
4942 if (p
->t
[0] == TYPE_M
&& p
->t
[1] == TYPE_M
&& p
->t
[2] == TYPE_F
)
4944 if (p
->t
[3] == TYPE_M
&& p
->t
[4] == TYPE_M
&& p
->t
[5] == TYPE_F
)
4946 /* Always split after MBB and BBB. */
4947 if (p
->t
[1] == TYPE_B
)
4949 /* Split after first bundle in MIB BBB combination. */
4950 if (p
->t
[2] == TYPE_B
&& p
->t
[3] == TYPE_B
)
4954 memset (type_count
, 0, sizeof type_count
);
4955 for (i
= begin
; i
< split
; i
++)
4957 enum attr_type t0
= p
->t
[i
];
4958 /* An MLX bundle reserves the same units as an MFI bundle. */
4959 enum attr_type t
= (t0
== TYPE_L
? TYPE_F
4960 : t0
== TYPE_X
? TYPE_I
4962 int max
= (t
== TYPE_B
? 3 : t
== TYPE_F
? 1 : 2);
4963 if (type_count
[t
] == max
)
4970 /* Return the maximum number of instructions a cpu can issue. */
4978 /* Helper function - like single_set, but look inside COND_EXEC. */
4981 ia64_single_set (insn
)
4984 rtx x
= PATTERN (insn
);
4985 if (GET_CODE (x
) == COND_EXEC
)
4986 x
= COND_EXEC_CODE (x
);
4987 if (GET_CODE (x
) == SET
)
4989 return single_set_2 (insn
, x
);
4992 /* Adjust the cost of a scheduling dependency. Return the new cost of
4993 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4996 ia64_adjust_cost (insn
, link
, dep_insn
, cost
)
4997 rtx insn
, link
, dep_insn
;
5000 enum attr_type dep_type
;
5001 enum attr_itanium_class dep_class
;
5002 enum attr_itanium_class insn_class
;
5003 rtx dep_set
, set
, src
, addr
;
5005 if (GET_CODE (PATTERN (insn
)) == CLOBBER
5006 || GET_CODE (PATTERN (insn
)) == USE
5007 || GET_CODE (PATTERN (dep_insn
)) == CLOBBER
5008 || GET_CODE (PATTERN (dep_insn
)) == USE
5009 /* @@@ Not accurate for indirect calls. */
5010 || GET_CODE (insn
) == CALL_INSN
5011 || ia64_safe_type (insn
) == TYPE_S
)
5014 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
5015 || REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
5018 dep_type
= ia64_safe_type (dep_insn
);
5019 dep_class
= ia64_safe_itanium_class (dep_insn
);
5020 insn_class
= ia64_safe_itanium_class (insn
);
5022 /* Compares that feed a conditional branch can execute in the same
5024 dep_set
= ia64_single_set (dep_insn
);
5025 set
= ia64_single_set (insn
);
5027 if (dep_type
!= TYPE_F
5029 && GET_CODE (SET_DEST (dep_set
)) == REG
5030 && PR_REG (REGNO (SET_DEST (dep_set
)))
5031 && GET_CODE (insn
) == JUMP_INSN
)
5034 if (dep_set
&& GET_CODE (SET_DEST (dep_set
)) == MEM
)
5036 /* ??? Can't find any information in the documenation about whether
5040 splits issue. Assume it doesn't. */
5044 src
= set
? SET_SRC (set
) : 0;
5046 if (set
&& GET_CODE (SET_DEST (set
)) == MEM
)
5047 addr
= XEXP (SET_DEST (set
), 0);
5048 else if (set
&& GET_CODE (src
) == MEM
)
5049 addr
= XEXP (src
, 0);
5050 else if (set
&& GET_CODE (src
) == ZERO_EXTEND
5051 && GET_CODE (XEXP (src
, 0)) == MEM
)
5052 addr
= XEXP (XEXP (src
, 0), 0);
5053 else if (set
&& GET_CODE (src
) == UNSPEC
5054 && XVECLEN (XEXP (src
, 0), 0) > 0
5055 && GET_CODE (XVECEXP (src
, 0, 0)) == MEM
)
5056 addr
= XEXP (XVECEXP (src
, 0, 0), 0);
5057 if (addr
&& GET_CODE (addr
) == POST_MODIFY
)
5058 addr
= XEXP (addr
, 0);
5060 set
= ia64_single_set (dep_insn
);
5062 if ((dep_class
== ITANIUM_CLASS_IALU
5063 || dep_class
== ITANIUM_CLASS_ILOG
5064 || dep_class
== ITANIUM_CLASS_LD
)
5065 && (insn_class
== ITANIUM_CLASS_LD
5066 || insn_class
== ITANIUM_CLASS_ST
))
5068 if (! addr
|| ! set
)
5070 /* This isn't completely correct - an IALU that feeds an address has
5071 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5072 otherwise. Unfortunately there's no good way to describe this. */
5073 if (reg_overlap_mentioned_p (SET_DEST (set
), addr
))
5076 if ((dep_class
== ITANIUM_CLASS_IALU
5077 || dep_class
== ITANIUM_CLASS_ILOG
5078 || dep_class
== ITANIUM_CLASS_LD
)
5079 && (insn_class
== ITANIUM_CLASS_MMMUL
5080 || insn_class
== ITANIUM_CLASS_MMSHF
5081 || insn_class
== ITANIUM_CLASS_MMSHFI
))
5083 if (dep_class
== ITANIUM_CLASS_FMAC
5084 && (insn_class
== ITANIUM_CLASS_FMISC
5085 || insn_class
== ITANIUM_CLASS_FCVTFX
5086 || insn_class
== ITANIUM_CLASS_XMPY
))
5088 if ((dep_class
== ITANIUM_CLASS_FMAC
5089 || dep_class
== ITANIUM_CLASS_FMISC
5090 || dep_class
== ITANIUM_CLASS_FCVTFX
5091 || dep_class
== ITANIUM_CLASS_XMPY
)
5092 && insn_class
== ITANIUM_CLASS_STF
)
5094 if ((dep_class
== ITANIUM_CLASS_MMMUL
5095 || dep_class
== ITANIUM_CLASS_MMSHF
5096 || dep_class
== ITANIUM_CLASS_MMSHFI
)
5097 && (insn_class
== ITANIUM_CLASS_LD
5098 || insn_class
== ITANIUM_CLASS_ST
5099 || insn_class
== ITANIUM_CLASS_IALU
5100 || insn_class
== ITANIUM_CLASS_ILOG
5101 || insn_class
== ITANIUM_CLASS_ISHF
))
5107 /* Describe the current state of the Itanium pipeline. */
5110 /* The first slot that is used in the current cycle. */
5112 /* The next slot to fill. */
5114 /* The packet we have selected for the current issue window. */
5115 const struct ia64_packet
*packet
;
5116 /* The position of the split issue that occurs due to issue width
5117 limitations (6 if there's no split issue). */
5119 /* Record data about the insns scheduled so far in the same issue
5120 window. The elements up to but not including FIRST_SLOT belong
5121 to the previous cycle, the ones starting with FIRST_SLOT belong
5122 to the current cycle. */
5123 enum attr_type types
[6];
5126 /* Nonzero if we decided to schedule a stop bit. */
5130 /* Temporary arrays; they have enough elements to hold all insns that
5131 can be ready at the same time while scheduling of the current block.
5132 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5133 static rtx
*sched_ready
;
5134 static enum attr_type
*sched_types
;
5136 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5140 insn_matches_slot (p
, itype
, slot
, insn
)
5141 const struct ia64_packet
*p
;
5142 enum attr_type itype
;
5146 enum attr_itanium_requires_unit0 u0
;
5147 enum attr_type stype
= p
->t
[slot
];
5151 u0
= ia64_safe_itanium_requires_unit0 (insn
);
5152 if (u0
== ITANIUM_REQUIRES_UNIT0_YES
)
5155 for (i
= sched_data
.first_slot
; i
< slot
; i
++)
5156 if (p
->t
[i
] == stype
)
5159 if (GET_CODE (insn
) == CALL_INSN
)
5161 /* Reject calls in multiway branch packets. We want to limit
5162 the number of multiway branches we generate (since the branch
5163 predictor is limited), and this seems to work fairly well.
5164 (If we didn't do this, we'd have to add another test here to
5165 force calls into the third slot of the bundle.) */
5168 if (p
->t
[1] == TYPE_B
)
5173 if (p
->t
[4] == TYPE_B
)
5181 if (itype
== TYPE_A
)
5182 return stype
== TYPE_M
|| stype
== TYPE_I
;
5186 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5187 assembly output a bit prettier. */
5190 ia64_emit_insn_before (insn
, before
)
5193 rtx prev
= PREV_INSN (before
);
5194 if (prev
&& GET_CODE (prev
) == INSN
5195 && GET_CODE (PATTERN (prev
)) == UNSPEC
5196 && XINT (PATTERN (prev
), 1) == 23)
5198 emit_insn_before (insn
, before
);
5202 /* Generate a nop insn of the given type. Note we never generate L type
5212 return gen_nop_m ();
5214 return gen_nop_i ();
5216 return gen_nop_b ();
5218 return gen_nop_f ();
5220 return gen_nop_x ();
5227 /* When rotating a bundle out of the issue window, insert a bundle selector
5228 insn in front of it. DUMP is the scheduling dump file or NULL. START
5229 is either 0 or 3, depending on whether we want to emit a bundle selector
5230 for the first bundle or the second bundle in the current issue window.
5232 The selector insns are emitted this late because the selected packet can
5233 be changed until parts of it get rotated out. */
5236 finish_last_head (dump
, start
)
5240 const struct ia64_packet
*p
= sched_data
.packet
;
5241 const struct bundle
*b
= start
== 0 ? p
->t1
: p
->t2
;
5242 int bundle_type
= b
- bundle
;
5246 if (! ia64_final_schedule
)
5249 for (i
= start
; sched_data
.insns
[i
] == 0; i
++)
5252 insn
= sched_data
.insns
[i
];
5255 fprintf (dump
, "// Emitting template before %d: %s\n",
5256 INSN_UID (insn
), b
->name
);
5258 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type
)), insn
);
5261 /* We can't schedule more insns this cycle. Fix up the scheduling state
5262 and advance FIRST_SLOT and CUR.
5263 We have to distribute the insns that are currently found between
5264 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5265 far, they are stored successively in the fields starting at FIRST_SLOT;
5266 now they must be moved to the correct slots.
5267 DUMP is the current scheduling dump file, or NULL. */
5270 cycle_end_fill_slots (dump
)
5273 const struct ia64_packet
*packet
= sched_data
.packet
;
5275 enum attr_type tmp_types
[6];
5278 memcpy (tmp_types
, sched_data
.types
, 6 * sizeof (enum attr_type
));
5279 memcpy (tmp_insns
, sched_data
.insns
, 6 * sizeof (rtx
));
5281 for (i
= slot
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
5283 enum attr_type t
= tmp_types
[i
];
5284 if (t
!= ia64_safe_type (tmp_insns
[i
]))
5286 while (! insn_matches_slot (packet
, t
, slot
, tmp_insns
[i
]))
5288 if (slot
> sched_data
.split
)
5291 fprintf (dump
, "// Packet needs %s, have %s\n", type_names
[packet
->t
[slot
]],
5293 sched_data
.types
[slot
] = packet
->t
[slot
];
5294 sched_data
.insns
[slot
] = 0;
5295 sched_data
.stopbit
[slot
] = 0;
5298 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5299 actual slot type later. */
5300 sched_data
.types
[slot
] = packet
->t
[slot
];
5301 sched_data
.insns
[slot
] = tmp_insns
[i
];
5302 sched_data
.stopbit
[slot
] = 0;
5306 /* This isn't right - there's no need to pad out until the forced split;
5307 the CPU will automatically split if an insn isn't ready. */
5309 while (slot
< sched_data
.split
)
5311 sched_data
.types
[slot
] = packet
->t
[slot
];
5312 sched_data
.insns
[slot
] = 0;
5313 sched_data
.stopbit
[slot
] = 0;
5318 sched_data
.first_slot
= sched_data
.cur
= slot
;
5321 /* Bundle rotations, as described in the Itanium optimization manual.
5322 We can rotate either one or both bundles out of the issue window.
5323 DUMP is the current scheduling dump file, or NULL. */
5326 rotate_one_bundle (dump
)
5330 fprintf (dump
, "// Rotating one bundle.\n");
5332 finish_last_head (dump
, 0);
5333 if (sched_data
.cur
> 3)
5335 sched_data
.cur
-= 3;
5336 sched_data
.first_slot
-= 3;
5337 memmove (sched_data
.types
,
5338 sched_data
.types
+ 3,
5339 sched_data
.cur
* sizeof *sched_data
.types
);
5340 memmove (sched_data
.stopbit
,
5341 sched_data
.stopbit
+ 3,
5342 sched_data
.cur
* sizeof *sched_data
.stopbit
);
5343 memmove (sched_data
.insns
,
5344 sched_data
.insns
+ 3,
5345 sched_data
.cur
* sizeof *sched_data
.insns
);
5350 sched_data
.first_slot
= 0;
5355 rotate_two_bundles (dump
)
5359 fprintf (dump
, "// Rotating two bundles.\n");
5361 if (sched_data
.cur
== 0)
5364 finish_last_head (dump
, 0);
5365 if (sched_data
.cur
> 3)
5366 finish_last_head (dump
, 3);
5368 sched_data
.first_slot
= 0;
5371 /* We're beginning a new block. Initialize data structures as necessary. */
5374 ia64_sched_init (dump
, sched_verbose
, max_ready
)
5375 FILE *dump ATTRIBUTE_UNUSED
;
5376 int sched_verbose ATTRIBUTE_UNUSED
;
5379 static int initialized
= 0;
5387 for (i
= b1
= 0; b1
< NR_BUNDLES
; b1
++)
5389 const struct bundle
*t1
= bundle
+ b1
;
5390 for (b2
= 0; b2
< NR_BUNDLES
; b2
++, i
++)
5392 const struct bundle
*t2
= bundle
+ b2
;
5398 for (i
= 0; i
< NR_PACKETS
; i
++)
5401 for (j
= 0; j
< 3; j
++)
5402 packets
[i
].t
[j
] = packets
[i
].t1
->t
[j
];
5403 for (j
= 0; j
< 3; j
++)
5404 packets
[i
].t
[j
+ 3] = packets
[i
].t2
->t
[j
];
5405 packets
[i
].first_split
= itanium_split_issue (packets
+ i
, 0);
5410 init_insn_group_barriers ();
5412 memset (&sched_data
, 0, sizeof sched_data
);
5413 sched_types
= (enum attr_type
*) xmalloc (max_ready
5414 * sizeof (enum attr_type
));
5415 sched_ready
= (rtx
*) xmalloc (max_ready
* sizeof (rtx
));
5418 /* See if the packet P can match the insns we have already scheduled. Return
5419 nonzero if so. In *PSLOT, we store the first slot that is available for
5420 more instructions if we choose this packet.
5421 SPLIT holds the last slot we can use, there's a split issue after it so
5422 scheduling beyond it would cause us to use more than one cycle. */
5425 packet_matches_p (p
, split
, pslot
)
5426 const struct ia64_packet
*p
;
5430 int filled
= sched_data
.cur
;
5431 int first
= sched_data
.first_slot
;
5434 /* First, check if the first of the two bundles must be a specific one (due
5436 if (first
> 0 && sched_data
.stopbit
[0] && p
->t1
->possible_stop
!= 1)
5438 if (first
> 1 && sched_data
.stopbit
[1] && p
->t1
->possible_stop
!= 2)
5441 for (i
= 0; i
< first
; i
++)
5442 if (! insn_matches_slot (p
, sched_data
.types
[i
], i
,
5443 sched_data
.insns
[i
]))
5445 for (i
= slot
= first
; i
< filled
; i
++)
5447 while (slot
< split
)
5449 if (insn_matches_slot (p
, sched_data
.types
[i
], slot
,
5450 sched_data
.insns
[i
]))
5464 /* A frontend for itanium_split_issue. For a packet P and a slot
5465 number FIRST that describes the start of the current clock cycle,
5466 return the slot number of the first split issue. This function
5467 uses the cached number found in P if possible. */
5470 get_split (p
, first
)
5471 const struct ia64_packet
*p
;
5475 return p
->first_split
;
5476 return itanium_split_issue (p
, first
);
5479 /* Given N_READY insns in the array READY, whose types are found in the
5480 corresponding array TYPES, return the insn that is best suited to be
5481 scheduled in slot SLOT of packet P. */
5484 find_best_insn (ready
, types
, n_ready
, p
, slot
)
5486 enum attr_type
*types
;
5488 const struct ia64_packet
*p
;
5493 while (n_ready
-- > 0)
5495 rtx insn
= ready
[n_ready
];
5498 if (best
>= 0 && INSN_PRIORITY (ready
[n_ready
]) < best_pri
)
5500 /* If we have equally good insns, one of which has a stricter
5501 slot requirement, prefer the one with the stricter requirement. */
5502 if (best
>= 0 && types
[n_ready
] == TYPE_A
)
5504 if (insn_matches_slot (p
, types
[n_ready
], slot
, insn
))
5507 best_pri
= INSN_PRIORITY (ready
[best
]);
5509 /* If there's no way we could get a stricter requirement, stop
5511 if (types
[n_ready
] != TYPE_A
5512 && ia64_safe_itanium_requires_unit0 (ready
[n_ready
]))
5520 /* Select the best packet to use given the current scheduler state and the
5522 READY is an array holding N_READY ready insns; TYPES is a corresponding
5523 array that holds their types. Store the best packet in *PPACKET and the
5524 number of insns that can be scheduled in the current cycle in *PBEST. */
5527 find_best_packet (pbest
, ppacket
, ready
, types
, n_ready
)
5529 const struct ia64_packet
**ppacket
;
5531 enum attr_type
*types
;
5534 int first
= sched_data
.first_slot
;
5537 const struct ia64_packet
*best_packet
= NULL
;
5540 for (i
= 0; i
< NR_PACKETS
; i
++)
5542 const struct ia64_packet
*p
= packets
+ i
;
5544 int split
= get_split (p
, first
);
5546 int first_slot
, last_slot
;
5549 if (! packet_matches_p (p
, split
, &first_slot
))
5552 memcpy (sched_ready
, ready
, n_ready
* sizeof (rtx
));
5556 for (slot
= first_slot
; slot
< split
; slot
++)
5560 /* Disallow a degenerate case where the first bundle doesn't
5561 contain anything but NOPs! */
5562 if (first_slot
== 0 && win
== 0 && slot
== 3)
5568 insn_nr
= find_best_insn (sched_ready
, types
, n_ready
, p
, slot
);
5571 sched_ready
[insn_nr
] = 0;
5575 else if (p
->t
[slot
] == TYPE_B
)
5578 /* We must disallow MBB/BBB packets if any of their B slots would be
5579 filled with nops. */
5582 if (p
->t
[1] == TYPE_B
&& (b_nops
|| last_slot
< 2))
5587 if (p
->t
[4] == TYPE_B
&& (b_nops
|| last_slot
< 5))
5592 || (win
== best
&& last_slot
< lowest_end
))
5595 lowest_end
= last_slot
;
5600 *ppacket
= best_packet
;
5603 /* Reorder the ready list so that the insns that can be issued in this cycle
5604 are found in the correct order at the end of the list.
5605 DUMP is the scheduling dump file, or NULL. READY points to the start,
5606 E_READY to the end of the ready list. MAY_FAIL determines what should be
5607 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5608 otherwise we return 0.
5609 Return 1 if any insns can be scheduled in this cycle. */
5612 itanium_reorder (dump
, ready
, e_ready
, may_fail
)
5618 const struct ia64_packet
*best_packet
;
5619 int n_ready
= e_ready
- ready
;
5620 int first
= sched_data
.first_slot
;
5621 int i
, best
, best_split
, filled
;
5623 for (i
= 0; i
< n_ready
; i
++)
5624 sched_types
[i
] = ia64_safe_type (ready
[i
]);
5626 find_best_packet (&best
, &best_packet
, ready
, sched_types
, n_ready
);
5637 fprintf (dump
, "// Selected bundles: %s %s (%d insns)\n",
5638 best_packet
->t1
->name
,
5639 best_packet
->t2
? best_packet
->t2
->name
: NULL
, best
);
5642 best_split
= itanium_split_issue (best_packet
, first
);
5643 packet_matches_p (best_packet
, best_split
, &filled
);
5645 for (i
= filled
; i
< best_split
; i
++)
5649 insn_nr
= find_best_insn (ready
, sched_types
, n_ready
, best_packet
, i
);
5652 rtx insn
= ready
[insn_nr
];
5653 memmove (ready
+ insn_nr
, ready
+ insn_nr
+ 1,
5654 (n_ready
- insn_nr
- 1) * sizeof (rtx
));
5655 memmove (sched_types
+ insn_nr
, sched_types
+ insn_nr
+ 1,
5656 (n_ready
- insn_nr
- 1) * sizeof (enum attr_type
));
5657 ready
[--n_ready
] = insn
;
5661 sched_data
.packet
= best_packet
;
5662 sched_data
.split
= best_split
;
5666 /* Dump information about the current scheduling state to file DUMP. */
5669 dump_current_packet (dump
)
5673 fprintf (dump
, "// %d slots filled:", sched_data
.cur
);
5674 for (i
= 0; i
< sched_data
.first_slot
; i
++)
5676 rtx insn
= sched_data
.insns
[i
];
5677 fprintf (dump
, " %s", type_names
[sched_data
.types
[i
]]);
5679 fprintf (dump
, "/%s", type_names
[ia64_safe_type (insn
)]);
5680 if (sched_data
.stopbit
[i
])
5681 fprintf (dump
, " ;;");
5683 fprintf (dump
, " :::");
5684 for (i
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
5686 rtx insn
= sched_data
.insns
[i
];
5687 enum attr_type t
= ia64_safe_type (insn
);
5688 fprintf (dump
, " (%d) %s", INSN_UID (insn
), type_names
[t
]);
5690 fprintf (dump
, "\n");
5693 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5697 schedule_stop (dump
)
5700 const struct ia64_packet
*best
= sched_data
.packet
;
5705 fprintf (dump
, "// Stop bit, cur = %d.\n", sched_data
.cur
);
5707 if (sched_data
.cur
== 0)
5710 fprintf (dump
, "// At start of bundle, so nothing to do.\n");
5712 rotate_two_bundles (NULL
);
5716 for (i
= -1; i
< NR_PACKETS
; i
++)
5718 /* This is a slight hack to give the current packet the first chance.
5719 This is done to avoid e.g. switching from MIB to MBB bundles. */
5720 const struct ia64_packet
*p
= (i
>= 0 ? packets
+ i
: sched_data
.packet
);
5721 int split
= get_split (p
, sched_data
.first_slot
);
5722 const struct bundle
*compare
;
5725 if (! packet_matches_p (p
, split
, &next
))
5728 compare
= next
> 3 ? p
->t2
: p
->t1
;
5731 if (compare
->possible_stop
)
5732 stoppos
= compare
->possible_stop
;
5736 if (stoppos
< next
|| stoppos
>= best_stop
)
5738 if (compare
->possible_stop
== 0)
5740 stoppos
= (next
> 3 ? 6 : 3);
5742 if (stoppos
< next
|| stoppos
>= best_stop
)
5746 fprintf (dump
, "// switching from %s %s to %s %s (stop at %d)\n",
5747 best
->t1
->name
, best
->t2
->name
, p
->t1
->name
, p
->t2
->name
,
5750 best_stop
= stoppos
;
5754 sched_data
.packet
= best
;
5755 cycle_end_fill_slots (dump
);
5756 while (sched_data
.cur
< best_stop
)
5758 sched_data
.types
[sched_data
.cur
] = best
->t
[sched_data
.cur
];
5759 sched_data
.insns
[sched_data
.cur
] = 0;
5760 sched_data
.stopbit
[sched_data
.cur
] = 0;
5763 sched_data
.stopbit
[sched_data
.cur
- 1] = 1;
5764 sched_data
.first_slot
= best_stop
;
5767 dump_current_packet (dump
);
5770 /* If necessary, perform one or two rotations on the scheduling state.
5771 This should only be called if we are starting a new cycle. */
5777 if (sched_data
.cur
== 6)
5778 rotate_two_bundles (dump
);
5779 else if (sched_data
.cur
>= 3)
5780 rotate_one_bundle (dump
);
5781 sched_data
.first_slot
= sched_data
.cur
;
5784 /* The clock cycle when ia64_sched_reorder was last called. */
5785 static int prev_cycle
;
5787 /* The first insn scheduled in the previous cycle. This is the saved
5788 value of sched_data.first_slot. */
5789 static int prev_first
;
5791 /* The last insn that has been scheduled. At the start of a new cycle
5792 we know that we can emit new insns after it; the main scheduling code
5793 has already emitted a cycle_display insn after it and is using that
5794 as its current last insn. */
5795 static rtx last_issued
;
5797 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
5798 pad out the delay between MM (shifts, etc.) and integer operations. */
5801 nop_cycles_until (clock_var
, dump
)
5805 int prev_clock
= prev_cycle
;
5806 int cycles_left
= clock_var
- prev_clock
;
5808 /* Finish the previous cycle; pad it out with NOPs. */
5809 if (sched_data
.cur
== 3)
5811 rtx t
= gen_insn_group_barrier (GEN_INT (3));
5812 last_issued
= emit_insn_after (t
, last_issued
);
5813 maybe_rotate (dump
);
5815 else if (sched_data
.cur
> 0)
5818 int split
= itanium_split_issue (sched_data
.packet
, prev_first
);
5820 if (sched_data
.cur
< 3 && split
> 3)
5826 if (split
> sched_data
.cur
)
5829 for (i
= sched_data
.cur
; i
< split
; i
++)
5833 t
= gen_nop_type (sched_data
.packet
->t
[i
]);
5834 last_issued
= emit_insn_after (t
, last_issued
);
5835 sched_data
.types
[i
] = sched_data
.packet
->t
[sched_data
.cur
];
5836 sched_data
.insns
[i
] = last_issued
;
5837 sched_data
.stopbit
[i
] = 0;
5839 sched_data
.cur
= split
;
5842 if (! need_stop
&& sched_data
.cur
> 0 && sched_data
.cur
< 6
5846 for (i
= sched_data
.cur
; i
< 6; i
++)
5850 t
= gen_nop_type (sched_data
.packet
->t
[i
]);
5851 last_issued
= emit_insn_after (t
, last_issued
);
5852 sched_data
.types
[i
] = sched_data
.packet
->t
[sched_data
.cur
];
5853 sched_data
.insns
[i
] = last_issued
;
5854 sched_data
.stopbit
[i
] = 0;
5861 if (need_stop
|| sched_data
.cur
== 6)
5863 rtx t
= gen_insn_group_barrier (GEN_INT (3));
5864 last_issued
= emit_insn_after (t
, last_issued
);
5866 maybe_rotate (dump
);
5870 while (cycles_left
> 0)
5872 rtx t
= gen_bundle_selector (GEN_INT (0));
5873 last_issued
= emit_insn_after (t
, last_issued
);
5874 t
= gen_nop_type (TYPE_M
);
5875 last_issued
= emit_insn_after (t
, last_issued
);
5876 t
= gen_nop_type (TYPE_I
);
5877 last_issued
= emit_insn_after (t
, last_issued
);
5878 if (cycles_left
> 1)
5880 t
= gen_insn_group_barrier (GEN_INT (2));
5881 last_issued
= emit_insn_after (t
, last_issued
);
5884 t
= gen_nop_type (TYPE_I
);
5885 last_issued
= emit_insn_after (t
, last_issued
);
5886 t
= gen_insn_group_barrier (GEN_INT (3));
5887 last_issued
= emit_insn_after (t
, last_issued
);
5892 /* We are about to being issuing insns for this clock cycle.
5893 Override the default sort algorithm to better slot instructions. */
5896 ia64_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
5897 reorder_type
, clock_var
)
5898 FILE *dump ATTRIBUTE_UNUSED
;
5899 int sched_verbose ATTRIBUTE_UNUSED
;
5902 int reorder_type
, clock_var
;
5904 int n_ready
= *pn_ready
;
5905 rtx
*e_ready
= ready
+ n_ready
;
5911 fprintf (dump
, "// ia64_sched_reorder (type %d):\n", reorder_type
);
5912 dump_current_packet (dump
);
5915 if (reorder_type
== 0 && clock_var
> 0 && ia64_final_schedule
)
5917 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
5920 enum attr_itanium_class t
= ia64_safe_itanium_class (insn
);
5921 if (t
== ITANIUM_CLASS_IALU
|| t
== ITANIUM_CLASS_ISHF
5922 || t
== ITANIUM_CLASS_ILOG
5923 || t
== ITANIUM_CLASS_LD
|| t
== ITANIUM_CLASS_ST
)
5926 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
5927 if (REG_NOTE_KIND (link
) != REG_DEP_OUTPUT
5928 && REG_NOTE_KIND (link
) != REG_DEP_ANTI
)
5930 rtx other
= XEXP (link
, 0);
5931 enum attr_itanium_class t0
= ia64_safe_itanium_class (other
);
5932 if (t0
== ITANIUM_CLASS_MMSHF
5933 || t0
== ITANIUM_CLASS_MMMUL
)
5935 nop_cycles_until (clock_var
, sched_verbose
? dump
: NULL
);
5944 prev_first
= sched_data
.first_slot
;
5945 prev_cycle
= clock_var
;
5947 if (reorder_type
== 0)
5948 maybe_rotate (sched_verbose
? dump
: NULL
);
5950 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5951 highest
= ready
[n_ready
- 1];
5952 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
5953 if (insnp
< e_ready
)
5956 enum attr_type t
= ia64_safe_type (insn
);
5957 if (t
== TYPE_UNKNOWN
)
5959 highest
= ready
[n_ready
- 1];
5960 ready
[n_ready
- 1] = insn
;
5962 if (ia64_final_schedule
&& group_barrier_needed_p (insn
))
5964 schedule_stop (sched_verbose
? dump
: NULL
);
5965 sched_data
.last_was_stop
= 1;
5966 maybe_rotate (sched_verbose
? dump
: NULL
);
5968 else if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
5969 || asm_noperands (PATTERN (insn
)) >= 0)
5971 /* It must be an asm of some kind. */
5972 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
5978 if (ia64_final_schedule
)
5980 int nr_need_stop
= 0;
5982 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
5983 if (safe_group_barrier_needed_p (*insnp
))
5986 /* Schedule a stop bit if
5987 - all insns require a stop bit, or
5988 - we are starting a new cycle and _any_ insns require a stop bit.
5989 The reason for the latter is that if our schedule is accurate, then
5990 the additional stop won't decrease performance at this point (since
5991 there's a split issue at this point anyway), but it gives us more
5992 freedom when scheduling the currently ready insns. */
5993 if ((reorder_type
== 0 && nr_need_stop
)
5994 || (reorder_type
== 1 && n_ready
== nr_need_stop
))
5996 schedule_stop (sched_verbose
? dump
: NULL
);
5997 sched_data
.last_was_stop
= 1;
5998 maybe_rotate (sched_verbose
? dump
: NULL
);
5999 if (reorder_type
== 1)
6006 /* Move down everything that needs a stop bit, preserving relative
6008 while (insnp
-- > ready
+ deleted
)
6009 while (insnp
>= ready
+ deleted
)
6012 if (! safe_group_barrier_needed_p (insn
))
6014 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
6020 if (deleted
!= nr_need_stop
)
6025 return itanium_reorder (sched_verbose
? dump
: NULL
,
6026 ready
, e_ready
, reorder_type
== 1);
6029 /* Like ia64_sched_reorder, but called after issuing each insn.
6030 Override the default sort algorithm to better slot instructions. */
6033 ia64_sched_reorder2 (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
6034 FILE *dump ATTRIBUTE_UNUSED
;
6035 int sched_verbose ATTRIBUTE_UNUSED
;
6040 if (sched_data
.last_was_stop
)
6043 /* Detect one special case and try to optimize it.
6044 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6045 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6046 if (sched_data
.first_slot
== 1
6047 && sched_data
.stopbit
[0]
6048 && ((sched_data
.cur
== 4
6049 && (sched_data
.types
[1] == TYPE_M
|| sched_data
.types
[1] == TYPE_A
)
6050 && (sched_data
.types
[2] == TYPE_I
|| sched_data
.types
[2] == TYPE_A
)
6051 && (sched_data
.types
[3] != TYPE_M
&& sched_data
.types
[3] != TYPE_A
))
6052 || (sched_data
.cur
== 3
6053 && (sched_data
.types
[1] == TYPE_M
|| sched_data
.types
[1] == TYPE_A
)
6054 && (sched_data
.types
[2] != TYPE_M
&& sched_data
.types
[2] != TYPE_I
6055 && sched_data
.types
[2] != TYPE_A
))))
6059 rtx stop
= PREV_INSN (sched_data
.insns
[1]);
6062 sched_data
.stopbit
[0] = 0;
6063 sched_data
.stopbit
[2] = 1;
6064 if (GET_CODE (stop
) != INSN
)
6067 pat
= PATTERN (stop
);
6068 /* Ignore cycle displays. */
6069 if (GET_CODE (pat
) == UNSPEC
&& XINT (pat
, 1) == 23)
6070 stop
= PREV_INSN (stop
);
6071 pat
= PATTERN (stop
);
6072 if (GET_CODE (pat
) != UNSPEC_VOLATILE
6073 || XINT (pat
, 1) != 2
6074 || INTVAL (XVECEXP (pat
, 0, 0)) != 1)
6076 XVECEXP (pat
, 0, 0) = GEN_INT (3);
6078 sched_data
.types
[5] = sched_data
.types
[3];
6079 sched_data
.types
[4] = sched_data
.types
[2];
6080 sched_data
.types
[3] = sched_data
.types
[1];
6081 sched_data
.insns
[5] = sched_data
.insns
[3];
6082 sched_data
.insns
[4] = sched_data
.insns
[2];
6083 sched_data
.insns
[3] = sched_data
.insns
[1];
6084 sched_data
.stopbit
[5] = sched_data
.stopbit
[4] = sched_data
.stopbit
[3] = 0;
6085 sched_data
.cur
+= 2;
6086 sched_data
.first_slot
= 3;
6087 for (i
= 0; i
< NR_PACKETS
; i
++)
6089 const struct ia64_packet
*p
= packets
+ i
;
6090 if (p
->t
[0] == TYPE_M
&& p
->t
[1] == TYPE_F
&& p
->t
[2] == TYPE_B
)
6092 sched_data
.packet
= p
;
6096 rotate_one_bundle (sched_verbose
? dump
: NULL
);
6099 for (i
= 0; i
< NR_PACKETS
; i
++)
6101 const struct ia64_packet
*p
= packets
+ i
;
6102 int split
= get_split (p
, sched_data
.first_slot
);
6105 /* Disallow multiway branches here. */
6106 if (p
->t
[1] == TYPE_B
)
6109 if (packet_matches_p (p
, split
, &next
) && next
< best
)
6112 sched_data
.packet
= p
;
6113 sched_data
.split
= split
;
6122 int more
= ia64_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
, 1,
6126 /* Did we schedule a stop? If so, finish this cycle. */
6127 if (sched_data
.cur
== sched_data
.first_slot
)
6132 fprintf (dump
, "// Can't issue more this cycle; updating type array.\n");
6134 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
6136 dump_current_packet (dump
);
6140 /* We are about to issue INSN. Return the number of insns left on the
6141 ready queue that can be issued this cycle. */
6144 ia64_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
6148 int can_issue_more ATTRIBUTE_UNUSED
;
6150 enum attr_type t
= ia64_safe_type (insn
);
6154 if (sched_data
.last_was_stop
)
6156 int t
= sched_data
.first_slot
;
6159 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t
)), insn
);
6160 init_insn_group_barriers ();
6161 sched_data
.last_was_stop
= 0;
6164 if (t
== TYPE_UNKNOWN
)
6167 fprintf (dump
, "// Ignoring type %s\n", type_names
[t
]);
6168 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6169 || asm_noperands (PATTERN (insn
)) >= 0)
6171 /* This must be some kind of asm. Clear the scheduling state. */
6172 rotate_two_bundles (sched_verbose
? dump
: NULL
);
6173 if (ia64_final_schedule
)
6174 group_barrier_needed_p (insn
);
6179 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6180 important state info. Don't delete this test. */
6181 if (ia64_final_schedule
6182 && group_barrier_needed_p (insn
))
6185 sched_data
.stopbit
[sched_data
.cur
] = 0;
6186 sched_data
.insns
[sched_data
.cur
] = insn
;
6187 sched_data
.types
[sched_data
.cur
] = t
;
6191 fprintf (dump
, "// Scheduling insn %d of type %s\n",
6192 INSN_UID (insn
), type_names
[t
]);
6194 if (GET_CODE (insn
) == CALL_INSN
&& ia64_final_schedule
)
6196 schedule_stop (sched_verbose
? dump
: NULL
);
6197 sched_data
.last_was_stop
= 1;
6203 /* Free data allocated by ia64_sched_init. */
6206 ia64_sched_finish (dump
, sched_verbose
)
6211 fprintf (dump
, "// Finishing schedule.\n");
6212 rotate_two_bundles (NULL
);
/* NOTE(review): garbled extraction -- braces/declarations missing.  */
/* Walks all basic blocks twice: first emitting .pred.rel.mutex pseudo
   insns after code labels for predicate registers live on entry, then
   wrapping noreturn conditional calls in safe-across-calls markers so
   the assembler does not warn about clobbered predicates.  */
6217 /* Emit pseudo-ops for the assembler to describe predicate relations.
6218 At present this assumes that we only consider predicate pairs to
6219 be mutex, and that the assembler can deduce proper values from
6220 straight-line code. */
6223 emit_predicate_relation_info ()
6227 for (i
= n_basic_blocks
- 1; i
>= 0; --i
)
6229 basic_block bb
= BASIC_BLOCK (i
)
;
6231 rtx head
= bb
->head
;
6233 /* We only need such notes at code labels. */
6234 if (GET_CODE (head
) != CODE_LABEL
)
/* Skip past the NOTE_INSN_BASIC_BLOCK note that follows the label.  */
6236 if (GET_CODE (NEXT_INSN (head
)) == NOTE
6237 && NOTE_LINE_NUMBER (NEXT_INSN (head
)) == NOTE_INSN_BASIC_BLOCK
)
6238 head
= NEXT_INSN (head
);
/* Predicate registers are scanned in pairs (step 2).  */
6240 for (r
= PR_REG (0); r
< PR_REG (64); r
+= 2)
6241 if (REGNO_REG_SET_P (bb
->global_live_at_start
, r
))
6243 rtx p
= gen_rtx_REG (BImode
, r
);
6244 rtx n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
6245 if (head
== bb
->end
)
6251 /* Look for conditional calls that do not return, and protect predicate
6252 relations around them. Otherwise the assembler will assume the call
6253 returns, and complain about uses of call-clobbered predicates after
6255 for (i
= n_basic_blocks
- 1; i
>= 0; --i
)
6257 basic_block bb
= BASIC_BLOCK (i
)
;
6258 rtx insn
= bb
->head
;
/* A noreturn COND_EXEC call gets bracketed by safe_across_calls
   markers; block head/end pointers are fixed up when the call was at
   a boundary.  */
6262 if (GET_CODE (insn
) == CALL_INSN
6263 && GET_CODE (PATTERN (insn
)) == COND_EXEC
6264 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
6266 rtx b
= emit_insn_before (gen_safe_across_calls_all (), insn
);
6267 rtx a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
6268 if (bb
->head
== insn
)
6270 if (bb
->end
== insn
)
6274 if (insn
== bb
->end
)
6276 insn
= NEXT_INSN (insn
);
/* NOTE(review): garbled extraction -- the function header and the
   switch-on-type labels are missing; presumably one return per insn
   type (M/I/B/F/X) -- TODO confirm against the original file.  */
/* Generate a NOP insn of the requested type; L-type is never emitted
   (per the comment below).  */
6281 /* Generate a NOP instruction of type T. We will never generate L type
6291 return gen_nop_m ();
6293 return gen_nop_i ();
6295 return gen_nop_b ();
6297 return gen_nop_f ();
6299 return gen_nop_x ();
/* NOTE(review): garbled extraction -- function header, braces and some
   declarations missing; name presumed ia64_emit_nops from the comment
   -- TODO confirm.  */
/* Post-scheduling pass over the whole insn chain: tracks the current
   bundle template (b) and slot (bundle_pos), and inserts NOPs of the
   template's slot type wherever a bundle must be padded (before stop
   bits, labels, asms, and type mismatches).  */
6305 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6306 here than while scheduling. */
6312 const struct bundle
*b
= 0;
6315 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6319 pat
= INSN_P (insn
) ? PATTERN (insn
) : const0_rtx
;
6320 if (GET_CODE (pat
) == USE
|| GET_CODE (pat
) == CLOBBER
)
/* An UNSPEC 22 (bundle selector) or a label ends the current bundle:
   pad the remaining slots with NOPs, then latch the new template.  */
6322 if ((GET_CODE (pat
) == UNSPEC
&& XINT (pat
, 1) == 22)
6323 || GET_CODE (insn
) == CODE_LABEL
)
6326 while (bundle_pos
< 3)
6328 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6331 if (GET_CODE (insn
) != CODE_LABEL
)
6332 b
= bundle
+ INTVAL (XVECEXP (pat
, 0, 0));
/* UNSPEC_VOLATILE 2 is an insn-group barrier carrying a target slot:
   pad up to that slot.  */
6338 else if (GET_CODE (pat
) == UNSPEC_VOLATILE
&& XINT (pat
, 1) == 2)
6340 int t
= INTVAL (XVECEXP (pat
, 0, 0));
6342 while (bundle_pos
< t
)
6344 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6350 if (bundle_pos
== 3)
6353 if (b
&& INSN_P (insn
))
6355 t
= ia64_safe_type (insn
);
/* Asms consume the rest of the bundle.  */
6356 if (asm_noperands (PATTERN (insn
)) >= 0
6357 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)
6359 while (bundle_pos
< 3)
6361 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6367 if (t
== TYPE_UNKNOWN
)
/* Advance to a slot whose template type matches the insn's type;
   TYPE_A insns fit either an M or an I slot.  */
6369 while (bundle_pos
< 3)
6371 if (t
== b
->t
[bundle_pos
]
6372 || (t
== TYPE_A
&& (b
->t
[bundle_pos
] == TYPE_M
6373 || b
->t
[bundle_pos
] == TYPE_I
)))
6376 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
/* NOTE(review): garbled extraction -- header/braces missing; this is
   the machine-dependent reorg entry point (presumably ia64_reorg --
   TODO confirm).  Splits insns, rebuilds the CFG and liveness, runs the
   final EBB schedule or the simple group-barrier pass, then emits the
   predicate-relation pseudo-ops.  */
6385 /* Perform machine dependent operations on the rtl chain INSNS. */
6391 /* If optimizing, we'll have split before scheduling. */
6393 split_all_insns (0);
6395 /* Make sure the CFG and global_live_at_start are correct
6396 for emit_predicate_relation_info. */
6397 find_basic_blocks (insns
, max_reg_num (), NULL
);
6398 life_analysis (insns
, NULL
, PROP_DEATH_NOTES
);
6400 if (ia64_flag_schedule_insns2
)
/* ia64_final_schedule gates the stop-bit bookkeeping inside the
   scheduler hooks above; it is only set around schedule_ebbs.  */
6402 ia64_final_schedule
= 1;
6403 schedule_ebbs (rtl_dump_file
);
6404 ia64_final_schedule
= 0;
6406 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6407 place as they were during scheduling. */
6408 emit_insn_group_barriers (rtl_dump_file
, insns
);
6412 emit_all_insn_group_barriers (rtl_dump_file
, insns
);
6415 emit_predicate_relation_info ();
/* NOTE(review): garbled extraction -- return statements and braces
   missing; each condition below presumably returns nonzero.  */
/* Predicate: does the epilogue use hard register REGNO?  Keeps gp (r1),
   syscall-linkage input registers, b0, ar.lc, ar.pfs and ar.unat live
   across function exit under the conditions tested below.  */
6418 /* Return true if REGNO is used by the epilogue. */
6421 ia64_epilogue_uses (regno
)
6424 /* When a function makes a call through a function descriptor, we
6425 will write a (potentially) new value to "gp". After returning
6426 from such a call, we need to make sure the function restores the
6427 original gp-value, even if the function itself does not use the
6429 if (regno
== R_GR (1)
6431 && !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
))
6434 /* For functions defined with the syscall_linkage attribute, all input
6435 registers are marked as live at all function exits. This prevents the
6436 register allocator from using the input registers, which in turn makes it
6437 possible to restart a system call after an interrupt without having to
6438 save/restore the input registers. This also prevents kernel data from
6439 leaking to application code. */
6441 if (IN_REGNO_P (regno
)
6442 && lookup_attribute ("syscall_linkage",
6443 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
6446 /* Conditional return patterns can't represent the use of `b0' as
6447 the return address, so we force the value live this way. */
6448 if (regno
== R_BR (0))
/* ar.lc is kept live only when actually used by the function.  */
6451 if (regs_ever_live
[AR_LC_REGNUM
] && regno
== AR_LC_REGNUM
)
/* Non-leaf functions need ar.pfs restored for the return.  */
6453 if (! current_function_is_leaf
&& regno
== AR_PFS_REGNUM
)
6455 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
6456 && regno
== AR_UNAT_REGNUM
)
/* NOTE(review): garbled extraction -- braces/returns missing.  */
/* Attribute validator: accepts only the "syscall_linkage" attribute,
   only on function/method types, and only with no arguments.  */
6462 /* Return true if IDENTIFIER is a valid attribute for TYPE. */
6465 ia64_valid_type_attribute (type
, attributes
, identifier
, args
)
6467 tree attributes ATTRIBUTE_UNUSED
;
6471 /* We only support an attribute for function calls. */
6473 if (TREE_CODE (type
) != FUNCTION_TYPE
6474 && TREE_CODE (type
) != METHOD_TYPE
)
6477 /* The "syscall_linkage" attribute says the callee is a system call entry
6478 point. This affects ia64_epilogue_uses. */
6480 if (is_attribute_p ("syscall_linkage", identifier
))
/* Valid only when the attribute carries no arguments.  */
6481 return args
== NULL_TREE
;
/* NOTE(review): garbled extraction -- braces, some operands (e.g. the
   sdata/sbss section-name string literals at orig. lines 6553/6555)
   and control structure are missing.  */
/* ENCODE_SECTION_INFO worker: marks function symbols via
   SYMBOL_REF_FLAG, and prefixes eligible small-data variable names with
   SDATA_NAME_FLAG_CHAR ('@' per the comment) -- or strips the prefix
   when the decl no longer qualifies.  */
6486 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6488 We add @ to the name if this goes in small data/bss. We can only put
6489 a variable in small data/bss if it is defined in this module or a module
6490 that we are statically linked with. We can't check the second condition,
6491 but TREE_STATIC gives us the first one. */
6493 /* ??? If we had IPA, we could check the second condition. We could support
6494 programmer added section attributes if the variable is not defined in this
6497 /* ??? See the v850 port for a cleaner way to do this. */
6499 /* ??? We could also support own long data here. Generating movl/add/ld8
6500 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6501 code faster because there is one less load. This also includes incomplete
6502 types which can't go in sdata/sbss. */
6504 /* ??? See select_section. We must put short own readonly variables in
6505 sdata/sbss instead of the more natural rodata, because we can't perform
6506 the DECL_READONLY_SECTION test here. */
6508 extern struct obstack
* saveable_obstack
;
6511 ia64_encode_section_info (decl
)
6514 const char *symbol_str
;
6516 if (TREE_CODE (decl
) == FUNCTION_DECL
)
6518 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl
), 0)) = 1;
6522 /* Careful not to prod global register variables. */
6523 if (TREE_CODE (decl
) != VAR_DECL
6524 || GET_CODE (DECL_RTL (decl
)) != MEM
6525 || GET_CODE (XEXP (DECL_RTL (decl
), 0)) != SYMBOL_REF
)
6528 symbol_str
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
6530 /* We assume that -fpic is used only to create a shared library (dso).
6531 With -fpic, no global data can ever be sdata.
6532 Without -fpic, global common uninitialized data can never be sdata, since
6533 it can unify with a real definition in a dso. */
6534 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6535 to access them. The linker may then be able to do linker relaxation to
6536 optimize references to them. Currently sdata implies use of gprel. */
6537 /* We need the DECL_EXTERNAL check for C++. static class data members get
6538 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6539 statically allocated, but the space is allocated somewhere else. Such
6540 decls can not be own data. */
6541 if (! TARGET_NO_SDATA
6542 && TREE_STATIC (decl
) && ! DECL_EXTERNAL (decl
)
6543 && ! (DECL_ONE_ONLY (decl
) || DECL_WEAK (decl
))
6544 && ! (TREE_PUBLIC (decl
)
6546 || (DECL_COMMON (decl
)
6547 && (DECL_INITIAL (decl
) == 0
6548 || DECL_INITIAL (decl
) == error_mark_node
))))
6549 /* Either the variable must be declared without a section attribute,
6550 or the section must be sdata or sbss. */
6551 && (DECL_SECTION_NAME (decl
) == 0
6552 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl
)),
6554 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl
)),
6557 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
6559 /* If the variable has already been defined in the output file, then it
6560 is too late to put it in sdata if it wasn't put there in the first
6561 place. The test is here rather than above, because if it is already
6562 in sdata, then it can stay there. */
6564 if (TREE_ASM_WRITTEN (decl
))
6567 /* If this is an incomplete type with size 0, then we can't put it in
6568 sdata because it might be too big when completed. */
6570 && size
<= (HOST_WIDE_INT
) ia64_section_threshold
6571 && symbol_str
[0] != SDATA_NAME_FLAG_CHAR
)
/* Build the flagged name on the stack, then intern a GC-managed copy
   and splice it into the SYMBOL_REF.  */
6573 size_t len
= strlen (symbol_str
);
6574 char *newstr
= alloca (len
+ 1);
6577 *newstr
= SDATA_NAME_FLAG_CHAR
;
6578 memcpy (newstr
+ 1, symbol_str
, len
+ 1);
6580 string
= ggc_alloc_string (newstr
, len
+ 1);
6581 XSTR (XEXP (DECL_RTL (decl
), 0), 0) = string
;
6584 /* This decl is marked as being in small data/bss but it shouldn't
6585 be; one likely explanation for this is that the decl has been
6586 moved into a different section from the one it was in when
6587 ENCODE_SECTION_INFO was first called. Remove the '@'.*/
6588 else if (symbol_str
[0] == SDATA_NAME_FLAG_CHAR
)
6590 XSTR (XEXP (DECL_RTL (decl
), 0), 0)
6591 = ggc_strdup (symbol_str
+ 1);
/* NOTE(review): garbled extraction -- function header and braces
   missing.  */
/* State and helper for the unwind-directive emitter below.  */
6595 /* Output assembly directives for prologue regions. */
6597 /* The current basic block number. */
6599 static int block_num
;
6601 /* True if we need a copy_state command at the start of the next block. */
6603 static int need_copy_state
;
6605 /* The function emits unwind directives for the start of an epilogue. */
6610 /* If this isn't the last block of the function, then we need to label the
6611 current state, and copy it back in at the start of the next block. */
6613 if (block_num
!= n_basic_blocks
- 1)
6615 fprintf (asm_out_file
, "\t.label_state 1\n");
/* process_for_unwind_directive will emit .copy_state 1 at the next
   NOTE_INSN_BASIC_BLOCK when this flag is set.  */
6616 need_copy_state
= 1;
6619 fprintf (asm_out_file
, "\t.restore sp\n");
/* NOTE(review): garbled extraction -- switch heads, most case labels,
   braces and some fprintf arguments are missing; the surviving case
   labels (AR_UNAT_REGNUM, STACK_POINTER_REGNUM, FR_REG ranges) imply
   two switches over dest_regno and src_regno respectively.  */
/* Examines one SET rtx of a frame-related insn and prints the matching
   IA-64 unwind directive (.save, .fframe, .vframe, .spill variants,
   .save.g/.save.b/.save.f/.save.gf, .restore) to asm_out_file.  */
6622 /* This function processes a SET pattern looking for specific patterns
6623 which result in emitting an assembly directive required for unwinding. */
6626 process_set (asm_out_file
, pat
)
6630 rtx src
= SET_SRC (pat
);
6631 rtx dest
= SET_DEST (pat
);
6632 int src_regno
, dest_regno
;
6634 /* Look for the ALLOC insn. */
6635 if (GET_CODE (src
) == UNSPEC_VOLATILE
6636 && XINT (src
, 1) == 0
6637 && GET_CODE (dest
) == REG
)
6639 dest_regno
= REGNO (dest
);
6641 /* If this isn't the final destination for ar.pfs, the alloc
6642 shouldn't have been marked frame related. */
6643 if (dest_regno
!= current_frame_info
.reg_save_ar_pfs
)
6646 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
6647 ia64_dbx_register_number (dest_regno
));
6651 /* Look for SP = .... */
6652 if (GET_CODE (dest
) == REG
&& REGNO (dest
) == STACK_POINTER_REGNUM
)
6654 if (GET_CODE (src
) == PLUS
)
6656 rtx op0
= XEXP (src
, 0);
6657 rtx op1
= XEXP (src
, 1);
/* sp = sp + const: negative means frame allocation (.fframe),
   positive means deallocation, i.e. the epilogue.  */
6658 if (op0
== dest
&& GET_CODE (op1
) == CONST_INT
)
6660 if (INTVAL (op1
) < 0)
6662 fputs ("\t.fframe ", asm_out_file
);
6663 fprintf (asm_out_file
, HOST_WIDE_INT_PRINT_DEC
,
6665 fputc ('\n', asm_out_file
);
6668 process_epilogue ();
/* sp = fp: restoring the stack pointer also marks the epilogue.  */
6673 else if (GET_CODE (src
) == REG
6674 && REGNO (src
) == HARD_FRAME_POINTER_REGNUM
)
6675 process_epilogue ();
6682 /* Register move we need to look at. */
6683 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == REG
)
6685 src_regno
= REGNO (src
);
6686 dest_regno
= REGNO (dest
);
6691 /* Saving return address pointer. */
6692 if (dest_regno
!= current_frame_info
.reg_save_b0
)
6694 fprintf (asm_out_file
, "\t.save rp, r%d\n",
6695 ia64_dbx_register_number (dest_regno
));
6699 if (dest_regno
!= current_frame_info
.reg_save_pr
)
6701 fprintf (asm_out_file
, "\t.save pr, r%d\n",
6702 ia64_dbx_register_number (dest_regno
));
6705 case AR_UNAT_REGNUM
:
6706 if (dest_regno
!= current_frame_info
.reg_save_ar_unat
)
6708 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
6709 ia64_dbx_register_number (dest_regno
));
6713 if (dest_regno
!= current_frame_info
.reg_save_ar_lc
)
6715 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
6716 ia64_dbx_register_number (dest_regno
));
6719 case STACK_POINTER_REGNUM
:
6720 if (dest_regno
!= HARD_FRAME_POINTER_REGNUM
6721 || ! frame_pointer_needed
)
6723 fprintf (asm_out_file
, "\t.vframe r%d\n",
6724 ia64_dbx_register_number (dest_regno
));
6728 /* Everything else should indicate being stored to memory. */
6733 /* Memory store we need to look at. */
6734 if (GET_CODE (dest
) == MEM
&& GET_CODE (src
) == REG
)
/* Decompose the address into BASE (+ constant OFF).  */
6740 if (GET_CODE (XEXP (dest
, 0)) == REG
)
6742 base
= XEXP (dest
, 0);
6745 else if (GET_CODE (XEXP (dest
, 0)) == PLUS
6746 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
)
6748 base
= XEXP (XEXP (dest
, 0), 0);
6749 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
/* .savepsp for fp-relative spills; sp-relative uses the other form
   (the assignment for that branch is missing from this extraction).  */
6754 if (base
== hard_frame_pointer_rtx
)
6756 saveop
= ".savepsp";
6759 else if (base
== stack_pointer_rtx
)
6764 src_regno
= REGNO (src
);
6768 if (current_frame_info
.reg_save_b0
!= 0)
6770 fprintf (asm_out_file
, "\t%s rp, %ld\n", saveop
, off
);
6774 if (current_frame_info
.reg_save_pr
!= 0)
6776 fprintf (asm_out_file
, "\t%s pr, %ld\n", saveop
, off
);
6780 if (current_frame_info
.reg_save_ar_lc
!= 0)
6782 fprintf (asm_out_file
, "\t%s ar.lc, %ld\n", saveop
, off
);
6786 if (current_frame_info
.reg_save_ar_pfs
!= 0)
6788 fprintf (asm_out_file
, "\t%s ar.pfs, %ld\n", saveop
, off
);
6791 case AR_UNAT_REGNUM
:
6792 if (current_frame_info
.reg_save_ar_unat
!= 0)
6794 fprintf (asm_out_file
, "\t%s ar.unat, %ld\n", saveop
, off
);
/* Callee-saved general, branch and floating registers are reported
   as bitmasks relative to the first register of each save group.  */
6801 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
6802 1 << (src_regno
- GR_REG (4)));
6810 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
6811 1 << (src_regno
- BR_REG (1)));
6818 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
6819 1 << (src_regno
- FR_REG (2)));
6822 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
6823 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
6824 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
6825 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
6826 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
6827 1 << (src_regno
- FR_REG (12)));
/* NOTE(review): garbled extraction -- braces, switch labels and early
   returns missing.  */
/* Per-insn driver for unwind output: only active when unwind tables or
   non-SJLJ exceptions are enabled.  Tracks the current basic block via
   NOTE_INSN_BASIC_BLOCK notes (emitting the deferred .copy_state), then
   feeds the frame-related pattern -- or its REG_FRAME_RELATED_EXPR
   override -- to process_set, iterating over PARALLEL members.  */
6839 /* This function looks at a single insn and emits any directives
6840 required to unwind this insn. */
6842 process_for_unwind_directive (asm_out_file
, insn
)
6846 if (flag_unwind_tables
6847 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
6851 if (GET_CODE (insn
) == NOTE
6852 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
6854 block_num
= NOTE_BASIC_BLOCK (insn
)->index
;
6856 /* Restore unwind state from immediately before the epilogue. */
6857 if (need_copy_state
)
6859 fprintf (asm_out_file
, "\t.body\n");
6860 fprintf (asm_out_file
, "\t.copy_state 1\n");
6861 need_copy_state
= 0;
6865 if (! RTX_FRAME_RELATED_P (insn
))
/* Prefer the REG_FRAME_RELATED_EXPR note over the raw pattern.  */
6868 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
6870 pat
= XEXP (pat
, 0);
6872 pat
= PATTERN (insn
);
6874 switch (GET_CODE (pat
))
6877 process_set (asm_out_file
, pat
);
6883 int limit
= XVECLEN (pat
, 0);
6884 for (par_index
= 0; par_index
< limit
; par_index
++)
6886 rtx x
= XVECEXP (pat
, 0, par_index
);
6887 if (GET_CODE (x
) == SET
)
6888 process_set (asm_out_file
, x
);
/* NOTE(review): garbled extraction -- braces and some type-building
   lines (tails of tree_cons chains, the void_ftype_psi/void_ftype_pdi
   declarator names) are missing.  */
/* Registers the IA-64 machine builtins: the __sync_* atomic intrinsics
   in SImode and DImode variants, plus __builtin_ia64_bsp and
   __builtin_ia64_flushrs.  Each builtin is created via the def_builtin
   wrapper with a matching function type and IA64_BUILTIN_* code.  */
6901 ia64_init_builtins ()
6903 tree psi_type_node
= build_pointer_type (integer_type_node
);
6904 tree pdi_type_node
= build_pointer_type (long_integer_type_node
);
6905 tree endlink
= void_list_node
;
6907 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
6908 tree si_ftype_psi_si_si
6909 = build_function_type (integer_type_node
,
6910 tree_cons (NULL_TREE
, psi_type_node
,
6911 tree_cons (NULL_TREE
, integer_type_node
,
6912 tree_cons (NULL_TREE
,
6916 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
6917 tree di_ftype_pdi_di_di
6918 = build_function_type (long_integer_type_node
,
6919 tree_cons (NULL_TREE
, pdi_type_node
,
6920 tree_cons (NULL_TREE
,
6921 long_integer_type_node
,
6922 tree_cons (NULL_TREE
,
6923 long_integer_type_node
,
6925 /* __sync_synchronize */
6926 tree void_ftype_void
6927 = build_function_type (void_type_node
, endlink
);
6929 /* __sync_lock_test_and_set_si */
6930 tree si_ftype_psi_si
6931 = build_function_type (integer_type_node
,
6932 tree_cons (NULL_TREE
, psi_type_node
,
6933 tree_cons (NULL_TREE
, integer_type_node
, endlink
)));
6935 /* __sync_lock_test_and_set_di */
6936 tree di_ftype_pdi_di
6937 = build_function_type (long_integer_type_node
,
6938 tree_cons (NULL_TREE
, pdi_type_node
,
6939 tree_cons (NULL_TREE
, long_integer_type_node
,
6942 /* __sync_lock_release_si */
6944 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, psi_type_node
,
6947 /* __sync_lock_release_di */
6949 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, pdi_type_node
,
6952 #define def_builtin(name, type, code) \
6953 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
6955 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si
,
6956 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
);
6957 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di
,
6958 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
);
6959 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si
,
6960 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
);
6961 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di
,
6962 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
);
6964 def_builtin ("__sync_synchronize", void_ftype_void
,
6965 IA64_BUILTIN_SYNCHRONIZE
);
6967 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si
,
6968 IA64_BUILTIN_LOCK_TEST_AND_SET_SI
);
6969 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di
,
6970 IA64_BUILTIN_LOCK_TEST_AND_SET_DI
);
6971 def_builtin ("__sync_lock_release_si", void_ftype_psi
,
6972 IA64_BUILTIN_LOCK_RELEASE_SI
);
6973 def_builtin ("__sync_lock_release_di", void_ftype_pdi
,
6974 IA64_BUILTIN_LOCK_RELEASE_DI
);
6976 def_builtin ("__builtin_ia64_bsp",
6977 build_function_type (ptr_type_node
, endlink
),
6980 def_builtin ("__builtin_ia64_flushrs",
6981 build_function_type (void_type_node
, endlink
),
6982 IA64_BUILTIN_FLUSHRS
);
/* Fetch-and-op and op-and-fetch families, SImode then DImode.  */
6984 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si
,
6985 IA64_BUILTIN_FETCH_AND_ADD_SI
);
6986 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si
,
6987 IA64_BUILTIN_FETCH_AND_SUB_SI
);
6988 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si
,
6989 IA64_BUILTIN_FETCH_AND_OR_SI
);
6990 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si
,
6991 IA64_BUILTIN_FETCH_AND_AND_SI
);
6992 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si
,
6993 IA64_BUILTIN_FETCH_AND_XOR_SI
);
6994 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si
,
6995 IA64_BUILTIN_FETCH_AND_NAND_SI
);
6997 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si
,
6998 IA64_BUILTIN_ADD_AND_FETCH_SI
);
6999 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si
,
7000 IA64_BUILTIN_SUB_AND_FETCH_SI
);
7001 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si
,
7002 IA64_BUILTIN_OR_AND_FETCH_SI
);
7003 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si
,
7004 IA64_BUILTIN_AND_AND_FETCH_SI
);
7005 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si
,
7006 IA64_BUILTIN_XOR_AND_FETCH_SI
);
7007 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si
,
7008 IA64_BUILTIN_NAND_AND_FETCH_SI
);
7010 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di
,
7011 IA64_BUILTIN_FETCH_AND_ADD_DI
);
7012 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di
,
7013 IA64_BUILTIN_FETCH_AND_SUB_DI
);
7014 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di
,
7015 IA64_BUILTIN_FETCH_AND_OR_DI
);
7016 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di
,
7017 IA64_BUILTIN_FETCH_AND_AND_DI
);
7018 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di
,
7019 IA64_BUILTIN_FETCH_AND_XOR_DI
);
7020 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di
,
7021 IA64_BUILTIN_FETCH_AND_NAND_DI
);
7023 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di
,
7024 IA64_BUILTIN_ADD_AND_FETCH_DI
);
7025 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di
,
7026 IA64_BUILTIN_SUB_AND_FETCH_DI
);
7027 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di
,
7028 IA64_BUILTIN_OR_AND_FETCH_DI
);
7029 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di
,
7030 IA64_BUILTIN_AND_AND_FETCH_DI
);
7031 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di
,
7032 IA64_BUILTIN_XOR_AND_FETCH_DI
);
7033 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di
,
7034 IA64_BUILTIN_NAND_AND_FETCH_DI
);
/* NOTE(review): garbled extraction -- braces, mode dispatch (the
   SImode/DImode if around the gen_* pairs), emit_insn calls for the
   generated patterns and the final return are missing.  */
/* Expands a __sync_fetch_and_OP builtin: memory fence, then either a
   direct fetchadd for small add constants, or a cmpxchg.acq retry loop
   comparing against the previously observed value.  Returns the value
   the memory held before the operation.  */
7039 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7047 cmpxchgsz.acq tmp = [ptr], tmp
7048 } while (tmp != ret)
7052 ia64_expand_fetch_and_op (binoptab
, mode
, arglist
, target
)
7054 enum machine_mode mode
;
7058 rtx ret
, label
, tmp
, ccv
, insn
, mem
, value
;
7061 arg0
= TREE_VALUE (arglist
);
7062 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7063 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7064 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
/* Build a volatile MEM for the target word so the loads/stores are
   not optimized away or reordered.  */
7066 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7067 MEM_VOLATILE_P (mem
) = 1;
7069 if (target
&& register_operand (target
, mode
))
7072 ret
= gen_reg_rtx (mode
);
7074 emit_insn (gen_mf ());
7076 /* Special case for fetchadd instructions. */
7077 if (binoptab
== add_optab
&& fetchadd_operand (value
, VOIDmode
))
7080 insn
= gen_fetchadd_acq_si (ret
, mem
, value
);
7082 insn
= gen_fetchadd_acq_di (ret
, mem
, value
);
7087 tmp
= gen_reg_rtx (mode
);
/* ar.ccv holds the compare value for cmpxchg.  */
7088 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7089 emit_move_insn (tmp
, mem
);
7091 label
= gen_label_rtx ();
7093 emit_move_insn (ret
, tmp
);
7094 emit_move_insn (ccv
, tmp
);
7096 /* Perform the specific operation. Special case NAND by noticing
7097 one_cmpl_optab instead. */
7098 if (binoptab
== one_cmpl_optab
)
7100 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
7101 binoptab
= and_optab
;
7103 tmp
= expand_binop (mode
, binoptab
, tmp
, value
, tmp
, 1, OPTAB_WIDEN
);
7106 insn
= gen_cmpxchg_acq_si (tmp
, mem
, tmp
, ccv
);
7108 insn
= gen_cmpxchg_acq_di (tmp
, mem
, tmp
, ccv
);
/* Loop until the cmpxchg observed the value we computed from.  */
7111 emit_cmp_and_jump_insns (tmp
, ret
, NE
, 0, mode
, 1, 0, label
);
/* NOTE(review): garbled extraction -- braces, mode dispatch, emit_insn
   calls and the final return are missing.  */
/* Expands a __sync_OP_and_fetch builtin: like fetch_and_op above, but
   the computed result (ret) is what cmpxchg stores and what the builtin
   returns; the loop exits when cmpxchg saw the expected old value.  */
7116 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7124 cmpxchgsz.acq tmp = [ptr], ret
7125 } while (tmp != old)
7129 ia64_expand_op_and_fetch (binoptab
, mode
, arglist
, target
)
7131 enum machine_mode mode
;
7135 rtx old
, label
, tmp
, ret
, ccv
, insn
, mem
, value
;
7138 arg0
= TREE_VALUE (arglist
);
7139 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7140 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7141 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7143 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7144 MEM_VOLATILE_P (mem
) = 1;
7146 if (target
&& ! register_operand (target
, mode
))
7149 emit_insn (gen_mf ());
7150 tmp
= gen_reg_rtx (mode
);
7151 old
= gen_reg_rtx (mode
);
/* ar.ccv holds the compare value for cmpxchg.  */
7152 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7154 emit_move_insn (tmp
, mem
);
7156 label
= gen_label_rtx ();
7158 emit_move_insn (old
, tmp
);
7159 emit_move_insn (ccv
, tmp
);
7161 /* Perform the specific operation. Special case NAND by noticing
7162 one_cmpl_optab instead. */
7163 if (binoptab
== one_cmpl_optab
)
7165 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
7166 binoptab
= and_optab
;
7168 ret
= expand_binop (mode
, binoptab
, tmp
, value
, target
, 1, OPTAB_WIDEN
);
7171 insn
= gen_cmpxchg_acq_si (tmp
, mem
, ret
, ccv
);
7173 insn
= gen_cmpxchg_acq_di (tmp
, mem
, ret
, ccv
);
/* Retry until cmpxchg observed the expected old value.  */
7176 emit_cmp_and_jump_insns (tmp
, old
, NE
, 0, mode
, 1, 0, label
);
/* NOTE(review): garbled extraction -- braces, mode dispatch, emit_insn
   and part of the return logic are missing.  */
/* Expands __sync_{val,bool}_compare_and_swap: loads ar.ccv with OLD,
   fences, performs cmpxchg.acq of NEW into [mem], and returns either
   the observed value (val_) or the comparison result (bool_, via
   emit_store_flag_force at the end).  */
7181 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7185 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7188 For bool_ it's the same except return ret == oldval.
7192 ia64_expand_compare_and_swap (mode
, boolp
, arglist
, target
)
7193 enum machine_mode mode
;
7198 tree arg0
, arg1
, arg2
;
7199 rtx mem
, old
, new, ccv
, tmp
, insn
;
7201 arg0
= TREE_VALUE (arglist
);
7202 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7203 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
7204 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7205 old
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7206 new = expand_expr (arg2
, NULL_RTX
, mode
, 0);
7208 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7209 MEM_VOLATILE_P (mem
) = 1;
/* cmpxchg operands must be registers.  */
7211 if (! register_operand (old
, mode
))
7212 old
= copy_to_mode_reg (mode
, old
);
7213 if (! register_operand (new, mode
))
7214 new = copy_to_mode_reg (mode
, new);
7216 if (! boolp
&& target
&& register_operand (target
, mode
))
7219 tmp
= gen_reg_rtx (mode
);
7221 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7222 emit_move_insn (ccv
, old
);
7223 emit_insn (gen_mf ());
7225 insn
= gen_cmpxchg_acq_si (tmp
, mem
, new, ccv
);
7227 insn
= gen_cmpxchg_acq_di (tmp
, mem
, new, ccv
);
/* bool_ variant: materialize (tmp == old) into a fresh target.  */
7233 target
= gen_reg_rtx (mode
);
7234 return emit_store_flag_force (target
, EQ
, tmp
, old
, mode
, 1, 1);
/* NOTE(review): garbled extraction -- braces, mode dispatch, emit_insn
   and the return are missing.  */
/* Expands __sync_lock_test_and_set via the xchg instruction: atomically
   stores NEW into [mem] and returns the previous contents in RET.  */
7240 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7243 ia64_expand_lock_test_and_set (mode
, arglist
, target
)
7244 enum machine_mode mode
;
7249 rtx mem
, new, ret
, insn
;
7251 arg0
= TREE_VALUE (arglist
);
7252 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7253 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7254 new = expand_expr (arg1
, NULL_RTX
, mode
, 0);
7256 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7257 MEM_VOLATILE_P (mem
) = 1;
/* xchg requires a register operand for the new value.  */
7258 if (! register_operand (new, mode
))
7259 new = copy_to_mode_reg (mode
, new);
/* Reuse TARGET for the result when it is already a suitable reg.  */
7261 if (target
&& register_operand (target
, mode
))
7264 ret
= gen_reg_rtx (mode
);
7267 insn
= gen_xchgsi (ret
, mem
, new);
7269 insn
= gen_xchgdi (ret
, mem
, new);
/* NOTE(review): garbled extraction -- braces and the return are
   missing.  */
/* Expands __sync_lock_release: a plain store of zero to the volatile
   MEM (st.rel semantics per the comment); TARGET is unused.  */
7275 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7278 ia64_expand_lock_release (mode
, arglist
, target
)
7279 enum machine_mode mode
;
7281 rtx target ATTRIBUTE_UNUSED
;
7286 arg0
= TREE_VALUE (arglist
);
7287 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7289 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7290 MEM_VOLATILE_P (mem
) = 1;
7292 emit_move_insn (mem
, const0_rtx
);
7298 ia64_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
7301 rtx subtarget ATTRIBUTE_UNUSED
;
7302 enum machine_mode mode ATTRIBUTE_UNUSED
;
7303 int ignore ATTRIBUTE_UNUSED
;
7305 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
7306 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
7307 tree arglist
= TREE_OPERAND (exp
, 1);
7311 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
7312 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
7313 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
7314 case IA64_BUILTIN_LOCK_RELEASE_SI
:
7315 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
7316 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
7317 case IA64_BUILTIN_FETCH_AND_OR_SI
:
7318 case IA64_BUILTIN_FETCH_AND_AND_SI
:
7319 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
7320 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
7321 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
7322 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
7323 case IA64_BUILTIN_OR_AND_FETCH_SI
:
7324 case IA64_BUILTIN_AND_AND_FETCH_SI
:
7325 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
7326 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
7330 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
7331 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
7332 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
7333 case IA64_BUILTIN_LOCK_RELEASE_DI
:
7334 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
7335 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
7336 case IA64_BUILTIN_FETCH_AND_OR_DI
:
7337 case IA64_BUILTIN_FETCH_AND_AND_DI
:
7338 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
7339 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
7340 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
7341 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
7342 case IA64_BUILTIN_OR_AND_FETCH_DI
:
7343 case IA64_BUILTIN_AND_AND_FETCH_DI
:
7344 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
7345 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
7355 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
7356 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
7357 return ia64_expand_compare_and_swap (mode
, 1, arglist
, target
);
7359 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
7360 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
7361 return ia64_expand_compare_and_swap (mode
, 0, arglist
, target
);
7363 case IA64_BUILTIN_SYNCHRONIZE
:
7364 emit_insn (gen_mf ());
7367 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
7368 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
7369 return ia64_expand_lock_test_and_set (mode
, arglist
, target
);
7371 case IA64_BUILTIN_LOCK_RELEASE_SI
:
7372 case IA64_BUILTIN_LOCK_RELEASE_DI
:
7373 return ia64_expand_lock_release (mode
, arglist
, target
);
7375 case IA64_BUILTIN_BSP
:
7376 if (! target
|| ! register_operand (target
, DImode
))
7377 target
= gen_reg_rtx (DImode
);
7378 emit_insn (gen_bsp_value (target
));
7381 case IA64_BUILTIN_FLUSHRS
:
7382 emit_insn (gen_flushrs ());
7385 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
7386 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
7387 return ia64_expand_fetch_and_op (add_optab
, mode
, arglist
, target
);
7389 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
7390 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
7391 return ia64_expand_fetch_and_op (sub_optab
, mode
, arglist
, target
);
7393 case IA64_BUILTIN_FETCH_AND_OR_SI
:
7394 case IA64_BUILTIN_FETCH_AND_OR_DI
:
7395 return ia64_expand_fetch_and_op (ior_optab
, mode
, arglist
, target
);
7397 case IA64_BUILTIN_FETCH_AND_AND_SI
:
7398 case IA64_BUILTIN_FETCH_AND_AND_DI
:
7399 return ia64_expand_fetch_and_op (and_optab
, mode
, arglist
, target
);
7401 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
7402 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
7403 return ia64_expand_fetch_and_op (xor_optab
, mode
, arglist
, target
);
7405 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
7406 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
7407 return ia64_expand_fetch_and_op (one_cmpl_optab
, mode
, arglist
, target
);
7409 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
7410 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
7411 return ia64_expand_op_and_fetch (add_optab
, mode
, arglist
, target
);
7413 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
7414 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
7415 return ia64_expand_op_and_fetch (sub_optab
, mode
, arglist
, target
);
7417 case IA64_BUILTIN_OR_AND_FETCH_SI
:
7418 case IA64_BUILTIN_OR_AND_FETCH_DI
:
7419 return ia64_expand_op_and_fetch (ior_optab
, mode
, arglist
, target
);
7421 case IA64_BUILTIN_AND_AND_FETCH_SI
:
7422 case IA64_BUILTIN_AND_AND_FETCH_DI
:
7423 return ia64_expand_op_and_fetch (and_optab
, mode
, arglist
, target
);
7425 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
7426 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
7427 return ia64_expand_op_and_fetch (xor_optab
, mode
, arglist
, target
);
7429 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
7430 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
7431 return ia64_expand_op_and_fetch (one_cmpl_optab
, mode
, arglist
, target
);