1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-attr.h"
42 #include "basic-block.h"
44 #include "sched-int.h"
46 /* This is used for communication between ASM_OUTPUT_LABEL and
47 ASM_OUTPUT_LABELREF. */
48 int ia64_asm_output_label
= 0;
50 /* Define the information needed to generate branch and scc insns. This is
51 stored from the compare operation. */
52 struct rtx_def
* ia64_compare_op0
;
53 struct rtx_def
* ia64_compare_op1
;
55 /* Register names for ia64_expand_prologue. */
56 static const char * const ia64_reg_numbers
[96] =
57 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
58 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
59 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
60 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
61 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
62 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
63 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
64 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
65 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
66 "r104","r105","r106","r107","r108","r109","r110","r111",
67 "r112","r113","r114","r115","r116","r117","r118","r119",
68 "r120","r121","r122","r123","r124","r125","r126","r127"};
70 /* ??? These strings could be shared with REGISTER_NAMES. */
71 static const char * const ia64_input_reg_names
[8] =
72 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
74 /* ??? These strings could be shared with REGISTER_NAMES. */
75 static const char * const ia64_local_reg_names
[80] =
76 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
77 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
78 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
79 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
80 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
81 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
82 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
83 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
84 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
85 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
87 /* ??? These strings could be shared with REGISTER_NAMES. */
88 static const char * const ia64_output_reg_names
[8] =
89 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
91 /* String used with the -mfixed-range= option. */
92 const char *ia64_fixed_range_string
;
94 /* Determines whether we run our final scheduling pass or not. We always
95 avoid the normal second scheduling pass. */
96 static int ia64_flag_schedule_insns2
;
98 /* Variables which are this size or smaller are put in the sdata/sbss
101 unsigned int ia64_section_threshold
;
103 static int find_gr_spill
PARAMS ((int));
104 static int next_scratch_gr_reg
PARAMS ((void));
105 static void mark_reg_gr_used_mask
PARAMS ((rtx
, void *));
106 static void ia64_compute_frame_size
PARAMS ((HOST_WIDE_INT
));
107 static void setup_spill_pointers
PARAMS ((int, rtx
, HOST_WIDE_INT
));
108 static void finish_spill_pointers
PARAMS ((void));
109 static rtx spill_restore_mem
PARAMS ((rtx
, HOST_WIDE_INT
));
110 static void do_spill
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
, rtx
));
111 static void do_restore
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
));
112 static rtx gen_movdi_x
PARAMS ((rtx
, rtx
, rtx
));
113 static rtx gen_fr_spill_x
PARAMS ((rtx
, rtx
, rtx
));
114 static rtx gen_fr_restore_x
PARAMS ((rtx
, rtx
, rtx
));
116 static enum machine_mode hfa_element_mode
PARAMS ((tree
, int));
117 static void fix_range
PARAMS ((const char *));
118 static void ia64_add_gc_roots
PARAMS ((void));
119 static void ia64_init_machine_status
PARAMS ((struct function
*));
120 static void ia64_mark_machine_status
PARAMS ((struct function
*));
121 static void ia64_free_machine_status
PARAMS ((struct function
*));
122 static void emit_insn_group_barriers
PARAMS ((FILE *, rtx
));
123 static void emit_all_insn_group_barriers
PARAMS ((FILE *, rtx
));
124 static void emit_predicate_relation_info
PARAMS ((void));
125 static void process_epilogue
PARAMS ((void));
126 static int process_set
PARAMS ((FILE *, rtx
));
128 static rtx ia64_expand_fetch_and_op
PARAMS ((optab
, enum machine_mode
,
130 static rtx ia64_expand_op_and_fetch
PARAMS ((optab
, enum machine_mode
,
132 static rtx ia64_expand_compare_and_swap
PARAMS ((enum machine_mode
, int,
134 static rtx ia64_expand_lock_test_and_set
PARAMS ((enum machine_mode
,
136 static rtx ia64_expand_lock_release
PARAMS ((enum machine_mode
, tree
, rtx
));
138 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
141 call_operand (op
, mode
)
143 enum machine_mode mode
;
145 if (mode
!= GET_MODE (op
))
148 return (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == REG
149 || (GET_CODE (op
) == SUBREG
&& GET_CODE (XEXP (op
, 0)) == REG
));
152 /* Return 1 if OP refers to a symbol in the sdata section. */
155 sdata_symbolic_operand (op
, mode
)
157 enum machine_mode mode ATTRIBUTE_UNUSED
;
159 switch (GET_CODE (op
))
162 if (GET_CODE (XEXP (op
, 0)) != PLUS
163 || GET_CODE (XEXP (XEXP (op
, 0), 0)) != SYMBOL_REF
)
165 op
= XEXP (XEXP (op
, 0), 0);
169 if (CONSTANT_POOL_ADDRESS_P (op
))
170 return GET_MODE_SIZE (get_pool_mode (op
)) <= ia64_section_threshold
;
172 return XSTR (op
, 0)[0] == SDATA_NAME_FLAG_CHAR
;
181 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
184 got_symbolic_operand (op
, mode
)
186 enum machine_mode mode ATTRIBUTE_UNUSED
;
188 switch (GET_CODE (op
))
192 if (GET_CODE (op
) != PLUS
)
194 if (GET_CODE (XEXP (op
, 0)) != SYMBOL_REF
)
197 if (GET_CODE (op
) != CONST_INT
)
202 /* Ok if we're not using GOT entries at all. */
203 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
206 /* "Ok" while emitting rtl, since otherwise we won't be provided
207 with the entire offset during emission, which makes it very
208 hard to split the offset into high and low parts. */
209 if (rtx_equal_function_value_matters
)
212 /* Force the low 14 bits of the constant to zero so that we do not
213 use up so many GOT entries. */
214 return (INTVAL (op
) & 0x3fff) == 0;
226 /* Return 1 if OP refers to a symbol. */
229 symbolic_operand (op
, mode
)
231 enum machine_mode mode ATTRIBUTE_UNUSED
;
233 switch (GET_CODE (op
))
246 /* Return 1 if OP refers to a function. */
249 function_operand (op
, mode
)
251 enum machine_mode mode ATTRIBUTE_UNUSED
;
253 if (GET_CODE (op
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (op
))
259 /* Return 1 if OP is setjmp or a similar function. */
261 /* ??? This is an unsatisfying solution. Should rethink. */
264 setjmp_operand (op
, mode
)
266 enum machine_mode mode ATTRIBUTE_UNUSED
;
271 if (GET_CODE (op
) != SYMBOL_REF
)
276 /* The following code is borrowed from special_function_p in calls.c. */
278 /* Disregard prefix _, __ or __x. */
281 if (name
[1] == '_' && name
[2] == 'x')
283 else if (name
[1] == '_')
293 && (! strcmp (name
, "setjmp")
294 || ! strcmp (name
, "setjmp_syscall")))
296 && ! strcmp (name
, "sigsetjmp"))
298 && ! strcmp (name
, "savectx")));
300 else if ((name
[0] == 'q' && name
[1] == 's'
301 && ! strcmp (name
, "qsetjmp"))
302 || (name
[0] == 'v' && name
[1] == 'f'
303 && ! strcmp (name
, "vfork")))
309 /* Return 1 if OP is a general operand, but when pic exclude symbolic
312 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
313 from PREDICATE_CODES. */
316 move_operand (op
, mode
)
318 enum machine_mode mode
;
320 if (! TARGET_NO_PIC
&& symbolic_operand (op
, mode
))
323 return general_operand (op
, mode
);
326 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
329 gr_register_operand (op
, mode
)
331 enum machine_mode mode
;
333 if (! register_operand (op
, mode
))
335 if (GET_CODE (op
) == SUBREG
)
336 op
= SUBREG_REG (op
);
337 if (GET_CODE (op
) == REG
)
339 unsigned int regno
= REGNO (op
);
340 if (regno
< FIRST_PSEUDO_REGISTER
)
341 return GENERAL_REGNO_P (regno
);
346 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
349 fr_register_operand (op
, mode
)
351 enum machine_mode mode
;
353 if (! register_operand (op
, mode
))
355 if (GET_CODE (op
) == SUBREG
)
356 op
= SUBREG_REG (op
);
357 if (GET_CODE (op
) == REG
)
359 unsigned int regno
= REGNO (op
);
360 if (regno
< FIRST_PSEUDO_REGISTER
)
361 return FR_REGNO_P (regno
);
366 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
369 grfr_register_operand (op
, mode
)
371 enum machine_mode mode
;
373 if (! register_operand (op
, mode
))
375 if (GET_CODE (op
) == SUBREG
)
376 op
= SUBREG_REG (op
);
377 if (GET_CODE (op
) == REG
)
379 unsigned int regno
= REGNO (op
);
380 if (regno
< FIRST_PSEUDO_REGISTER
)
381 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
386 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
389 gr_nonimmediate_operand (op
, mode
)
391 enum machine_mode mode
;
393 if (! nonimmediate_operand (op
, mode
))
395 if (GET_CODE (op
) == SUBREG
)
396 op
= SUBREG_REG (op
);
397 if (GET_CODE (op
) == REG
)
399 unsigned int regno
= REGNO (op
);
400 if (regno
< FIRST_PSEUDO_REGISTER
)
401 return GENERAL_REGNO_P (regno
);
406 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
409 fr_nonimmediate_operand (op
, mode
)
411 enum machine_mode mode
;
413 if (! nonimmediate_operand (op
, mode
))
415 if (GET_CODE (op
) == SUBREG
)
416 op
= SUBREG_REG (op
);
417 if (GET_CODE (op
) == REG
)
419 unsigned int regno
= REGNO (op
);
420 if (regno
< FIRST_PSEUDO_REGISTER
)
421 return FR_REGNO_P (regno
);
426 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
429 grfr_nonimmediate_operand (op
, mode
)
431 enum machine_mode mode
;
433 if (! nonimmediate_operand (op
, mode
))
435 if (GET_CODE (op
) == SUBREG
)
436 op
= SUBREG_REG (op
);
437 if (GET_CODE (op
) == REG
)
439 unsigned int regno
= REGNO (op
);
440 if (regno
< FIRST_PSEUDO_REGISTER
)
441 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
446 /* Return 1 if OP is a GR register operand, or zero. */
449 gr_reg_or_0_operand (op
, mode
)
451 enum machine_mode mode
;
453 return (op
== const0_rtx
|| gr_register_operand (op
, mode
));
456 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
459 gr_reg_or_5bit_operand (op
, mode
)
461 enum machine_mode mode
;
463 return ((GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 32)
464 || GET_CODE (op
) == CONSTANT_P_RTX
465 || gr_register_operand (op
, mode
));
468 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
471 gr_reg_or_6bit_operand (op
, mode
)
473 enum machine_mode mode
;
475 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
476 || GET_CODE (op
) == CONSTANT_P_RTX
477 || gr_register_operand (op
, mode
));
480 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
483 gr_reg_or_8bit_operand (op
, mode
)
485 enum machine_mode mode
;
487 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
488 || GET_CODE (op
) == CONSTANT_P_RTX
489 || gr_register_operand (op
, mode
));
492 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
495 grfr_reg_or_8bit_operand (op
, mode
)
497 enum machine_mode mode
;
499 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
500 || GET_CODE (op
) == CONSTANT_P_RTX
501 || grfr_register_operand (op
, mode
));
504 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
508 gr_reg_or_8bit_adjusted_operand (op
, mode
)
510 enum machine_mode mode
;
512 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_L (INTVAL (op
)))
513 || GET_CODE (op
) == CONSTANT_P_RTX
514 || gr_register_operand (op
, mode
));
517 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
518 immediate and an 8 bit adjusted immediate operand. This is necessary
519 because when we emit a compare, we don't know what the condition will be,
520 so we need the union of the immediates accepted by GT and LT. */
523 gr_reg_or_8bit_and_adjusted_operand (op
, mode
)
525 enum machine_mode mode
;
527 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
))
528 && CONST_OK_FOR_L (INTVAL (op
)))
529 || GET_CODE (op
) == CONSTANT_P_RTX
530 || gr_register_operand (op
, mode
));
533 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
536 gr_reg_or_14bit_operand (op
, mode
)
538 enum machine_mode mode
;
540 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_I (INTVAL (op
)))
541 || GET_CODE (op
) == CONSTANT_P_RTX
542 || gr_register_operand (op
, mode
));
545 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
548 gr_reg_or_22bit_operand (op
, mode
)
550 enum machine_mode mode
;
552 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_J (INTVAL (op
)))
553 || GET_CODE (op
) == CONSTANT_P_RTX
554 || gr_register_operand (op
, mode
));
557 /* Return 1 if OP is a 6 bit immediate operand. */
560 shift_count_operand (op
, mode
)
562 enum machine_mode mode ATTRIBUTE_UNUSED
;
564 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
565 || GET_CODE (op
) == CONSTANT_P_RTX
);
568 /* Return 1 if OP is a 5 bit immediate operand. */
571 shift_32bit_count_operand (op
, mode
)
573 enum machine_mode mode ATTRIBUTE_UNUSED
;
575 return ((GET_CODE (op
) == CONST_INT
576 && (INTVAL (op
) >= 0 && INTVAL (op
) < 32))
577 || GET_CODE (op
) == CONSTANT_P_RTX
);
580 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
583 shladd_operand (op
, mode
)
585 enum machine_mode mode ATTRIBUTE_UNUSED
;
587 return (GET_CODE (op
) == CONST_INT
588 && (INTVAL (op
) == 2 || INTVAL (op
) == 4
589 || INTVAL (op
) == 8 || INTVAL (op
) == 16));
592 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
595 fetchadd_operand (op
, mode
)
597 enum machine_mode mode ATTRIBUTE_UNUSED
;
599 return (GET_CODE (op
) == CONST_INT
600 && (INTVAL (op
) == -16 || INTVAL (op
) == -8 ||
601 INTVAL (op
) == -4 || INTVAL (op
) == -1 ||
602 INTVAL (op
) == 1 || INTVAL (op
) == 4 ||
603 INTVAL (op
) == 8 || INTVAL (op
) == 16));
606 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
609 fr_reg_or_fp01_operand (op
, mode
)
611 enum machine_mode mode
;
613 return ((GET_CODE (op
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (op
))
614 || fr_register_operand (op
, mode
));
617 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
618 POST_MODIFY with a REG as displacement. */
621 destination_operand (op
, mode
)
623 enum machine_mode mode
;
625 if (! nonimmediate_operand (op
, mode
))
627 if (GET_CODE (op
) == MEM
628 && GET_CODE (XEXP (op
, 0)) == POST_MODIFY
629 && GET_CODE (XEXP (XEXP (XEXP (op
, 0), 1), 1)) == REG
)
634 /* Like memory_operand, but don't allow post-increments. */
637 not_postinc_memory_operand (op
, mode
)
639 enum machine_mode mode
;
641 return (memory_operand (op
, mode
)
642 && GET_RTX_CLASS (GET_CODE (XEXP (op
, 0))) != 'a');
645 /* Return 1 if this is a comparison operator, which accepts an normal 8-bit
646 signed immediate operand. */
649 normal_comparison_operator (op
, mode
)
651 enum machine_mode mode
;
653 enum rtx_code code
= GET_CODE (op
);
654 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
655 && (code
== EQ
|| code
== NE
656 || code
== GT
|| code
== LE
|| code
== GTU
|| code
== LEU
));
659 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
660 signed immediate operand. */
663 adjusted_comparison_operator (op
, mode
)
665 enum machine_mode mode
;
667 enum rtx_code code
= GET_CODE (op
);
668 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
669 && (code
== LT
|| code
== GE
|| code
== LTU
|| code
== GEU
));
672 /* Return 1 if this is a signed inequality operator. */
675 signed_inequality_operator (op
, mode
)
677 enum machine_mode mode
;
679 enum rtx_code code
= GET_CODE (op
);
680 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
681 && (code
== GE
|| code
== GT
682 || code
== LE
|| code
== LT
));
685 /* Return 1 if this operator is valid for predication. */
688 predicate_operator (op
, mode
)
690 enum machine_mode mode
;
692 enum rtx_code code
= GET_CODE (op
);
693 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
694 && (code
== EQ
|| code
== NE
));
697 /* Return 1 if this is the ar.lc register. */
700 ar_lc_reg_operand (op
, mode
)
702 enum machine_mode mode
;
704 return (GET_MODE (op
) == DImode
705 && (mode
== DImode
|| mode
== VOIDmode
)
706 && GET_CODE (op
) == REG
707 && REGNO (op
) == AR_LC_REGNUM
);
710 /* Return 1 if this is the ar.ccv register. */
713 ar_ccv_reg_operand (op
, mode
)
715 enum machine_mode mode
;
717 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
718 && GET_CODE (op
) == REG
719 && REGNO (op
) == AR_CCV_REGNUM
);
722 /* Like general_operand, but don't allow (mem (addressof)). */
725 general_tfmode_operand (op
, mode
)
727 enum machine_mode mode
;
729 if (! general_operand (op
, mode
))
731 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
739 destination_tfmode_operand (op
, mode
)
741 enum machine_mode mode
;
743 if (! destination_operand (op
, mode
))
745 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
753 tfreg_or_fp01_operand (op
, mode
)
755 enum machine_mode mode
;
757 if (GET_CODE (op
) == SUBREG
)
759 return fr_reg_or_fp01_operand (op
, mode
);
762 /* Return 1 if the operands of a move are ok. */
765 ia64_move_ok (dst
, src
)
768 /* If we're under init_recog_no_volatile, we'll not be able to use
769 memory_operand. So check the code directly and don't worry about
770 the validity of the underlying address, which should have been
771 checked elsewhere anyway. */
772 if (GET_CODE (dst
) != MEM
)
774 if (GET_CODE (src
) == MEM
)
776 if (register_operand (src
, VOIDmode
))
779 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
780 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
781 return src
== const0_rtx
;
783 return GET_CODE (src
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (src
);
786 /* Check if OP is a mask suitible for use with SHIFT in a dep.z instruction.
787 Return the length of the field, or <= 0 on failure. */
790 ia64_depz_field_mask (rop
, rshift
)
793 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
794 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
796 /* Get rid of the zero bits we're shifting in. */
799 /* We must now have a solid block of 1's at bit 0. */
800 return exact_log2 (op
+ 1);
803 /* Expand a symbolic constant load. */
804 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
807 ia64_expand_load_address (dest
, src
, scratch
)
808 rtx dest
, src
, scratch
;
812 /* The destination could be a MEM during initial rtl generation,
813 which isn't a valid destination for the PIC load address patterns. */
814 if (! register_operand (dest
, DImode
))
815 temp
= gen_reg_rtx (DImode
);
820 emit_insn (gen_load_gprel64 (temp
, src
));
821 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FLAG (src
))
822 emit_insn (gen_load_fptr (temp
, src
));
823 else if (sdata_symbolic_operand (src
, DImode
))
824 emit_insn (gen_load_gprel (temp
, src
));
825 else if (GET_CODE (src
) == CONST
826 && GET_CODE (XEXP (src
, 0)) == PLUS
827 && GET_CODE (XEXP (XEXP (src
, 0), 1)) == CONST_INT
828 && (INTVAL (XEXP (XEXP (src
, 0), 1)) & 0x1fff) != 0)
830 rtx subtarget
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
831 rtx sym
= XEXP (XEXP (src
, 0), 0);
832 HOST_WIDE_INT ofs
, hi
, lo
;
834 /* Split the offset into a sign extended 14-bit low part
835 and a complementary high part. */
836 ofs
= INTVAL (XEXP (XEXP (src
, 0), 1));
837 lo
= ((ofs
& 0x3fff) ^ 0x2000) - 0x2000;
841 scratch
= no_new_pseudos
? subtarget
: gen_reg_rtx (DImode
);
843 emit_insn (gen_load_symptr (subtarget
, plus_constant (sym
, hi
),
845 emit_insn (gen_adddi3 (temp
, subtarget
, GEN_INT (lo
)));
851 scratch
= no_new_pseudos
? temp
: gen_reg_rtx (DImode
);
853 insn
= emit_insn (gen_load_symptr (temp
, src
, scratch
));
854 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_EQUAL
, src
, REG_NOTES (insn
));
858 emit_move_insn (dest
, temp
);
862 ia64_gp_save_reg (setjmp_p
)
865 rtx save
= cfun
->machine
->ia64_gp_save
;
869 /* We can't save GP in a pseudo if we are calling setjmp, because
870 pseudos won't be restored by longjmp. For now, we save it in r4. */
871 /* ??? It would be more efficient to save this directly into a stack
872 slot. Unfortunately, the stack slot address gets cse'd across
873 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
876 /* ??? Get the barf bag, Virginia. We've got to replace this thing
877 in place, since this rtx is used in exception handling receivers.
878 Moreover, we must get this rtx out of regno_reg_rtx or reload
879 will do the wrong thing. */
880 unsigned int old_regno
= REGNO (save
);
881 if (setjmp_p
&& old_regno
!= GR_REG (4))
883 REGNO (save
) = GR_REG (4);
884 regno_reg_rtx
[old_regno
] = gen_rtx_raw_REG (DImode
, old_regno
);
890 save
= gen_rtx_REG (DImode
, GR_REG (4));
892 save
= gen_rtx_REG (DImode
, LOC_REG (0));
894 save
= gen_reg_rtx (DImode
);
895 cfun
->machine
->ia64_gp_save
= save
;
901 /* Split a post-reload TImode reference into two DImode components. */
904 ia64_split_timode (out
, in
, scratch
)
908 switch (GET_CODE (in
))
911 out
[0] = gen_rtx_REG (DImode
, REGNO (in
));
912 out
[1] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
917 rtx base
= XEXP (in
, 0);
919 switch (GET_CODE (base
))
922 out
[0] = change_address (in
, DImode
, NULL_RTX
);
925 base
= XEXP (base
, 0);
926 out
[0] = change_address (in
, DImode
, NULL_RTX
);
929 /* Since we're changing the mode, we need to change to POST_MODIFY
930 as well to preserve the size of the increment. Either that or
931 do the update in two steps, but we've already got this scratch
932 register handy so let's use it. */
934 base
= XEXP (base
, 0);
935 out
[0] = change_address (in
, DImode
,
936 gen_rtx_POST_MODIFY (Pmode
, base
,plus_constant (base
, 16)));
939 base
= XEXP (base
, 0);
940 out
[0] = change_address (in
, DImode
,
941 gen_rtx_POST_MODIFY (Pmode
, base
,plus_constant (base
, -16)));
947 if (scratch
== NULL_RTX
)
949 out
[1] = change_address (in
, DImode
, scratch
);
950 return gen_adddi3 (scratch
, base
, GEN_INT (8));
955 split_double (in
, &out
[0], &out
[1]);
963 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
964 through memory plus an extra GR scratch register. Except that you can
965 either get the first from SECONDARY_MEMORY_NEEDED or the second from
966 SECONDARY_RELOAD_CLASS, but not both.
968 We got into problems in the first place by allowing a construct like
969 (subreg:TF (reg:TI)), which we got from a union containing a long double.
970 This solution attempts to prevent this situation from ocurring. When
971 we see something like the above, we spill the inner register to memory. */
974 spill_tfmode_operand (in
, force
)
978 if (GET_CODE (in
) == SUBREG
979 && GET_MODE (SUBREG_REG (in
)) == TImode
980 && GET_CODE (SUBREG_REG (in
)) == REG
)
982 rtx mem
= gen_mem_addressof (SUBREG_REG (in
), NULL_TREE
);
983 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
985 else if (force
&& GET_CODE (in
) == REG
)
987 rtx mem
= gen_mem_addressof (in
, NULL_TREE
);
988 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
990 else if (GET_CODE (in
) == MEM
991 && GET_CODE (XEXP (in
, 0)) == ADDRESSOF
)
993 return change_address (in
, TFmode
, copy_to_reg (XEXP (in
, 0)));
999 /* Emit comparison instruction if necessary, returning the expression
1000 that holds the compare result in the proper mode. */
1003 ia64_expand_compare (code
, mode
)
1005 enum machine_mode mode
;
1007 rtx op0
= ia64_compare_op0
, op1
= ia64_compare_op1
;
1010 /* If we have a BImode input, then we already have a compare result, and
1011 do not need to emit another comparison. */
1012 if (GET_MODE (op0
) == BImode
)
1014 if ((code
== NE
|| code
== EQ
) && op1
== const0_rtx
)
1021 cmp
= gen_reg_rtx (BImode
);
1022 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1023 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
)));
1027 return gen_rtx_fmt_ee (code
, mode
, cmp
, const0_rtx
);
1030 /* Emit the appropriate sequence for a call. */
1033 ia64_expand_call (retval
, addr
, nextarg
, sibcall_p
)
1039 rtx insn
, b0
, gp_save
, narg_rtx
;
1042 addr
= XEXP (addr
, 0);
1043 b0
= gen_rtx_REG (DImode
, R_BR (0));
1047 else if (IN_REGNO_P (REGNO (nextarg
)))
1048 narg
= REGNO (nextarg
) - IN_REG (0);
1050 narg
= REGNO (nextarg
) - OUT_REG (0);
1051 narg_rtx
= GEN_INT (narg
);
1053 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
1056 insn
= gen_sibcall_nopic (addr
, narg_rtx
, b0
);
1058 insn
= gen_call_nopic (addr
, narg_rtx
, b0
);
1060 insn
= gen_call_value_nopic (retval
, addr
, narg_rtx
, b0
);
1061 emit_call_insn (insn
);
1068 gp_save
= ia64_gp_save_reg (setjmp_operand (addr
, VOIDmode
));
1070 /* If this is an indirect call, then we have the address of a descriptor. */
1071 if (! symbolic_operand (addr
, VOIDmode
))
1076 emit_move_insn (gp_save
, pic_offset_table_rtx
);
1078 dest
= force_reg (DImode
, gen_rtx_MEM (DImode
, addr
));
1079 emit_move_insn (pic_offset_table_rtx
,
1080 gen_rtx_MEM (DImode
, plus_constant (addr
, 8)));
1083 insn
= gen_sibcall_pic (dest
, narg_rtx
, b0
);
1085 insn
= gen_call_pic (dest
, narg_rtx
, b0
);
1087 insn
= gen_call_value_pic (retval
, dest
, narg_rtx
, b0
);
1088 emit_call_insn (insn
);
1091 emit_move_insn (pic_offset_table_rtx
, gp_save
);
1093 else if (TARGET_CONST_GP
)
1096 insn
= gen_sibcall_nopic (addr
, narg_rtx
, b0
);
1098 insn
= gen_call_nopic (addr
, narg_rtx
, b0
);
1100 insn
= gen_call_value_nopic (retval
, addr
, narg_rtx
, b0
);
1101 emit_call_insn (insn
);
1106 emit_call_insn (gen_sibcall_pic (addr
, narg_rtx
, b0
));
1109 emit_move_insn (gp_save
, pic_offset_table_rtx
);
1112 insn
= gen_call_pic (addr
, narg_rtx
, b0
);
1114 insn
= gen_call_value_pic (retval
, addr
, narg_rtx
, b0
);
1115 emit_call_insn (insn
);
1117 emit_move_insn (pic_offset_table_rtx
, gp_save
);
1122 /* Begin the assembly file. */
1125 emit_safe_across_calls (f
)
1128 unsigned int rs
, re
;
1135 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
1139 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
1143 fputs ("\t.pred.safe_across_calls ", f
);
1149 fprintf (f
, "p%u", rs
);
1151 fprintf (f
, "p%u-p%u", rs
, re
- 1);
1159 /* Structure to be filled in by ia64_compute_frame_size with register
1160 save masks and offsets for the current function. */
1162 struct ia64_frame_info
1164 HOST_WIDE_INT total_size
; /* size of the stack frame, not including
1165 the caller's scratch area. */
1166 HOST_WIDE_INT spill_cfa_off
; /* top of the reg spill area from the cfa. */
1167 HOST_WIDE_INT spill_size
; /* size of the gr/br/fr spill area. */
1168 HOST_WIDE_INT extra_spill_size
; /* size of spill area for others. */
1169 HARD_REG_SET mask
; /* mask of saved registers. */
1170 unsigned int gr_used_mask
; /* mask of registers in use as gr spill
1171 registers or long-term scratches. */
1172 int n_spilled
; /* number of spilled registers. */
1173 int reg_fp
; /* register for fp. */
1174 int reg_save_b0
; /* save register for b0. */
1175 int reg_save_pr
; /* save register for prs. */
1176 int reg_save_ar_pfs
; /* save register for ar.pfs. */
1177 int reg_save_ar_unat
; /* save register for ar.unat. */
1178 int reg_save_ar_lc
; /* save register for ar.lc. */
1179 int n_input_regs
; /* number of input registers used. */
1180 int n_local_regs
; /* number of local registers used. */
1181 int n_output_regs
; /* number of output registers used. */
1182 int n_rotate_regs
; /* number of rotating registers used. */
1184 char need_regstk
; /* true if a .regstk directive needed. */
1185 char initialized
; /* true if the data is finalized. */
1188 /* Current frame information calculated by ia64_compute_frame_size. */
1189 static struct ia64_frame_info current_frame_info
;
1191 /* Helper function for ia64_compute_frame_size: find an appropriate general
1192 register to spill some special register to. SPECIAL_SPILL_MASK contains
1193 bits in GR0 to GR31 that have already been allocated by this routine.
1194 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1197 find_gr_spill (try_locals
)
1202 /* If this is a leaf function, first try an otherwise unused
1203 call-clobbered register. */
1204 if (current_function_is_leaf
)
1206 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1207 if (! regs_ever_live
[regno
]
1208 && call_used_regs
[regno
]
1209 && ! fixed_regs
[regno
]
1210 && ! global_regs
[regno
]
1211 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1213 current_frame_info
.gr_used_mask
|= 1 << regno
;
1220 regno
= current_frame_info
.n_local_regs
;
1221 /* If there is a frame pointer, then we can't use loc79, because
1222 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1223 reg_name switching code in ia64_expand_prologue. */
1224 if (regno
< (80 - frame_pointer_needed
))
1226 current_frame_info
.n_local_regs
= regno
+ 1;
1227 return LOC_REG (0) + regno
;
1231 /* Failed to find a general register to spill to. Must use stack. */
1235 /* In order to make for nice schedules, we try to allocate every temporary
1236 to a different register. We must of course stay away from call-saved,
1237 fixed, and global registers. We must also stay away from registers
1238 allocated in current_frame_info.gr_used_mask, since those include regs
1239 used all through the prologue.
1241 Any register allocated here must be used immediately. The idea is to
1242 aid scheduling, not to solve data flow problems. */
1244 static int last_scratch_gr_reg
;
1247 next_scratch_gr_reg ()
1251 for (i
= 0; i
< 32; ++i
)
1253 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
1254 if (call_used_regs
[regno
]
1255 && ! fixed_regs
[regno
]
1256 && ! global_regs
[regno
]
1257 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1259 last_scratch_gr_reg
= regno
;
1264 /* There must be _something_ available. */
1268 /* Helper function for ia64_compute_frame_size, called through
1269 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1272 mark_reg_gr_used_mask (reg
, data
)
1274 void *data ATTRIBUTE_UNUSED
;
1276 unsigned int regno
= REGNO (reg
);
1278 current_frame_info
.gr_used_mask
|= 1 << regno
;
/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.

   Fills in current_frame_info: the register-stack partition (n_input_regs /
   n_local_regs / n_output_regs / n_rotate_regs), the set of hard registers
   that must be spilled (mask), the GRs chosen to hold FP, B0, ar.pfs,
   ar.unat, ar.lc and the predicate block, and the computed stack sizes.
   Results are cached via current_frame_info.initialized.

   NOTE(review): this extract is missing many original lines (braces,
   early-return on the initialized check, spill-size accumulation,
   spilled_gr_p/spilled_fr_p updates, several else branches) -- the
   visible code is annotated as-is.  */
ia64_compute_frame_size (size)
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  /* Cached result from a previous call; presumably returns early here
     -- the return statement is missing from this extract.  */
  if (current_frame_info.initialized)
  /* NOTE(review): "¤t_frame_info" is a mis-encoding artifact; the
     original token is almost certainly "&current_frame_info"
     ("&curren" was turned into the currency-sign entity).  Confirm
     against the upstream file before applying.  */
  memset (¤t_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);
  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);
  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs (comment truncated in this extract).  */
  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller (comment truncated in this extract).  */
  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
    if (regs_ever_live[regno])
  current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
  i = regno - OUT_REG (0) + 1;
  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments (comment truncated; the bodies of the two branches below
     are missing from this extract).  */
  if (profile_flag || profile_block_flag == 1)
  else if (profile_block_flag == 2)
  current_frame_info.n_output_regs = i;
  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;
  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
        SET_HARD_REG_BIT (mask, regno);
  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
        SET_HARD_REG_BIT (mask, regno);
  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
        SET_HARD_REG_BIT (mask, regno);
  /* Now come all special registers that might get saved in other
     general registers.  */
  if (frame_pointer_needed)
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
          current_frame_info.reg_fp = LOC_REG (79);
          current_frame_info.n_local_regs++;
  if (! current_function_is_leaf)
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));
      current_frame_info.reg_save_b0 = find_gr_spill (1);
      /* Body of the no-GR-available fallback is missing from this
         extract (presumably it accounts for a memory spill).  */
      if (current_frame_info.reg_save_b0 == 0)
      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
          extra_spill_size += 8;
  if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
      SET_HARD_REG_BIT (mask, BR_REG (0));
  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
  /* Reached only if the loop above found a live call-saved predicate
     (loop body presumably breaks; that line is missing here).  */
  if (regno <= PR_REG (63))
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
          extra_spill_size += 8;
      /* ??? Mark them all as used so that register renaming and such
         are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        regs_ever_live[regno] = 1;
  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well (comment truncated in this extract).  */
  if (spilled_gr_p || cfun->machine->n_varargs)
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
          extra_spill_size += 8;
  if (regs_ever_live[AR_LC_REGNUM])
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
          extra_spill_size += 8;
  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  /* NOTE(review): the two assignments below look like the two arms of a
     missing if/else (align when FRs are spilled, raw size otherwise);
     the condition line is absent from this extract.  */
  pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  pretend_args_size = current_function_pretend_args_size;
  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);
  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area (comment truncated in this extract).  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);
  /* Publish the results in current_frame_info for the expanders.  */
  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
/* Compute the initial difference between the specified pair of registers.
   FROM is the register being eliminated, TO the register it is replaced
   with; the offsets are derived from current_frame_info after
   recomputing the frame layout.
   NOTE(review): the switch statement header, several else branches, the
   RETURN_ADDRESS_POINTER_REGNUM case body, the default case and the
   final return are missing from this extract.  */
ia64_initial_elimination_offset (from, to)
  HOST_WIDE_INT offset;
  ia64_compute_frame_size (get_frame_size ());
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
          if (current_function_is_leaf)
            offset = -current_frame_info.total_size;
          /* Non-leaf arm: discount outgoing args and the 16-byte
             scratch area (the 'else' keyword is missing here).  */
            offset = -(current_frame_info.total_size
                       - current_function_outgoing_args_size - 16);
      else if (to == STACK_POINTER_REGNUM)
          if (current_function_is_leaf)
            offset = 16 + current_function_outgoing_args_size;
    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
         in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
        offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
        offset = (current_frame_info.total_size
                  + 16 - current_function_pretend_args_size);
    case RETURN_ADDRESS_POINTER_REGNUM:
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group (comment truncated in this extract).

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */
struct spill_fill_data
  rtx init_after;               /* point at which to emit initializations */
  rtx init_reg[2];              /* initial base register */
  rtx iter_reg[2];              /* the iterator registers */
  rtx *prev_addr[2];            /* address of last memory use */
  HOST_WIDE_INT prev_off[2];    /* last offset */
  int n_iter;                   /* number of iterators in use */
  int next_iter;                /* next iterator to use */
  unsigned int save_gr_used_mask;  /* gr_used_mask to restore when done */

/* Single shared instance; prologue/epilogue expansion is not reentrant.  */
static struct spill_fill_data spill_fill_data;
/* Initialize spill_fill_data for a run of N_SPILLS spills/fills based at
   INIT_REG with the first access at CFA offset CFA_OFF.  Uses one
   iterator register for up to two spills, two (interleaved) otherwise;
   iterator registers are scratch GRs and are recorded in gr_used_mask
   (undone later by finish_spill_pointers).
   NOTE(review): the function's storage class/return type, the other
   parameter declarations, the locals, and braces are missing from this
   extract.  */
setup_spill_pointers (n_spills, init_reg, cfa_off)
     HOST_WIDE_INT cfa_off;
  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  /* Remember the mask so finish_spill_pointers can release the
     iterator registers.  */
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
  /* Two iterators only pay off with more than two spills.  */
  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
/* Release the iterator registers reserved by setup_spill_pointers by
   restoring the saved gr_used_mask.  */
finish_spill_pointers ()
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
/* Return a MEM through the current spill iterator suitable for accessing
   REG at CFA offset CFA_OFF.  If a previous access exists on this
   iterator, retroactively turn it into a POST_MODIFY (or emit an
   explicit add) so the iterator lands on CFA_OFF; otherwise emit the
   iterator's initialization at the recorded init_after point.  Advances
   next_iter round-robin across the interleaved iterators.
   NOTE(review): this extract is missing many original lines (locals,
   braces, else keywords, the start_sequence call, the POST_MODIFY
   displacement operand, the condition line of the micro-optimization
   'if', and wrap-around of the iterator index) -- annotated as-is.  */
spill_restore_mem (reg, cfa_off)
     HOST_WIDE_INT cfa_off;
  int iter = spill_fill_data.next_iter;
  /* Distance from where this iterator currently points to CFA_OFF.
     prev_off counts down as saves proceed, so disp is the post-increment
     to apply to the previous access.  */
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  if (spill_fill_data.prev_addr[iter])
      /* Patch the previous MEM on this iterator into a POST_MODIFY so
         no separate add insn is needed, when the displacement fits.  */
      if (CONST_OK_FOR_N (disp))
        *spill_fill_data.prev_addr[iter]
          = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
                                 gen_rtx_PLUS (DImode,
                                               spill_fill_data.iter_reg[iter],
          /* ??? Could use register post_modify for loads.  */
          /* Displacement too large for an add immediate: materialize it
             in a scratch GR first.  */
          if (! CONST_OK_FOR_I (disp))
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.iter_reg[iter], disp_rtx));
      /* First use of this iterator: emit its initialization.  */
      /* Micro-optimization: if we've created a frame pointer, it's at
         CFA 0, which may allow the real iterator to be initialized lower,
         slightly increasing parallelism.  Also, if there are few saves
         it may eliminate the iterator entirely.  */
      && spill_fill_data.init_reg[iter] == stack_pointer_rtx
          && frame_pointer_needed)
          mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
          MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
          seq = gen_movdi (spill_fill_data.iter_reg[iter],
                           spill_fill_data.init_reg[iter]);
              if (! CONST_OK_FOR_I (disp))
                  rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
                  emit_move_insn (tmp, disp_rtx);
              emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                     spill_fill_data.init_reg[iter],
          seq = gen_sequence ();
      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
        spill_fill_data.init_after
          = emit_insn_after (seq, spill_fill_data.init_after);
          rtx first = get_insns ();
            spill_fill_data.init_after
              = emit_insn_before (seq, first);
            spill_fill_data.init_after = emit_insn (seq);
  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
  /* Remember this MEM's address slot so the next call can patch it
     into a POST_MODIFY.  */
  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;
  /* Advance round-robin to the next iterator (the wrap-around
     assignment is missing from this extract).  */
  if (++iter >= spill_fill_data.n_iter)
  spill_fill_data.next_iter = iter;
/* Emit a spill of REG to the spill area at CFA offset CFA_OFF using
   MOVE_FN (one of the gen_*_x wrappers or gen_gr_spill), through the
   POST_MODIFY iterators.  FRAME_REG, when non-null, identifies the
   register whose save this is for unwind purposes; a
   REG_FRAME_RELATED_EXPR note is attached giving the canonical
   save address, since the unwinder cannot decode interleaved
   post_modify iterators.
   NOTE(review): the return type, remaining parameter declarations,
   locals, braces, the frame_reg guard, the else keyword, and the tail
   of the EXPR_LIST construction are missing from this extract.  */
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     HOST_WIDE_INT cfa_off;
  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* Don't even pretend that the unwind code can intuit its way
         through a pair of interleaved post_modify iterators.  Just
         provide the correct answer.  */
      if (frame_pointer_needed)
          base = hard_frame_pointer_rtx;
          base = stack_pointer_rtx;
          off = current_frame_info.total_size - cfa_off;
        = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                             gen_rtx_SET (VOIDmode,
                                          gen_rtx_MEM (GET_MODE (reg),
                                                       plus_constant (base, off)),
/* Emit a restore of REG from the spill area at CFA offset CFA_OFF using
   MOVE_FN, through the same POST_MODIFY iterator machinery as do_spill.
   No frame-related notes are needed on the restore path.
   NOTE(review): return type, REG's declaration and braces are missing
   from this extract.  */
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     HOST_WIDE_INT cfa_off;
  emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
                         GEN_INT (cfa_off)));
/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

/* movdi wrapper: ignore OFFSET, forward DEST/SRC to gen_movdi.  */
gen_movdi_x (dest, src, offset)
     rtx offset ATTRIBUTE_UNUSED;
  return gen_movdi (dest, src);
/* fr_spill wrapper: ignore OFFSET, forward DEST/SRC to gen_fr_spill.  */
gen_fr_spill_x (dest, src, offset)
     rtx offset ATTRIBUTE_UNUSED;
  return gen_fr_spill (dest, src);
/* fr_restore wrapper: ignore OFFSET, forward DEST/SRC to gen_fr_restore.  */
gen_fr_restore_x (dest, src, offset)
     rtx offset ATTRIBUTE_UNUSED;
  return gen_fr_restore (dest, src);
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16 (comment truncated in this extract)
        [ varargs spill area ]
        [ fr register spill area ]
        [ br register spill area ]
        [ ar register spill area ]
        [ pr register spill area ]
        [ gr register spill area ] */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

/* NOTE(review): this extract is missing many original lines throughout
   (braces, else keywords, locals such as 'e'/'reg'/'alt_reg'/'offset',
   the bodies of the abort checks, and parts of the REG_NOTES
   constructions) -- the visible code is annotated as-is.  */
ia64_expand_prologue ()
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  ia64_compute_frame_size (get_frame_size ());
  /* Start scratch allocation at GR16 (first register handed out will be
     (15+1)&31 = 16, a caller-clobbered register).  */
  last_scratch_gr_reg = 15;
  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
      /* Look for a fall-through (non-fake) edge into the exit block;
         its presence means an epilogue will be emitted.  */
      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
        if ((e->flags & EDGE_FAKE) == 0
            && (e->flags & EDGE_FALLTHRU) != 0)
      epilogue_p = (e != NULL);
  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;
      /* Rename inN/locN/outN to their absolute rNN equivalents.  */
      for (i = 0; i < inputs; i++)
        reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
        reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
        reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.reg_fp)
      /* Swap the names so HARD_FRAME_POINTER_REGNUM prints as the GR
         actually chosen to hold the frame pointer.  */
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
        = reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
  /* Fix up the return address placeholder.  */
  /* ??? We can fail if __builtin_return_address is used, and we didn't
     allocate a register in which to save b0.  I can't think of a way to
     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
     then be sure that I got the right one.  Further, reload doesn't seem
     to care if an eliminable register isn't used, and "eliminates" it
     anyway (comment truncated in this extract).  */
  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
      && current_frame_info.reg_save_b0 != 0)
    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= current_function_args_info.words)
      /* If there is no alloc, but there are input registers used, then we
         need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
      /* else-arm of the alloc decision (the 'else' and braces are
         missing from this extract): emit an alloc insn.  */
      current_frame_info.need_regstk = 0;
      if (current_frame_info.reg_save_ar_pfs)
        regno = current_frame_info.reg_save_ar_pfs;
        regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
      /* alloc writes ar.pfs into AR_PFS_SAVE_REG as a side effect.  */
      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
                                   GEN_INT (current_frame_info.n_input_regs),
                                   GEN_INT (current_frame_info.n_local_regs),
                                   GEN_INT (current_frame_info.n_output_regs),
                                   GEN_INT (current_frame_info.n_rotate_regs)));
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
  /* Set up frame pointer, stack pointer, and spill iterators.  */
  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
                        stack_pointer_rtx, 0);
  if (frame_pointer_needed)
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
  if (current_frame_info.total_size != 0)
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      if (CONST_OK_FOR_I (- current_frame_info.total_size))
        offset = frame_size_rtx;
          /* Too big for an add immediate: load into a scratch GR.  */
          regno = next_scratch_gr_reg ();
          offset = gen_rtx_REG (DImode, regno);
          emit_move_insn (offset, frame_size_rtx);
      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, offset));
      if (! frame_pointer_needed)
          RTX_FRAME_RELATED_P (insn) = 1;
          /* When the decrement went through a scratch register, attach
             the canonical sp = sp - size expression for the unwinder.  */
          if (GET_CODE (offset) != CONST_INT)
                = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                     gen_rtx_SET (VOIDmode,
                                                  gen_rtx_PLUS (DImode,
      /* ??? At this point we must generate a magic insn that appears to
         modify the stack pointer, the frame pointer, and all spill
         iterators.  This would allow the most scheduling freedom.  For
         now, just hard stop.  */
      emit_insn (gen_blockage ());
  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
      if (current_frame_info.reg_save_ar_unat)
          = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
          /* No home register: park ar.unat in a scratch GR for the
             duration of the prologue.  */
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
      /* Even if we're not going to generate an epilogue, we still
         need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
        emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
    ar_unat_save_reg = NULL_RTX;
  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);
  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
          insn = emit_move_insn (alt_reg, reg);
          /* ??? Denote pr spill/fill by a DImode move that modifies all
             64 hard registers.  */
          RTX_FRAME_RELATED_P (insn) = 1;
            = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                 gen_rtx_SET (VOIDmode, alt_reg, reg),
          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
            emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
          /* No home register: copy PRs through a scratch GR to memory.  */
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          insn = emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (current_frame_info.reg_save_ar_pfs == 0
      && ! current_function_is_leaf)
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;
          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
            emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
  /* We should now be at the base of the gr/br/fr spill area.
     (Consistency check; the abort body is missing from this extract.)  */
  if (cfa_off != (current_frame_info.spill_cfa_off
                  + current_frame_info.spill_size))
  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
        reg = gen_rtx_REG (DImode, regno);
        do_spill (gen_gr_spill, reg, cfa_off, reg);
  /* Handle BR0 specially -- it may be getting stored permanently in
     some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.reg_save_b0 != 0)
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;
          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
            emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
        /* BRs cannot be stored directly; bounce through a scratch GR.  */
        alt_regno = next_scratch_gr_reg ();
        alt_reg = gen_rtx_REG (DImode, alt_regno);
        reg = gen_rtx_REG (DImode, regno);
        emit_move_insn (alt_reg, reg);
        do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
        /* FRs are spilled in TFmode (16-byte slots; the alignment
           adjustment lines are missing from this extract).  */
        reg = gen_rtx_REG (TFmode, regno);
        do_spill (gen_fr_spill_x, reg, cfa_off, reg);
  /* Consistency check; the abort body is missing from this extract.  */
  if (cfa_off != current_frame_info.spill_cfa_off)
  finish_spill_pointers ();
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Restores mirror ia64_expand_prologue in reverse: PRs, ARs, GRs, BRs,
   FRs, then ar.unat for real, then the stack pointer, ending with the
   return jump (unless SIBCALL_P -- the guard around the final jump is
   missing from this extract).

   NOTE(review): this extract is missing many original lines (braces,
   else keywords, the alt_reg assignment in the unat load, abort bodies,
   the second operand of the final adddi3, parts of the REG_NOTES
   construction, and the sibcall guard) -- annotated as-is.  */
ia64_expand_epilogue (sibcall_p)
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;
  ia64_compute_frame_size (get_frame_size ());
  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
                          hard_frame_pointer_rtx, 0);
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
                          current_frame_info.total_size);
  if (current_frame_info.total_size != 0)
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators and the frame pointer.  This would
         allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);
  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
      if (current_frame_info.reg_save_pr != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
          /* Saved in memory: reload through a scratch GR.  */
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
  /* Restore the application registers.  */
  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
      if (current_frame_info.reg_save_ar_unat != 0)
          = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
          do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
    ar_unat_save_reg = NULL_RTX;
  if (current_frame_info.reg_save_ar_pfs != 0)
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
  else if (! current_function_is_leaf)
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
      if (current_frame_info.reg_save_ar_lc != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
  /* We should now be at the base of the gr/br/fr spill area.
     (Consistency check; the abort body is missing from this extract.)  */
  if (cfa_off != (current_frame_info.spill_cfa_off
                  + current_frame_info.spill_size))
  /* Restore all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
        reg = gen_rtx_REG (DImode, regno);
        do_restore (gen_gr_restore, reg, cfa_off);
  /* Restore the branch registers.  Handle B0 specially, as it may
     have gotten stored in some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
      if (current_frame_info.reg_save_b0 != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
        /* BRs cannot be loaded directly; bounce through a scratch GR.  */
        alt_regno = next_scratch_gr_reg ();
        alt_reg = gen_rtx_REG (DImode, alt_regno);
        do_restore (gen_movdi_x, alt_reg, cfa_off);
        reg = gen_rtx_REG (DImode, regno);
        emit_move_insn (reg, alt_reg);
  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
        reg = gen_rtx_REG (TFmode, regno);
        do_restore (gen_fr_restore_x, reg, cfa_off);
  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
  /* Consistency check; the abort body is missing from this extract.  */
  if (cfa_off != current_frame_info.spill_cfa_off)
  finish_spill_pointers ();
  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators, the stack pointer, and the frame
         pointer.  This would allow the most scheduling freedom.  For now,
         just hard stop (comment truncated in this extract).  */
      emit_insn (gen_blockage ());
  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
  else if (current_frame_info.total_size)
      rtx offset, frame_size_rtx;
      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (CONST_OK_FOR_I (current_frame_info.total_size))
        offset = frame_size_rtx;
          regno = next_scratch_gr_reg ();
          offset = gen_rtx_REG (DImode, regno);
          emit_move_insn (offset, frame_size_rtx);
      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
      RTX_FRAME_RELATED_P (insn) = 1;
      if (GET_CODE (offset) != CONST_INT)
          = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                               gen_rtx_SET (VOIDmode,
                                            gen_rtx_PLUS (DImode,
  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
  emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
/* NOTE(review): this whole chunk is a garbled extraction -- statements are
   split across physical lines and some lines (braces, declarations, early
   returns) appear to have been dropped.  Code is left byte-identical below;
   only comments are added.  Verify against the pristine ia64.c.  */
/* Predicate: nonzero when a bare br.ret suffices for the epilogue, i.e.
   (after reload, without a frame pointer) the computed frame has no stack
   size, no spilled registers, and no saved b0/pr/ar.pfs/ar.unat/ar.lc.  */
2404 /* Return 1 if br.ret can do all the work required to return from a
2408 ia64_direct_return ()
2410 if (reload_completed
&& ! frame_pointer_needed
)
2412 ia64_compute_frame_size (get_frame_size ());
2414 return (current_frame_info
.total_size
== 0
2415 && current_frame_info
.n_spilled
== 0
2416 && current_frame_info
.reg_save_b0
== 0
2417 && current_frame_info
.reg_save_pr
== 0
2418 && current_frame_info
.reg_save_ar_pfs
== 0
2419 && current_frame_info
.reg_save_ar_unat
== 0
2420 && current_frame_info
.reg_save_ar_lc
== 0);
/* Decide whether the register renamer may rename hard reg FROM to TO.
   Rejects renames that would clobber registers reserved by the prologue,
   output registers outside the allocated register frame, predicate renames
   that change even/oddness, and GR4 when setjmp is in use.
   NOTE(review): the return values for the reject branches were dropped by
   the extraction (presumably 0/1); code left byte-identical.  */
2426 ia64_hard_regno_rename_ok (from
, to
)
2430 /* Don't clobber any of the registers we reserved for the prologue. */
2431 if (to
== current_frame_info
.reg_fp
2432 || to
== current_frame_info
.reg_save_b0
2433 || to
== current_frame_info
.reg_save_pr
2434 || to
== current_frame_info
.reg_save_ar_pfs
2435 || to
== current_frame_info
.reg_save_ar_unat
2436 || to
== current_frame_info
.reg_save_ar_lc
)
2439 if (from
== current_frame_info
.reg_fp
2440 || from
== current_frame_info
.reg_save_b0
2441 || from
== current_frame_info
.reg_save_pr
2442 || from
== current_frame_info
.reg_save_ar_pfs
2443 || from
== current_frame_info
.reg_save_ar_unat
2444 || from
== current_frame_info
.reg_save_ar_lc
)
2447 /* Don't use output registers outside the register frame. */
2448 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
2451 /* Retain even/oddness on predicate register pairs. */
2452 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
2453 return (from
& 1) == (to
& 1);
2455 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2456 if (from
== GR_REG (4) && current_function_calls_setjmp
)
/* Emit the assembler prologue directives for the current function into FILE:
   a .regstk directive when a register stack frame is needed, then (when
   unwind info is wanted) a .prologue directive describing which consecutive
   GRs hold b0/ar.pfs/fp/pr saves, and a .spill directive if the register
   spill area base is not at its default offset (-16).
   NOTE(review): the mask-accumulation statements for the .prologue operand
   appear to have been dropped by the extraction; only the grsave tracking
   survives.  Code left byte-identical.  */
2462 /* Emit the function prologue. */
2465 ia64_function_prologue (file
, size
)
2467 int size ATTRIBUTE_UNUSED
;
2469 int mask
, grsave
, grsave_prev
;
2471 if (current_frame_info
.need_regstk
)
2472 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
2473 current_frame_info
.n_input_regs
,
2474 current_frame_info
.n_local_regs
,
2475 current_frame_info
.n_output_regs
,
2476 current_frame_info
.n_rotate_regs
);
2478 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2481 /* Emit the .prologue directive. */
/* Track the lowest (grsave) and most recent (grsave_prev) GR of the
   consecutive run of save registers, in the fixed order b0, ar.pfs,
   fp, pr required by the unwind directive.  */
2484 grsave
= grsave_prev
= 0;
2485 if (current_frame_info
.reg_save_b0
!= 0)
2488 grsave
= grsave_prev
= current_frame_info
.reg_save_b0
;
2490 if (current_frame_info
.reg_save_ar_pfs
!= 0
2491 && (grsave_prev
== 0
2492 || current_frame_info
.reg_save_ar_pfs
== grsave_prev
+ 1))
2495 if (grsave_prev
== 0)
2496 grsave
= current_frame_info
.reg_save_ar_pfs
;
2497 grsave_prev
= current_frame_info
.reg_save_ar_pfs
;
2499 if (current_frame_info
.reg_fp
!= 0
2500 && (grsave_prev
== 0
2501 || current_frame_info
.reg_fp
== grsave_prev
+ 1))
2504 if (grsave_prev
== 0)
2505 grsave
= HARD_FRAME_POINTER_REGNUM
;
2506 grsave_prev
= current_frame_info
.reg_fp
;
2508 if (current_frame_info
.reg_save_pr
!= 0
2509 && (grsave_prev
== 0
2510 || current_frame_info
.reg_save_pr
== grsave_prev
+ 1))
2513 if (grsave_prev
== 0)
2514 grsave
= current_frame_info
.reg_save_pr
;
2518 fprintf (file
, "\t.prologue %d, %d\n", mask
,
2519 ia64_dbx_register_number (grsave
));
2521 fputs ("\t.prologue\n", file
);
2523 /* Emit a .spill directive, if necessary, to relocate the base of
2524 the register spill area. */
2525 if (current_frame_info
.spill_cfa_off
!= -16)
2526 fprintf (file
, "\t.spill %ld\n",
2527 (long) (current_frame_info
.spill_cfa_off
2528 + current_frame_info
.spill_size
));
/* Emit the unwind .body directive into FILE at the scheduled end of the
   prologue; suppressed when neither unwind tables nor (non-SJLJ)
   exceptions are in use.  */
2531 /* Emit the .body directive at the scheduled end of the prologue. */
2534 ia64_output_end_prologue (file
)
2537 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2540 fputs ("\t.body\n", file
);
/* Per-function cleanup run at epilogue output time: restore global state
   that prologue/epilogue expansion modified for this function -- the
   return-address-pointer regno, the swapped frame-pointer register name,
   and (when -mno-register-names) the in/loc/out register name aliases --
   then mark the cached frame info invalid.  */
2543 /* Emit the function epilogue. */
2546 ia64_function_epilogue (file
, size
)
2547 FILE *file ATTRIBUTE_UNUSED
;
2548 int size ATTRIBUTE_UNUSED
;
2552 /* Reset from the function's potential modifications. */
2553 XINT (return_address_pointer_rtx
, 0) = RETURN_ADDRESS_POINTER_REGNUM
;
2555 if (current_frame_info
.reg_fp
)
/* Undo the name swap done when the frame pointer was renamed into the
   register frame (see ia64_dbx_register_number's comment).  */
2557 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2558 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2559 = reg_names
[current_frame_info
.reg_fp
];
2560 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2562 if (! TARGET_REG_NAMES
)
2564 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
2565 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
2566 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
2567 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
2568 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
2569 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
/* Force ia64_compute_frame_size to recompute for the next function.  */
2572 current_frame_info
.initialized
= 0;
/* Map hard register number REGNO to the debugger (DBX/DWARF) register
   number: undo the prologue's frame-pointer renaming, then renumber
   stacked registers so inputs, locals, and outputs are consecutive
   starting at 32.  NOTE(review): the fall-through return for
   non-stacked registers was dropped by the extraction.  */
2576 ia64_dbx_register_number (regno
)
2579 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2580 from its home at loc79 to something inside the register frame. We
2581 must perform the same renumbering here for the debug info. */
2582 if (current_frame_info
.reg_fp
)
2584 if (regno
== HARD_FRAME_POINTER_REGNUM
)
2585 regno
= current_frame_info
.reg_fp
;
2586 else if (regno
== current_frame_info
.reg_fp
)
2587 regno
= HARD_FRAME_POINTER_REGNUM
;
2590 if (IN_REGNO_P (regno
))
2591 return 32 + regno
- IN_REG (0);
2592 else if (LOC_REGNO_P (regno
))
2593 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
2594 else if (OUT_REGNO_P (regno
))
2595 return (32 + current_frame_info
.n_input_regs
2596 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
/* Emit RTL to fill in the four-word trampoline at ADDR: a fake function
   descriptor (__ia64_trampoline entry point, gp = ADDR+16), then the
   target's descriptor address FNADDR, then STATIC_CHAIN.  ADDR_REG is
   stepped through the words eight bytes at a time.  */
2602 ia64_initialize_trampoline (addr
, fnaddr
, static_chain
)
2603 rtx addr
, fnaddr
, static_chain
;
2605 rtx addr_reg
, eight
= GEN_INT (8);
2607 /* Load up our iterator. */
2608 addr_reg
= gen_reg_rtx (Pmode
);
2609 emit_move_insn (addr_reg
, addr
);
2611 /* The first two words are the fake descriptor:
2612 __ia64_trampoline, ADDR+16. */
2613 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
2614 gen_rtx_SYMBOL_REF (Pmode
, "__ia64_trampoline"));
2615 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2617 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
2618 copy_to_reg (plus_constant (addr
, 16)));
2619 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2621 /* The third word is the target descriptor. */
2622 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), fnaddr
);
2623 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2625 /* The fourth word is the static chain. */
2626 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), static_chain
);
/* Record how many of the 8 argument slots remain for unnamed varargs:
   advance CUM past the last named argument (stdarg only), then stash the
   remaining slot count in cfun->machine->n_varargs and report the pretend
   argument size.  The actual register spills are emitted by the prologue.  */
2629 /* Do any needed setup for a variadic function. CUM has not been updated
2630 for the last named argument which has type TYPE and mode MODE.
2632 We generate the actual spill instructions during prologue generation. */
2635 ia64_setup_incoming_varargs (cum
, int_mode
, type
, pretend_size
, second_time
)
2636 CUMULATIVE_ARGS cum
;
2640 int second_time ATTRIBUTE_UNUSED
;
2642 /* If this is a stdarg function, then skip the current argument. */
2643 if (! current_function_varargs
)
2644 ia64_function_arg_advance (&cum
, int_mode
, type
, 1);
2646 if (cum
.words
< MAX_ARGUMENT_SLOTS
)
2648 int n
= MAX_ARGUMENT_SLOTS
- cum
.words
;
2649 *pretend_size
= n
* UNITS_PER_WORD
;
2650 cfun
->machine
->n_varargs
= n
;
/* Determine whether TYPE is a homogeneous floating-point aggregate (HFA).
   Returns the common leaf FP mode, or VOIDmode if TYPE is not an HFA.
   NESTED distinguishes a raw REAL_TYPE from one contained in an aggregate.
   Recurses through record/union/array leaves via TREE_CODE dispatch.
   NOTE(review): several switch labels, the array/record recursion entries,
   and return statements were dropped by the extraction; code left
   byte-identical.  */
2654 /* Check whether TYPE is a homogeneous floating point aggregate. If
2655 it is, return the mode of the floating point type that appears
2656 in all leafs. If it is not, return VOIDmode.
2658 An aggregate is a homogeneous floating point aggregate is if all
2659 fields/elements in it have the same floating point type (e.g,
2660 SFmode). 128-bit quad-precision floats are excluded. */
2662 static enum machine_mode
2663 hfa_element_mode (type
, nested
)
2667 enum machine_mode element_mode
= VOIDmode
;
2668 enum machine_mode mode
;
2669 enum tree_code code
= TREE_CODE (type
);
2670 int know_element_mode
= 0;
/* Scalar, pointer, and other non-FP types can never be HFA leaves.  */
2675 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
2676 case BOOLEAN_TYPE
: case CHAR_TYPE
: case POINTER_TYPE
:
2677 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
2678 case FILE_TYPE
: case SET_TYPE
: case LANG_TYPE
:
2682 /* Fortran complex types are supposed to be HFAs, so we need to handle
2683 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2686 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
)
2687 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type
))
2688 * BITS_PER_UNIT
, MODE_FLOAT
, 0);
2693 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2694 mode if this is contained within an aggregate. */
2696 return TYPE_MODE (type
);
2701 return TYPE_MODE (TREE_TYPE (type
));
2705 case QUAL_UNION_TYPE
:
/* Walk the fields; every FIELD_DECL leaf must agree on one FP mode.  */
2706 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
2708 if (TREE_CODE (t
) != FIELD_DECL
)
2711 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
2712 if (know_element_mode
)
2714 if (mode
!= element_mode
)
2717 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
2721 know_element_mode
= 1;
2722 element_mode
= mode
;
2725 return element_mode
;
2728 /* If we reach here, we probably have some front-end specific type
2729 that the backend doesn't know about. This can happen via the
2730 aggregate_value_p call in init_function_start. All we can do is
2731 ignore unknown tree types. */
/* Return the rtx (REG or PARALLEL) for where an argument of MODE/TYPE is
   passed, or zero if it goes on the stack.  Implements the IA-64 calling
   convention: 8 argument slots, even-slot alignment for 16-byte-aligned
   types, HFAs split across FR and GR registers, FP scalars in FR regs when
   prototyped/named and duplicated in FR+GR when unprototyped.
   NOTE(review): many lines (declarations of offset/i/loc/byte_size/
   args_byte_size, several expression continuations, and early returns)
   were dropped by the extraction; code left byte-identical.  */
2738 /* Return rtx for register where argument is passed, or zero if it is passed
2741 /* ??? 128-bit quad-precision floats are always passed in general
2745 ia64_function_arg (cum
, mode
, type
, named
, incoming
)
2746 CUMULATIVE_ARGS
*cum
;
2747 enum machine_mode mode
;
2752 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
2753 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
2754 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
2757 enum machine_mode hfa_mode
= VOIDmode
;
2759 /* Integer and float arguments larger than 8 bytes start at the next even
2760 boundary. Aggregates larger than 8 bytes start at the next even boundary
2761 if the aggregate has 16 byte alignment. Net effect is that types with
2762 alignment greater than 8 start at the next even boundary. */
2763 /* ??? The ABI does not specify how to handle aggregates with alignment from
2764 9 to 15 bytes, or greater than 16. We handle them all as if they had
2765 16 byte alignment. Such aggregates can occur only if gcc extensions are
2767 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
2769 && (cum
->words
& 1))
2772 /* If all argument slots are used, then it must go on the stack. */
2773 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
2776 /* Check for and handle homogeneous FP aggregates. */
2778 hfa_mode
= hfa_element_mode (type
, 0);
2780 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2781 and unprototyped hfas are passed specially. */
2782 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
2786 int fp_regs
= cum
->fp_regs
;
2787 int int_regs
= cum
->words
+ offset
;
2788 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
2792 /* If prototyped, pass it in FR regs then GR regs.
2793 If not prototyped, pass it in both FR and GR regs.
2795 If this is an SFmode aggregate, then it is possible to run out of
2796 FR regs while GR regs are still left. In that case, we pass the
2797 remaining part in the GR regs. */
2799 /* Fill the FP regs. We do this always. We stop if we reach the end
2800 of the argument, the last FP register, or the last argument slot. */
2802 byte_size
= ((mode
== BLKmode
)
2803 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
2804 args_byte_size
= int_regs
* UNITS_PER_WORD
;
2806 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
2807 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
2809 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
2810 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
2814 args_byte_size
+= hfa_size
;
2818 /* If no prototype, then the whole thing must go in GR regs. */
2819 if (! cum
->prototype
)
2821 /* If this is an SFmode aggregate, then we might have some left over
2822 that needs to go in GR regs. */
2823 else if (byte_size
!= offset
)
2824 int_regs
+= offset
/ UNITS_PER_WORD
;
2826 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2828 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
2830 enum machine_mode gr_mode
= DImode
;
2832 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2833 then this goes in a GR reg left adjusted/little endian, right
2834 adjusted/big endian. */
2835 /* ??? Currently this is handled wrong, because 4-byte hunks are
2836 always right adjusted/little endian. */
2839 /* If we have an even 4 byte hunk because the aggregate is a
2840 multiple of 4 bytes in size, then this goes in a GR reg right
2841 adjusted/little endian. */
2842 else if (byte_size
- offset
== 4)
2845 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
2846 gen_rtx_REG (gr_mode
, (basereg
2849 offset
+= GET_MODE_SIZE (gr_mode
);
2853 /* If we ended up using just one location, just return that one loc. */
2855 return XEXP (loc
[0], 0);
2857 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
2860 /* Integral and aggregates go in general registers. If we have run out of
2861 FR registers, then FP values must also go in general registers. This can
2862 happen when we have a SFmode HFA. */
2863 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
2864 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
2866 /* If there is a prototype, then FP values go in a FR register when
2867 named, and in a GR registeer when unnamed. */
2868 else if (cum
->prototype
)
2871 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
2873 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
2875 /* If there is no prototype, then FP values go in both FR and GR
/* Unprototyped FP scalar: build a two-element PARALLEL so the value is
   available in both an FR and a GR register at the call site.  */
2879 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
2880 gen_rtx_REG (mode
, (FR_ARG_FIRST
2883 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
2885 (basereg
+ cum
->words
2889 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
/* Return the number of argument words that must go in registers when an
   argument straddles the register/stack boundary; 0 when the argument is
   entirely in registers or entirely on the stack.  Mirrors the alignment
   and slot-exhaustion logic of ia64_function_arg.
   NOTE(review): the declaration of `offset` and the intermediate returns
   were dropped by the extraction; code left byte-identical.  */
2893 /* Return number of words, at the beginning of the argument, that must be
2894 put in registers. 0 is the argument is entirely in registers or entirely
2898 ia64_function_arg_partial_nregs (cum
, mode
, type
, named
)
2899 CUMULATIVE_ARGS
*cum
;
2900 enum machine_mode mode
;
2902 int named ATTRIBUTE_UNUSED
;
2904 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
2905 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
2909 /* Arguments with alignment larger than 8 bytes start at the next even
2911 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
2913 && (cum
->words
& 1))
2916 /* If all argument slots are used, then it must go on the stack. */
2917 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
2920 /* It doesn't matter whether the argument goes in FR or GR regs. If
2921 it fits within the 8 argument slots, then it goes entirely in
2922 registers. If it extends past the last argument slot, then the rest
2923 goes on the stack. */
2925 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
2928 return MAX_ARGUMENT_SLOTS
- cum
->words
- offset
;
/* Advance CUM past an argument of MODE/TYPE, mirroring the placement
   decisions of ia64_function_arg: bump cum->words (with even-slot
   alignment padding), and bump cum->fp_regs for HFAs and FP scalars.
   NOTE(review): the declaration of `offset`, the HFA loop body's
   offset/fp_regs increments, and several returns were dropped by the
   extraction; code left byte-identical.  */
2931 /* Update CUM to point after this argument. This is patterned after
2932 ia64_function_arg. */
2935 ia64_function_arg_advance (cum
, mode
, type
, named
)
2936 CUMULATIVE_ARGS
*cum
;
2937 enum machine_mode mode
;
2941 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
2942 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
2945 enum machine_mode hfa_mode
= VOIDmode
;
2947 /* If all arg slots are already full, then there is nothing to do. */
2948 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
2951 /* Arguments with alignment larger than 8 bytes start at the next even
2953 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
2955 && (cum
->words
& 1))
2958 cum
->words
+= words
+ offset
;
2960 /* Check for and handle homogeneous FP aggregates. */
2962 hfa_mode
= hfa_element_mode (type
, 0);
2964 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2965 and unprototyped hfas are passed specially. */
2966 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
2968 int fp_regs
= cum
->fp_regs
;
2969 /* This is the original value of cum->words + offset. */
2970 int int_regs
= cum
->words
- words
;
2971 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
2975 /* If prototyped, pass it in FR regs then GR regs.
2976 If not prototyped, pass it in both FR and GR regs.
2978 If this is an SFmode aggregate, then it is possible to run out of
2979 FR regs while GR regs are still left. In that case, we pass the
2980 remaining part in the GR regs. */
2982 /* Fill the FP regs. We do this always. We stop if we reach the end
2983 of the argument, the last FP register, or the last argument slot. */
2985 byte_size
= ((mode
== BLKmode
)
2986 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
2987 args_byte_size
= int_regs
* UNITS_PER_WORD
;
2989 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
2990 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
2993 args_byte_size
+= hfa_size
;
2997 cum
->fp_regs
= fp_regs
;
3000 /* Integral and aggregates go in general registers. If we have run out of
3001 FR registers, then FP values must also go in general registers. This can
3002 happen when we have a SFmode HFA. */
3003 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
3006 /* If there is a prototype, then FP values go in a FR register when
3007 named, and in a GR registeer when unnamed. */
3008 else if (cum
->prototype
)
3013 /* ??? Complex types should not reach here. */
3014 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3016 /* If there is no prototype, then FP values go in both FR and GR
3019 /* ??? Complex types should not reach here. */
3020 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
/* Implement va_start: when all 8 argument slots were consumed by named
   arguments, back NEXTARG up one word to compensate (the last named
   argument's slot), then defer to the generic expander.  */
3025 /* Implement va_start. */
3028 ia64_va_start (stdarg_p
, valist
, nextarg
)
3036 arg_words
= current_function_args_info
.words
;
3041 ofs
= (arg_words
>= MAX_ARGUMENT_SLOTS
? -UNITS_PER_WORD
: 0);
3043 nextarg
= plus_constant (nextarg
, ofs
);
3044 std_expand_builtin_va_start (1, valist
, nextarg
);
/* Implement va_arg: for types with alignment greater than 8 bytes, first
   round the va_list pointer up to the next 16-byte (2-word) boundary via
   an in-tree add-and-mask, then defer to the generic expander.  */
3047 /* Implement va_arg. */
3050 ia64_va_arg (valist
, type
)
3055 /* Arguments with alignment larger than 8 bytes start at the next even
3057 if (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
/* valist = (valist + 2*UNITS_PER_WORD - 1) & -(2*UNITS_PER_WORD).  */
3059 t
= build (PLUS_EXPR
, TREE_TYPE (valist
), valist
,
3060 build_int_2 (2 * UNITS_PER_WORD
- 1, 0));
3061 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
3062 build_int_2 (-2 * UNITS_PER_WORD
, -1));
3063 t
= build (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
3064 TREE_SIDE_EFFECTS (t
) = 1;
3065 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3068 return std_expand_builtin_va_arg (valist
, type
);
/* Return 1 if a value of type VALTYPE must be returned in memory, 0 if in
   registers: HFAs fit in registers up to MAX_ARGUMENT_SLOTS elements;
   other values fit up to UNITS_PER_WORD * MAX_INT_RETURN_SLOTS bytes.
   NOTE(review): the 0/1 return statements were dropped by the
   extraction; code left byte-identical.  */
3071 /* Return 1 if function return value returned in memory. Return 0 if it is
3075 ia64_return_in_memory (valtype
)
3078 enum machine_mode mode
;
3079 enum machine_mode hfa_mode
;
3082 mode
= TYPE_MODE (valtype
);
3083 byte_size
= ((mode
== BLKmode
)
3084 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
3086 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3088 hfa_mode
= hfa_element_mode (valtype
, 0);
3089 if (hfa_mode
!= VOIDmode
)
3091 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3093 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
3099 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
/* Return the rtx for the register(s) holding a function return value of
   type VALTYPE: HFAs as a PARALLEL of FR registers (one per element),
   other FP types in FR_ARG_FIRST, everything else in GR_RET_FIRST.
   NOTE(review): the declarations of i/offset/loc and the offset update in
   the HFA loop were dropped by the extraction; code left byte-identical.  */
3105 /* Return rtx for register that holds the function return value. */
3108 ia64_function_value (valtype
, func
)
3110 tree func ATTRIBUTE_UNUSED
;
3112 enum machine_mode mode
;
3113 enum machine_mode hfa_mode
;
3115 mode
= TYPE_MODE (valtype
);
3116 hfa_mode
= hfa_element_mode (valtype
, 0);
3118 if (hfa_mode
!= VOIDmode
)
3126 hfa_size
= GET_MODE_SIZE (hfa_mode
);
3127 byte_size
= ((mode
== BLKmode
)
3128 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
3130 for (i
= 0; offset
< byte_size
; i
++)
3132 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3133 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
3139 return XEXP (loc
[0], 0);
3141 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3143 else if (FLOAT_TYPE_P (valtype
))
3144 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
3146 return gen_rtx_REG (mode
, GR_RET_FIRST
);
/* Print a memory address operand.  Deliberately empty on IA-64 (both
   parameters are ATTRIBUTE_UNUSED); used only for 'a' operands.  */
3149 /* Print a memory address as an operand to reference that memory location. */
3151 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3152 also call this from ia64_print_operand for memory addresses. */
3155 ia64_print_operand_address (stream
, address
)
3156 FILE * stream ATTRIBUTE_UNUSED
;
3157 rtx address ATTRIBUTE_UNUSED
;
/* Print operand X to FILE for the assembler, interpreted per the modifier
   CODE documented in the header comment below (C, D, E, e, F, I, J, j, O,
   P, Q, S, T, U, r, +).  The trailing switch handles the no-modifier case
   for REG, MEM, and constants.
   NOTE(review): many case labels, break statements, and intermediate
   lines were dropped by the extraction; code left byte-identical.  */
3161 /* Print an operand to a assembler instruction.
3162 C Swap and print a comparison operator.
3163 D Print an FP comparison operator.
3164 E Print 32 - constant, for SImode shifts as extract.
3165 e Print 64 - constant, for DImode rotates.
3166 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3167 a floating point register emitted normally.
3168 I Invert a predicate register by adding 1.
3169 J Select the proper predicate register for a condition.
3170 j Select the inverse predicate register for a condition.
3171 O Append .acq for volatile load.
3172 P Postincrement of a MEM.
3173 Q Append .rel for volatile store.
3174 S Shift amount for shladd instruction.
3175 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3176 for Intel assembler.
3177 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3178 for Intel assembler.
3179 r Print register name, or constant 0 as r0. HP compatibility for
3182 ia64_print_operand (file
, x
, code
)
3192 /* Handled below. */
/* 'C': print the comparison with its operands swapped.  */
3197 enum rtx_code c
= swap_condition (GET_CODE (x
));
3198 fputs (GET_RTX_NAME (c
), file
);
3203 switch (GET_CODE (x
))
3215 str
= GET_RTX_NAME (GET_CODE (x
));
/* 'E': 32 - constant, for SImode shifts emitted as extract.  */
3222 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
/* 'e': 64 - constant, for DImode rotates.  */
3226 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
/* 'F': FP constant 0.0 as f0, 1.0 as f1, else a register name.  */
3230 if (x
== CONST0_RTX (GET_MODE (x
)))
3231 str
= reg_names
[FR_REG (0)];
3232 else if (x
== CONST1_RTX (GET_MODE (x
)))
3233 str
= reg_names
[FR_REG (1)];
3234 else if (GET_CODE (x
) == REG
)
3235 str
= reg_names
[REGNO (x
)];
/* 'I': the inverse predicate register (regno + 1).  */
3242 fputs (reg_names
[REGNO (x
) + 1], file
);
/* 'J'/'j': proper or inverse predicate register for a condition.  */
3248 unsigned int regno
= REGNO (XEXP (x
, 0));
3249 if (GET_CODE (x
) == EQ
)
3253 fputs (reg_names
[regno
], file
);
/* 'O': append .acq for a volatile load.  */
3258 if (MEM_VOLATILE_P (x
))
3259 fputs(".acq", file
);
/* 'P': print the postincrement amount of a MEM.  */
3264 HOST_WIDE_INT value
;
3266 switch (GET_CODE (XEXP (x
, 0)))
3272 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
3273 if (GET_CODE (x
) == CONST_INT
)
3275 else if (GET_CODE (x
) == REG
)
3277 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
3285 value
= GET_MODE_SIZE (GET_MODE (x
));
3289 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
3295 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, value
);
/* 'Q': append .rel for a volatile store.  */
3300 if (MEM_VOLATILE_P (x
))
3301 fputs(".rel", file
);
/* 'S': shift amount for shladd (log2 of the constant).  */
3305 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
/* 'T': sign-extended 8-bit constant as 32-bit unsigned (Intel as).  */
3309 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3311 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
/* 'U': sign-extended 8-bit constant as 64-bit unsigned (Intel as).  */
3317 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3319 const char *prefix
= "0x";
3320 if (INTVAL (x
) & 0x80000000)
3322 fprintf (file
, "0xffffffff");
3325 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
3331 /* If this operand is the constant zero, write it as register zero.
3332 Any register, zero, or CONST_INT value is OK here. */
3333 if (GET_CODE (x
) == REG
)
3334 fputs (reg_names
[REGNO (x
)], file
);
3335 else if (x
== CONST0_RTX (GET_MODE (x
)))
3337 else if (GET_CODE (x
) == CONST_INT
)
3338 output_addr_const (file
, x
);
3340 output_operand_lossage ("invalid %%r value");
3347 /* For conditional branches, returns or calls, substitute
3348 sptk, dptk, dpnt, or spnt for %s. */
3349 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
3352 int pred_val
= INTVAL (XEXP (x
, 0));
3354 /* Guess top and bottom 10% statically predicted. */
3355 if (pred_val
< REG_BR_PROB_BASE
/ 50)
3357 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
3359 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98)
3364 else if (GET_CODE (current_output_insn
) == CALL_INSN
)
3369 fputs (which
, file
);
/* Emit the "(pN)" qualifying-predicate prefix for the insn, if any.  */
3374 x
= current_insn_predicate
;
3377 unsigned int regno
= REGNO (XEXP (x
, 0));
3378 if (GET_CODE (x
) == EQ
)
3380 fprintf (file
, "(%s) ", reg_names
[regno
]);
3385 output_operand_lossage ("ia64_print_operand: unknown code");
/* No modifier: handle plain REG, MEM, and constant operands.  */
3389 switch (GET_CODE (x
))
3391 /* This happens for the spill/restore instructions. */
3396 /* ... fall through ... */
3399 fputs (reg_names
[REGNO (x
)], file
);
3404 rtx addr
= XEXP (x
, 0);
3405 if (GET_RTX_CLASS (GET_CODE (addr
)) == 'a')
3406 addr
= XEXP (addr
, 0);
3407 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
3412 output_addr_const (file
, x
);
/* Relative cost of moving data between register classes FROM and TO.
   Classifies each class as "hard" (BR/AR), GR, FR, or PR and charges more
   for moves that must bounce through GRs, PR<->PR double moves, and
   FR<->GR moves (kept above MEMORY_MOVE_COST so TFmode prefers memory
   reloads).  NOTE(review): the numeric cost returns and the
   from_gr/to_gr/from_fr/... declarations were partially dropped by the
   extraction; code left byte-identical.  */
3419 /* Calulate the cost of moving data from a register in class FROM to
3423 ia64_register_move_cost (from
, to
)
3424 enum reg_class from
, to
;
3426 int from_hard
, to_hard
;
3431 from_hard
= (from
== BR_REGS
|| from
== AR_M_REGS
|| from
== AR_I_REGS
);
3432 to_hard
= (to
== BR_REGS
|| to
== AR_M_REGS
|| to
== AR_I_REGS
);
3433 from_gr
= (from
== GENERAL_REGS
);
3434 to_gr
= (to
== GENERAL_REGS
);
3435 from_fr
= (from
== FR_REGS
);
3436 to_fr
= (to
== FR_REGS
);
3437 from_pr
= (from
== PR_REGS
);
3438 to_pr
= (to
== PR_REGS
);
3440 if (from_hard
&& to_hard
)
3442 else if ((from_hard
&& !to_gr
) || (!from_gr
&& to_hard
))
3445 /* Moving between PR registers takes two insns. */
3446 else if (from_pr
&& to_pr
)
3448 /* Moving between PR and anything but GR is impossible. */
3449 else if ((from_pr
&& !to_gr
) || (!from_gr
&& to_pr
))
3452 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3453 secondary memory reloads for TFmode moves. Unfortunately, we don't
3454 have the mode here, so we can't check that. */
3455 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3456 to avoid spectacularly poor register class preferencing for TFmode. */
3457 else if (from_fr
!= to_fr
)
/* Return the register class needed as an intermediate when copying between
   CLASS and operand X in MODE; NO_REGS means no secondary register is
   required.  Each special case below works around a specific reload/cse
   interaction documented in its comment.
   NOTE(review): the `case` labels selecting on CLASS, the returned class
   values, and the final return were dropped by the extraction; code left
   byte-identical.  */
3463 /* This function returns the register class required for a secondary
3464 register when copying between one of the registers in CLASS, and X,
3465 using MODE. A return value of NO_REGS means that no secondary register
3469 ia64_secondary_reload_class (class, mode
, x
)
3470 enum reg_class
class;
3471 enum machine_mode mode ATTRIBUTE_UNUSED
;
3476 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
3477 regno
= true_regnum (x
);
3482 /* ??? This is required because of a bad gcse/cse/global interaction.
3483 We end up with two pseudos with overlapping lifetimes both of which
3484 are equiv to the same constant, and both which need to be in BR_REGS.
3485 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3486 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3487 This seems to be a cse bug. cse_basic_block_end changes depending
3488 on the path length, which means the qty_first_reg check in
3489 make_regs_eqv can give different answers at different times. */
3490 /* ??? At some point I'll probably need a reload_indi pattern to handle
3492 if (BR_REGNO_P (regno
))
3495 /* This is needed if a pseudo used as a call_operand gets spilled to a
3497 if (GET_CODE (x
) == MEM
)
3502 /* This can happen when a paradoxical subreg is an operand to the
3504 /* ??? This shouldn't be necessary after instruction scheduling is
3505 enabled, because paradoxical subregs are not accepted by
3506 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3507 stop the paradoxical subreg stupidity in the *_operand functions
3509 if (GET_CODE (x
) == MEM
3510 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
3511 || GET_MODE (x
) == QImode
))
3514 /* This can happen because of the ior/and/etc patterns that accept FP
3515 registers as operands. If the third operand is a constant, then it
3516 needs to be reloaded into a FP register. */
3517 if (GET_CODE (x
) == CONST_INT
)
3520 /* This can happen because of register elimination in a muldi3 insn.
3521 E.g. `26107 * (unsigned long)&u'. */
3522 if (GET_CODE (x
) == PLUS
)
3527 /* ??? This happens if we cse/gcse a BImode value across a call,
3528 and the function has a nonlocal goto. This is because global
3529 does not allocate call crossing pseudos to hard registers when
3530 current_function_has_nonlocal_goto is true. This is relatively
3531 common for C++ programs that use exceptions. To reproduce,
3532 return NO_REGS and compile libstdc++. */
3533 if (GET_CODE (x
) == MEM
)
3536 /* This can happen when we take a BImode subreg of a DImode value,
3537 and that DImode value winds up in some non-GR register. */
3538 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
3543 /* Since we have no offsettable memory addresses, we need a temporary
3544 to hold the address of the second word. */
/* Declare an external symbol NAME (for DECL) to FILE for the Intel
   assembler, which requires explicit declarations of undefined externals
   (GNU as does not).  Skips builtins with no library equivalent, emits a
   .type directive for functions, globalizes the label, and preserves
   TREE_SYMBOL_REFERENCED across the assemble_name call.  */
3557 /* Emit text to declare externally defined variables and functions, because
3558 the Intel assembler does not support undefined externals. */
3561 ia64_asm_output_external (file
, decl
, name
)
3566 int save_referenced
;
3568 /* GNU as does not need anything here. */
3572 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3573 the linker when we do this, so we need to be careful not to do this for
3574 builtin functions which have no library equivalent. Unfortunately, we
3575 can't tell here whether or not a function will actually be called by
3576 expand_expr, so we pull in library functions even if we may not need
3578 if (! strcmp (name
, "__builtin_next_arg")
3579 || ! strcmp (name
, "alloca")
3580 || ! strcmp (name
, "__builtin_constant_p")
3581 || ! strcmp (name
, "__builtin_args_info"))
3584 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3586 save_referenced
= TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
));
3587 if (TREE_CODE (decl
) == FUNCTION_DECL
)
3589 fprintf (file
, "%s", TYPE_ASM_OP
);
3590 assemble_name (file
, name
);
3592 fprintf (file
, TYPE_OPERAND_FMT
, "function");
3595 ASM_GLOBALIZE_LABEL (file
, name
);
3596 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)) = save_referenced
;
/* Parse the -mfixed-range=REG1-REG2[,REG1-REG2...] option string and mark
   every register in each range as fixed and call-used, so the compiler
   never allocates it (e.g. to keep kernel code off f32-f127).  Works on a
   writable alloca'd copy of CONST_STR; warns and returns on malformed
   input.  NOTE(review): the loop structure around the comma-separated
   ranges and the dash-termination writes were dropped by the extraction;
   code left byte-identical.  */
3599 /* Parse the -mfixed-range= option string. */
3602 fix_range (const_str
)
3603 const char *const_str
;
3606 char *str
, *dash
, *comma
;
3608 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
3609 REG2 are either register names or register numbers. The effect
3610 of this option is to mark the registers in the range from REG1 to
3611 REG2 as ``fixed'' so they won't be used by the compiler. This is
3612 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3614 i
= strlen (const_str
);
3615 str
= (char *) alloca (i
+ 1);
3616 memcpy (str
, const_str
, i
+ 1);
3620 dash
= strchr (str
, '-');
3623 warning ("value of -mfixed-range must have form REG1-REG2");
3628 comma
= strchr (dash
+ 1, ',');
3632 first
= decode_reg_name (str
);
3635 warning ("unknown register name: %s", str
);
3639 last
= decode_reg_name (dash
+ 1);
3642 warning ("unknown register name: %s", dash
+ 1);
3650 warning ("%s-%s is an empty range", str
, dash
+ 1);
3654 for (i
= first
; i
<= last
; ++i
)
3655 fixed_regs
[i
] = call_used_regs
[i
] = 1;
/* Register the back end's file-scope rtx globals (the saved compare
   operands) as garbage-collector roots.  */
3665 /* Called to register all of our global variables with the garbage
3669 ia64_add_gc_roots ()
3671 ggc_add_rtx_root (&ia64_compare_op0
, 1);
3672 ggc_add_rtx_root (&ia64_compare_op1
, 1);
/* init_machine_status hook: allocate a zeroed per-function
   machine_function record for function P.  */
3676 ia64_init_machine_status (p
)
3680 (struct machine_function
*) xcalloc (1, sizeof (struct machine_function
));
/* mark_machine_status hook: mark the rtx fields of P's machine_function
   record so the garbage collector keeps them alive.  */
3684 ia64_mark_machine_status (p
)
3687 struct machine_function
*machine
= p
->machine
;
3691 ggc_mark_rtx (machine
->ia64_eh_epilogue_sp
);
3692 ggc_mark_rtx (machine
->ia64_eh_epilogue_bsp
);
3693 ggc_mark_rtx (machine
->ia64_gp_save
);
/* free_machine_status hook: release P's machine_function record.
   NOTE(review): the body was dropped by the extraction.  */
3698 ia64_free_machine_status (p
)
/* Handle TARGET_OPTIONS switches after option parsing: imply -mconst-gp
   for -mauto-pic, resolve the conflicting inline-division flags (latency
   wins), apply -mfixed-range, defer second scheduling to the machine-
   dependent reorg, set the small-data threshold, install the
   machine_function hooks, and register GC roots.  */
3705 /* Handle TARGET_OPTIONS switches. */
3708 ia64_override_options ()
3710 if (TARGET_AUTO_PIC
)
3711 target_flags
|= MASK_CONST_GP
;
3713 if (TARGET_INLINE_DIV_LAT
&& TARGET_INLINE_DIV_THR
)
3715 warning ("cannot optimize division for both latency and throughput");
3716 target_flags
&= ~MASK_INLINE_DIV_THR
;
3719 if (ia64_fixed_range_string
)
3720 fix_range (ia64_fixed_range_string
);
/* Run the second insn-scheduling pass ourselves (from machine reorg)
   rather than letting the generic pass do it.  */
3722 ia64_flag_schedule_insns2
= flag_schedule_insns_after_reload
;
3723 flag_schedule_insns_after_reload
= 0;
3725 ia64_section_threshold
= g_switch_set
? g_switch_value
: IA64_DEFAULT_GVALUE
;
3727 init_machine_status
= ia64_init_machine_status
;
3728 mark_machine_status
= ia64_mark_machine_status
;
3729 free_machine_status
= ia64_free_machine_status
;
3731 ia64_add_gc_roots ();
3734 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0
PARAMS((rtx
));
3735 static enum attr_itanium_class ia64_safe_itanium_class
PARAMS((rtx
));
3736 static enum attr_type ia64_safe_type
PARAMS((rtx
));
3738 static enum attr_itanium_requires_unit0
3739 ia64_safe_itanium_requires_unit0 (insn
)
3742 if (recog_memoized (insn
) >= 0)
3743 return get_attr_itanium_requires_unit0 (insn
);
3745 return ITANIUM_REQUIRES_UNIT0_NO
;
3748 static enum attr_itanium_class
3749 ia64_safe_itanium_class (insn
)
3752 if (recog_memoized (insn
) >= 0)
3753 return get_attr_itanium_class (insn
);
3755 return ITANIUM_CLASS_UNKNOWN
;
3758 static enum attr_type
3759 ia64_safe_type (insn
)
3762 if (recog_memoized (insn
) >= 0)
3763 return get_attr_type (insn
);
3765 return TYPE_UNKNOWN
;
3768 /* The following collection of routines emit instruction group stop bits as
3769 necessary to avoid dependencies. */
3771 /* Need to track some additional registers as far as serialization is
3772 concerned so we can properly handle br.call and br.ret. We could
3773 make these registers visible to gcc, but since these registers are
3774 never explicitly used in gcc generated code, it seems wasteful to
3775 do so (plus it would make the call and return patterns needlessly
3777 #define REG_GP (GR_REG (1))
3778 #define REG_RP (BR_REG (0))
3779 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3780 /* This is used for volatile asms which may require a stop bit immediately
3781 before and after them. */
3782 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
3783 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3784 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
3786 /* For each register, we keep track of how it has been written in the
3787 current instruction group.
3789 If a register is written unconditionally (no qualifying predicate),
3790 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3792 If a register is written if its qualifying predicate P is true, we
3793 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3794 may be written again by the complement of P (P^1) and when this happens,
3795 WRITE_COUNT gets set to 2.
3797 The result of this is that whenever an insn attempts to write a register
3798 whose WRITE_COUNT is two, we need to issue a insn group barrier first.
3800 If a predicate register is written by a floating-point insn, we set
3801 WRITTEN_BY_FP to true.
3803 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3804 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
3806 struct reg_write_state
3808 unsigned int write_count
: 2;
3809 unsigned int first_pred
: 16;
3810 unsigned int written_by_fp
: 1;
3811 unsigned int written_by_and
: 1;
3812 unsigned int written_by_or
: 1;
3815 /* Cumulative info for the current instruction group. */
3816 struct reg_write_state rws_sum
[NUM_REGS
];
3817 /* Info for the current instruction. This gets copied to rws_sum after a
3818 stop bit is emitted. */
3819 struct reg_write_state rws_insn
[NUM_REGS
];
3821 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3822 RTL for one instruction. */
3825 unsigned int is_write
: 1; /* Is register being written? */
3826 unsigned int is_fp
: 1; /* Is register used as part of an fp op? */
3827 unsigned int is_branch
: 1; /* Is register used as part of a branch? */
3828 unsigned int is_and
: 1; /* Is register used as part of and.orcm? */
3829 unsigned int is_or
: 1; /* Is register used as part of or.andcm? */
3830 unsigned int is_sibcall
: 1; /* Is this a sibling or normal call? */
3833 static void rws_update
PARAMS ((struct reg_write_state
*, int,
3834 struct reg_flags
, int));
3835 static int rws_access_regno
PARAMS ((int, struct reg_flags
, int));
3836 static int rws_access_reg
PARAMS ((rtx
, struct reg_flags
, int));
3837 static void update_set_flags
PARAMS ((rtx
, struct reg_flags
*, int *, rtx
*));
3838 static int set_src_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int, rtx
));
3839 static int rtx_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int));
3840 static void init_insn_group_barriers
PARAMS ((void));
3841 static int group_barrier_needed_p
PARAMS ((rtx
));
3842 static int safe_group_barrier_needed_p
PARAMS ((rtx
));
3844 /* Update *RWS for REGNO, which is being written by the current instruction,
3845 with predicate PRED, and associated register flags in FLAGS. */
3848 rws_update (rws
, regno
, flags
, pred
)
3849 struct reg_write_state
*rws
;
3851 struct reg_flags flags
;
3854 rws
[regno
].write_count
+= pred
? 1 : 2;
3855 rws
[regno
].written_by_fp
|= flags
.is_fp
;
3856 /* ??? Not tracking and/or across differing predicates. */
3857 rws
[regno
].written_by_and
= flags
.is_and
;
3858 rws
[regno
].written_by_or
= flags
.is_or
;
3859 rws
[regno
].first_pred
= pred
;
3862 /* Handle an access to register REGNO of type FLAGS using predicate register
3863 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3864 a dependency with an earlier instruction in the same group. */
3867 rws_access_regno (regno
, flags
, pred
)
3869 struct reg_flags flags
;
3872 int need_barrier
= 0;
3874 if (regno
>= NUM_REGS
)
3877 if (! PR_REGNO_P (regno
))
3878 flags
.is_and
= flags
.is_or
= 0;
3884 /* One insn writes same reg multiple times? */
3885 if (rws_insn
[regno
].write_count
> 0)
3888 /* Update info for current instruction. */
3889 rws_update (rws_insn
, regno
, flags
, pred
);
3890 write_count
= rws_sum
[regno
].write_count
;
3892 switch (write_count
)
3895 /* The register has not been written yet. */
3896 rws_update (rws_sum
, regno
, flags
, pred
);
3900 /* The register has been written via a predicate. If this is
3901 not a complementary predicate, then we need a barrier. */
3902 /* ??? This assumes that P and P+1 are always complementary
3903 predicates for P even. */
3904 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
3906 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
3908 else if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
3910 rws_update (rws_sum
, regno
, flags
, pred
);
3914 /* The register has been unconditionally written already. We
3916 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
3918 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
3922 rws_sum
[regno
].written_by_and
= flags
.is_and
;
3923 rws_sum
[regno
].written_by_or
= flags
.is_or
;
3932 if (flags
.is_branch
)
3934 /* Branches have several RAW exceptions that allow to avoid
3937 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== AR_PFS_REGNUM
)
3938 /* RAW dependencies on branch regs are permissible as long
3939 as the writer is a non-branch instruction. Since we
3940 never generate code that uses a branch register written
3941 by a branch instruction, handling this case is
3945 if (REGNO_REG_CLASS (regno
) == PR_REGS
3946 && ! rws_sum
[regno
].written_by_fp
)
3947 /* The predicates of a branch are available within the
3948 same insn group as long as the predicate was written by
3949 something other than a floating-point instruction. */
3953 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
3955 if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
3958 switch (rws_sum
[regno
].write_count
)
3961 /* The register has not been written yet. */
3965 /* The register has been written via a predicate. If this is
3966 not a complementary predicate, then we need a barrier. */
3967 /* ??? This assumes that P and P+1 are always complementary
3968 predicates for P even. */
3969 if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
3974 /* The register has been unconditionally written already. We
3984 return need_barrier
;
3988 rws_access_reg (reg
, flags
, pred
)
3990 struct reg_flags flags
;
3993 int regno
= REGNO (reg
);
3994 int n
= HARD_REGNO_NREGS (REGNO (reg
), GET_MODE (reg
));
3997 return rws_access_regno (regno
, flags
, pred
);
4000 int need_barrier
= 0;
4002 need_barrier
|= rws_access_regno (regno
+ n
, flags
, pred
);
4003 return need_barrier
;
4007 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4008 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4011 update_set_flags (x
, pflags
, ppred
, pcond
)
4013 struct reg_flags
*pflags
;
4017 rtx src
= SET_SRC (x
);
4021 switch (GET_CODE (src
))
4027 if (SET_DEST (x
) == pc_rtx
)
4028 /* X is a conditional branch. */
4032 int is_complemented
= 0;
4034 /* X is a conditional move. */
4035 rtx cond
= XEXP (src
, 0);
4036 if (GET_CODE (cond
) == EQ
)
4037 is_complemented
= 1;
4038 cond
= XEXP (cond
, 0);
4039 if (GET_CODE (cond
) != REG
4040 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4043 if (XEXP (src
, 1) == SET_DEST (x
)
4044 || XEXP (src
, 2) == SET_DEST (x
))
4046 /* X is a conditional move that conditionally writes the
4049 /* We need another complement in this case. */
4050 if (XEXP (src
, 1) == SET_DEST (x
))
4051 is_complemented
= ! is_complemented
;
4053 *ppred
= REGNO (cond
);
4054 if (is_complemented
)
4058 /* ??? If this is a conditional write to the dest, then this
4059 instruction does not actually read one source. This probably
4060 doesn't matter, because that source is also the dest. */
4061 /* ??? Multiple writes to predicate registers are allowed
4062 if they are all AND type compares, or if they are all OR
4063 type compares. We do not generate such instructions
4066 /* ... fall through ... */
4069 if (GET_RTX_CLASS (GET_CODE (src
)) == '<'
4070 && GET_MODE_CLASS (GET_MODE (XEXP (src
, 0))) == MODE_FLOAT
)
4071 /* Set pflags->is_fp to 1 so that we know we're dealing
4072 with a floating point comparison when processing the
4073 destination of the SET. */
4076 /* Discover if this is a parallel comparison. We only handle
4077 and.orcm and or.andcm at present, since we must retain a
4078 strict inverse on the predicate pair. */
4079 else if (GET_CODE (src
) == AND
)
4081 else if (GET_CODE (src
) == IOR
)
4088 /* Subroutine of rtx_needs_barrier; this function determines whether the
4089 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4090 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4094 set_src_needs_barrier (x
, flags
, pred
, cond
)
4096 struct reg_flags flags
;
4100 int need_barrier
= 0;
4102 rtx src
= SET_SRC (x
);
4104 if (GET_CODE (src
) == CALL
)
4105 /* We don't need to worry about the result registers that
4106 get written by subroutine call. */
4107 return rtx_needs_barrier (src
, flags
, pred
);
4108 else if (SET_DEST (x
) == pc_rtx
)
4110 /* X is a conditional branch. */
4111 /* ??? This seems redundant, as the caller sets this bit for
4113 flags
.is_branch
= 1;
4114 return rtx_needs_barrier (src
, flags
, pred
);
4117 need_barrier
= rtx_needs_barrier (src
, flags
, pred
);
4119 /* This instruction unconditionally uses a predicate register. */
4121 need_barrier
|= rws_access_reg (cond
, flags
, 0);
4124 if (GET_CODE (dst
) == ZERO_EXTRACT
)
4126 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
4127 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
4128 dst
= XEXP (dst
, 0);
4130 return need_barrier
;
4133 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4134 Return 1 is this access creates a dependency with an earlier instruction
4135 in the same group. */
4138 rtx_needs_barrier (x
, flags
, pred
)
4140 struct reg_flags flags
;
4144 int is_complemented
= 0;
4145 int need_barrier
= 0;
4146 const char *format_ptr
;
4147 struct reg_flags new_flags
;
4155 switch (GET_CODE (x
))
4158 update_set_flags (x
, &new_flags
, &pred
, &cond
);
4159 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
, cond
);
4160 if (GET_CODE (SET_SRC (x
)) != CALL
)
4162 new_flags
.is_write
= 1;
4163 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
4168 new_flags
.is_write
= 0;
4169 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4171 /* Avoid multiple register writes, in case this is a pattern with
4172 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4173 if (! flags
.is_sibcall
&& ! rws_insn
[REG_AR_CFM
].write_count
)
4175 new_flags
.is_write
= 1;
4176 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
4177 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
4178 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4183 /* X is a predicated instruction. */
4185 cond
= COND_EXEC_TEST (x
);
4188 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
4190 if (GET_CODE (cond
) == EQ
)
4191 is_complemented
= 1;
4192 cond
= XEXP (cond
, 0);
4193 if (GET_CODE (cond
) != REG
4194 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4196 pred
= REGNO (cond
);
4197 if (is_complemented
)
4200 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
4201 return need_barrier
;
4205 /* Clobber & use are for earlier compiler-phases only. */
4210 /* We always emit stop bits for traditional asms. We emit stop bits
4211 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4212 if (GET_CODE (x
) != ASM_OPERANDS
4213 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
4215 /* Avoid writing the register multiple times if we have multiple
4216 asm outputs. This avoids an abort in rws_access_reg. */
4217 if (! rws_insn
[REG_VOLATILE
].write_count
)
4219 new_flags
.is_write
= 1;
4220 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
4225 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4226 We can not just fall through here since then we would be confused
4227 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4228 traditional asms unlike their normal usage. */
4230 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
4231 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
4236 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4238 rtx pat
= XVECEXP (x
, 0, i
);
4239 if (GET_CODE (pat
) == SET
)
4241 update_set_flags (pat
, &new_flags
, &pred
, &cond
);
4242 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
, cond
);
4244 else if (GET_CODE (pat
) == USE
4245 || GET_CODE (pat
) == CALL
4246 || GET_CODE (pat
) == ASM_OPERANDS
)
4247 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4248 else if (GET_CODE (pat
) != CLOBBER
&& GET_CODE (pat
) != RETURN
)
4251 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4253 rtx pat
= XVECEXP (x
, 0, i
);
4254 if (GET_CODE (pat
) == SET
)
4256 if (GET_CODE (SET_SRC (pat
)) != CALL
)
4258 new_flags
.is_write
= 1;
4259 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
4263 else if (GET_CODE (pat
) == CLOBBER
)
4264 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4272 if (REGNO (x
) == AR_UNAT_REGNUM
)
4274 for (i
= 0; i
< 64; ++i
)
4275 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
4278 need_barrier
= rws_access_reg (x
, flags
, pred
);
4282 /* Find the regs used in memory address computation. */
4283 new_flags
.is_write
= 0;
4284 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4287 case CONST_INT
: case CONST_DOUBLE
:
4288 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
4291 /* Operators with side-effects. */
4292 case POST_INC
: case POST_DEC
:
4293 if (GET_CODE (XEXP (x
, 0)) != REG
)
4296 new_flags
.is_write
= 0;
4297 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4298 new_flags
.is_write
= 1;
4299 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4303 if (GET_CODE (XEXP (x
, 0)) != REG
)
4306 new_flags
.is_write
= 0;
4307 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4308 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4309 new_flags
.is_write
= 1;
4310 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4313 /* Handle common unary and binary ops for efficiency. */
4314 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
4315 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
4316 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
4317 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
4318 case NE
: case EQ
: case GE
: case GT
: case LE
:
4319 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
4320 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4321 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4324 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
4325 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
4326 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
4327 case SQRT
: case FFS
:
4328 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
4332 switch (XINT (x
, 1))
4334 case 1: /* st8.spill */
4335 case 2: /* ld8.fill */
4337 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
4338 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
4340 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4341 new_flags
.is_write
= (XINT (x
, 1) == 1);
4342 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
4347 case 3: /* stf.spill */
4348 case 4: /* ldf.spill */
4349 case 8: /* popcnt */
4350 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4353 case 7: /* pred_rel_mutex */
4354 case 9: /* pic call */
4356 case 19: /* fetchadd_acq */
4357 case 20: /* mov = ar.bsp */
4358 case 21: /* flushrs */
4359 case 22: /* bundle selector */
4360 case 23: /* cycle display */
4363 case 5: /* recip_approx */
4364 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4365 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4368 case 13: /* cmpxchg_acq */
4369 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4370 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
4378 case UNSPEC_VOLATILE
:
4379 switch (XINT (x
, 1))
4382 /* Alloc must always be the first instruction. Currently, we
4383 only emit it at the function start, so we don't need to worry
4384 about emitting a stop bit before it. */
4385 need_barrier
= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4387 new_flags
.is_write
= 1;
4388 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4389 return need_barrier
;
4391 case 1: /* blockage */
4392 case 2: /* insn group barrier */
4395 case 5: /* set_bsp */
4399 case 7: /* pred.rel.mutex */
4400 case 8: /* safe_across_calls all */
4401 case 9: /* safe_across_calls normal */
4410 new_flags
.is_write
= 0;
4411 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
4412 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4414 new_flags
.is_write
= 1;
4415 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4416 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4420 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
4421 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4422 switch (format_ptr
[i
])
4424 case '0': /* unused field */
4425 case 'i': /* integer */
4426 case 'n': /* note */
4427 case 'w': /* wide integer */
4428 case 's': /* pointer to string */
4429 case 'S': /* optional pointer to string */
4433 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
4438 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
4439 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
4448 return need_barrier
;
4451 /* Clear out the state for group_barrier_needed_p at the start of a
4452 sequence of insns. */
4455 init_insn_group_barriers ()
4457 memset (rws_sum
, 0, sizeof (rws_sum
));
4460 /* Cumulative info for the current instruction group. */
4461 struct reg_write_state rws_sum
[NUM_REGS
];
4463 /* Given the current state, recorded by previous calls to this function,
4464 determine whether a group barrier (a stop bit) is necessary before INSN.
4465 Return nonzero if so. */
4468 group_barrier_needed_p (insn
)
4472 int need_barrier
= 0;
4473 struct reg_flags flags
;
4475 memset (&flags
, 0, sizeof (flags
));
4476 switch (GET_CODE (insn
))
4482 /* A barrier doesn't imply an instruction group boundary. */
4486 memset (rws_insn
, 0, sizeof (rws_insn
));
4490 flags
.is_branch
= 1;
4491 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
4492 memset (rws_insn
, 0, sizeof (rws_insn
));
4493 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
4497 flags
.is_branch
= 1;
4501 if (GET_CODE (PATTERN (insn
)) == USE
4502 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
4503 /* Don't care about USE and CLOBBER "insns"---those are used to
4504 indicate to the optimizer that it shouldn't get rid of
4505 certain operations. */
4508 pat
= PATTERN (insn
);
4510 /* Ug. Hack hacks hacked elsewhere. */
4511 switch (recog_memoized (insn
))
4513 /* We play dependency tricks with the epilogue in order
4514 to get proper schedules. Undo this for dv analysis. */
4515 case CODE_FOR_epilogue_deallocate_stack
:
4516 pat
= XVECEXP (pat
, 0, 0);
4519 /* The pattern we use for br.cloop confuses the code above.
4520 The second element of the vector is representative. */
4521 case CODE_FOR_doloop_end_internal
:
4522 pat
= XVECEXP (pat
, 0, 1);
4525 /* Doesn't generate code. */
4526 case CODE_FOR_pred_rel_mutex
:
4533 memset (rws_insn
, 0, sizeof (rws_insn
));
4534 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
4536 /* Check to see if the previous instruction was a volatile
4539 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
4546 return need_barrier
;
4549 /* Like group_barrier_needed_p, but do not clobber the current state. */
4552 safe_group_barrier_needed_p (insn
)
4555 struct reg_write_state rws_saved
[NUM_REGS
];
4557 memcpy (rws_saved
, rws_sum
, NUM_REGS
* sizeof *rws_saved
);
4558 t
= group_barrier_needed_p (insn
);
4559 memcpy (rws_sum
, rws_saved
, NUM_REGS
* sizeof *rws_saved
);
4563 /* INSNS is an chain of instructions. Scan the chain, and insert stop bits
4564 as necessary to eliminate dependendencies. This function assumes that
4565 a final instruction scheduling pass has been run which has already
4566 inserted most of the necessary stop bits. This function only inserts
4567 new ones at basic block boundaries, since these are invisible to the
4571 emit_insn_group_barriers (dump
, insns
)
4577 int insns_since_last_label
= 0;
4579 init_insn_group_barriers ();
4581 for (insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
4583 if (GET_CODE (insn
) == CODE_LABEL
)
4585 if (insns_since_last_label
)
4587 insns_since_last_label
= 0;
4589 else if (GET_CODE (insn
) == NOTE
4590 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
4592 if (insns_since_last_label
)
4594 insns_since_last_label
= 0;
4596 else if (GET_CODE (insn
) == INSN
4597 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
4598 && XINT (PATTERN (insn
), 1) == 2)
4600 init_insn_group_barriers ();
4603 else if (INSN_P (insn
))
4605 insns_since_last_label
= 1;
4607 if (group_barrier_needed_p (insn
))
4612 fprintf (dump
, "Emitting stop before label %d\n",
4613 INSN_UID (last_label
));
4614 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
4617 init_insn_group_barriers ();
4625 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4626 This function has to emit all necessary group barriers. */
4629 emit_all_insn_group_barriers (dump
, insns
)
4630 FILE *dump ATTRIBUTE_UNUSED
;
4635 init_insn_group_barriers ();
4637 for (insn
= insns
; insn
; insn
= NEXT_INSN (insn
))
4639 if (GET_CODE (insn
) == INSN
4640 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
4641 && XINT (PATTERN (insn
), 1) == 2)
4642 init_insn_group_barriers ();
4643 else if (INSN_P (insn
))
4645 if (group_barrier_needed_p (insn
))
4647 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
4648 init_insn_group_barriers ();
4649 group_barrier_needed_p (insn
);
4655 static int errata_find_address_regs
PARAMS ((rtx
*, void *));
4656 static void errata_emit_nops
PARAMS ((rtx
));
4657 static void fixup_errata
PARAMS ((void));
4659 /* This structure is used to track some details about the previous insns
4660 groups so we can determine if it may be necessary to insert NOPs to
4661 workaround hardware errata. */
4664 HARD_REG_SET p_reg_set
;
4665 HARD_REG_SET gr_reg_conditionally_set
;
4668 /* Index into the last_group array. */
4669 static int group_idx
;
4671 /* Called through for_each_rtx; determines if a hard register that was
4672 conditionally set in the previous group is used as an address register.
4673 It ensures that for_each_rtx returns 1 in that case. */
4675 errata_find_address_regs (xp
, data
)
4677 void *data ATTRIBUTE_UNUSED
;
4680 if (GET_CODE (x
) != MEM
)
4683 if (GET_CODE (x
) == POST_MODIFY
)
4685 if (GET_CODE (x
) == REG
)
4687 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
4688 if (TEST_HARD_REG_BIT (prev_group
->gr_reg_conditionally_set
,
4696 /* Called for each insn; this function keeps track of the state in
4697 last_group and emits additional NOPs if necessary to work around
4698 an Itanium A/B step erratum. */
4700 errata_emit_nops (insn
)
4703 struct group
*this_group
= last_group
+ group_idx
;
4704 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
4705 rtx pat
= PATTERN (insn
);
4706 rtx cond
= GET_CODE (pat
) == COND_EXEC
? COND_EXEC_TEST (pat
) : 0;
4707 rtx real_pat
= cond
? COND_EXEC_CODE (pat
) : pat
;
4708 enum attr_type type
;
4711 if (GET_CODE (real_pat
) == USE
4712 || GET_CODE (real_pat
) == CLOBBER
4713 || GET_CODE (real_pat
) == ASM_INPUT
4714 || GET_CODE (real_pat
) == ADDR_VEC
4715 || GET_CODE (real_pat
) == ADDR_DIFF_VEC
4716 || asm_noperands (PATTERN (insn
)) >= 0)
4719 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4722 if (GET_CODE (set
) == PARALLEL
)
4725 set
= XVECEXP (real_pat
, 0, 0);
4726 for (i
= 1; i
< XVECLEN (real_pat
, 0); i
++)
4727 if (GET_CODE (XVECEXP (real_pat
, 0, i
)) != USE
4728 && GET_CODE (XVECEXP (real_pat
, 0, i
)) != CLOBBER
)
4735 if (set
&& GET_CODE (set
) != SET
)
4738 type
= get_attr_type (insn
);
4741 && set
&& REG_P (SET_DEST (set
)) && PR_REGNO_P (REGNO (SET_DEST (set
))))
4742 SET_HARD_REG_BIT (this_group
->p_reg_set
, REGNO (SET_DEST (set
)));
4744 if ((type
== TYPE_M
|| type
== TYPE_A
) && cond
&& set
4745 && REG_P (SET_DEST (set
))
4746 && GET_CODE (SET_SRC (set
)) != PLUS
4747 && GET_CODE (SET_SRC (set
)) != MINUS
4748 && (GET_CODE (SET_SRC (set
)) != ASHIFT
4749 || !shladd_operand (XEXP (SET_SRC (set
), 1), VOIDmode
))
4750 && (GET_CODE (SET_SRC (set
)) != MEM
4751 || GET_CODE (XEXP (SET_SRC (set
), 0)) != POST_MODIFY
)
4752 && GENERAL_REGNO_P (REGNO (SET_DEST (set
))))
4754 if (GET_RTX_CLASS (GET_CODE (cond
)) != '<'
4755 || ! REG_P (XEXP (cond
, 0)))
4758 if (TEST_HARD_REG_BIT (prev_group
->p_reg_set
, REGNO (XEXP (cond
, 0))))
4759 SET_HARD_REG_BIT (this_group
->gr_reg_conditionally_set
, REGNO (SET_DEST (set
)));
4761 if (for_each_rtx (&real_pat
, errata_find_address_regs
, NULL
))
4763 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
4764 emit_insn_before (gen_nop (), insn
);
4765 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
4767 memset (last_group
, 0, sizeof last_group
);
4771 /* Emit extra nops if they are required to work around hardware errata. */
4778 if (! TARGET_B_STEP
)
4782 memset (last_group
, 0, sizeof last_group
);
4784 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
4789 if (ia64_safe_type (insn
) == TYPE_S
)
4792 memset (last_group
+ group_idx
, 0, sizeof last_group
[group_idx
]);
4795 errata_emit_nops (insn
);
4799 /* Instruction scheduling support. */
4800 /* Describe one bundle. */
4804 /* Zero if there's no possibility of a stop in this bundle other than
4805 at the end, otherwise the position of the optional stop bit. */
4807 /* The types of the three slots. */
4808 enum attr_type t
[3];
4809 /* The pseudo op to be emitted into the assembler output. */
4813 #define NR_BUNDLES 10
4815 /* A list of all available bundles. */
4817 static const struct bundle bundle
[NR_BUNDLES
] =
4819 { 2, { TYPE_M
, TYPE_I
, TYPE_I
}, ".mii" },
4820 { 1, { TYPE_M
, TYPE_M
, TYPE_I
}, ".mmi" },
4821 { 0, { TYPE_M
, TYPE_F
, TYPE_I
}, ".mfi" },
4822 { 0, { TYPE_M
, TYPE_M
, TYPE_F
}, ".mmf" },
4823 #if NR_BUNDLES == 10
4824 { 0, { TYPE_B
, TYPE_B
, TYPE_B
}, ".bbb" },
4825 { 0, { TYPE_M
, TYPE_B
, TYPE_B
}, ".mbb" },
4827 { 0, { TYPE_M
, TYPE_I
, TYPE_B
}, ".mib" },
4828 { 0, { TYPE_M
, TYPE_M
, TYPE_B
}, ".mmb" },
4829 { 0, { TYPE_M
, TYPE_F
, TYPE_B
}, ".mfb" },
4830 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
4831 it matches an L type insn. Otherwise we'll try to generate L type
4833 { 0, { TYPE_M
, TYPE_L
, TYPE_X
}, ".mlx" }
4836 /* Describe a packet of instructions. Packets consist of two bundles that
4837 are visible to the hardware in one scheduling window. */
4841 const struct bundle
*t1
, *t2
;
4842 /* Precomputed value of the first split issue in this packet if a cycle
4843 starts at its beginning. */
4845 /* For convenience, the insn types are replicated here so we don't have
4846 to go through T1 and T2 all the time. */
4847 enum attr_type t
[6];
4850 /* An array containing all possible packets. */
4851 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
4852 static struct ia64_packet packets
[NR_PACKETS
];
4854 /* Map attr_type to a string with the name. */
4856 static const char *type_names
[] =
4858 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
4861 /* Nonzero if we should insert stop bits into the schedule. */
4862 int ia64_final_schedule
= 0;
4864 static int itanium_split_issue
PARAMS ((const struct ia64_packet
*, int));
4865 static rtx ia64_single_set
PARAMS ((rtx
));
4866 static int insn_matches_slot
PARAMS ((const struct ia64_packet
*, enum attr_type
, int, rtx
));
4867 static void ia64_emit_insn_before
PARAMS ((rtx
, rtx
));
4868 static void maybe_rotate
PARAMS ((FILE *));
4869 static void finish_last_head
PARAMS ((FILE *, int));
4870 static void rotate_one_bundle
PARAMS ((FILE *));
4871 static void rotate_two_bundles
PARAMS ((FILE *));
4872 static void cycle_end_fill_slots
PARAMS ((FILE *));
4873 static int packet_matches_p
PARAMS ((const struct ia64_packet
*, int, int *));
4874 static int get_split
PARAMS ((const struct ia64_packet
*, int));
4875 static int find_best_insn
PARAMS ((rtx
*, enum attr_type
*, int,
4876 const struct ia64_packet
*, int));
4877 static void find_best_packet
PARAMS ((int *, const struct ia64_packet
**,
4878 rtx
*, enum attr_type
*, int));
4879 static int itanium_reorder
PARAMS ((FILE *, rtx
*, rtx
*, int));
4880 static void dump_current_packet
PARAMS ((FILE *));
4881 static void schedule_stop
PARAMS ((FILE *));
4882 static rtx gen_nop_type
PARAMS ((enum attr_type
));
4883 static void ia64_emit_nops
PARAMS ((void));
4885 /* Map a bundle number to its pseudo-op. */
4891 return bundle
[b
].name
;
4894 /* Compute the slot which will cause a split issue in packet P if the
4895 current cycle begins at slot BEGIN. */
4898 itanium_split_issue (p
, begin
)
4899 const struct ia64_packet
*p
;
4902 int type_count
[TYPE_S
];
4908 /* Always split before and after MMF. */
4909 if (p
->t
[0] == TYPE_M
&& p
->t
[1] == TYPE_M
&& p
->t
[2] == TYPE_F
)
4911 if (p
->t
[3] == TYPE_M
&& p
->t
[4] == TYPE_M
&& p
->t
[5] == TYPE_F
)
4913 /* Always split after MBB and BBB. */
4914 if (p
->t
[1] == TYPE_B
)
4916 /* Split after first bundle in MIB BBB combination. */
4917 if (p
->t
[2] == TYPE_B
&& p
->t
[3] == TYPE_B
)
4921 memset (type_count
, 0, sizeof type_count
);
4922 for (i
= begin
; i
< split
; i
++)
4924 enum attr_type t0
= p
->t
[i
];
4925 /* An MLX bundle reserves the same units as an MFI bundle. */
4926 enum attr_type t
= (t0
== TYPE_L
? TYPE_F
4927 : t0
== TYPE_X
? TYPE_I
4929 int max
= (t
== TYPE_B
? 3 : t
== TYPE_F
? 1 : 2);
4930 if (type_count
[t
] == max
)
4937 /* Return the maximum number of instructions a cpu can issue. */
4945 /* Helper function - like single_set, but look inside COND_EXEC. */
4948 ia64_single_set (insn
)
4951 rtx x
= PATTERN (insn
);
4952 if (GET_CODE (x
) == COND_EXEC
)
4953 x
= COND_EXEC_CODE (x
);
4954 if (GET_CODE (x
) == SET
)
4956 return single_set_2 (insn
, x
);
4959 /* Adjust the cost of a scheduling dependency. Return the new cost of
4960 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4963 ia64_adjust_cost (insn
, link
, dep_insn
, cost
)
4964 rtx insn
, link
, dep_insn
;
4967 enum attr_type dep_type
;
4968 enum attr_itanium_class dep_class
;
4969 enum attr_itanium_class insn_class
;
4970 rtx dep_set
, set
, src
, addr
;
4972 if (GET_CODE (PATTERN (insn
)) == CLOBBER
4973 || GET_CODE (PATTERN (insn
)) == USE
4974 || GET_CODE (PATTERN (dep_insn
)) == CLOBBER
4975 || GET_CODE (PATTERN (dep_insn
)) == USE
4976 /* @@@ Not accurate for indirect calls. */
4977 || GET_CODE (insn
) == CALL_INSN
4978 || ia64_safe_type (insn
) == TYPE_S
)
4981 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
4982 || REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
4985 dep_type
= ia64_safe_type (dep_insn
);
4986 dep_class
= ia64_safe_itanium_class (dep_insn
);
4987 insn_class
= ia64_safe_itanium_class (insn
);
4989 /* Compares that feed a conditional branch can execute in the same
4991 dep_set
= ia64_single_set (dep_insn
);
4992 set
= ia64_single_set (insn
);
4994 if (dep_type
!= TYPE_F
4996 && GET_CODE (SET_DEST (dep_set
)) == REG
4997 && PR_REG (REGNO (SET_DEST (dep_set
)))
4998 && GET_CODE (insn
) == JUMP_INSN
)
5001 if (dep_set
&& GET_CODE (SET_DEST (dep_set
)) == MEM
)
5003 /* ??? Can't find any information in the documenation about whether
5007 splits issue. Assume it doesn't. */
5011 src
= set
? SET_SRC (set
) : 0;
5013 if (set
&& GET_CODE (SET_DEST (set
)) == MEM
)
5014 addr
= XEXP (SET_DEST (set
), 0);
5015 else if (set
&& GET_CODE (src
) == MEM
)
5016 addr
= XEXP (src
, 0);
5017 else if (set
&& GET_CODE (src
) == ZERO_EXTEND
5018 && GET_CODE (XEXP (src
, 0)) == MEM
)
5019 addr
= XEXP (XEXP (src
, 0), 0);
5020 else if (set
&& GET_CODE (src
) == UNSPEC
5021 && XVECLEN (XEXP (src
, 0), 0) > 0
5022 && GET_CODE (XVECEXP (src
, 0, 0)) == MEM
)
5023 addr
= XEXP (XVECEXP (src
, 0, 0), 0);
5024 if (addr
&& GET_CODE (addr
) == POST_MODIFY
)
5025 addr
= XEXP (addr
, 0);
5027 set
= ia64_single_set (dep_insn
);
5029 if ((dep_class
== ITANIUM_CLASS_IALU
5030 || dep_class
== ITANIUM_CLASS_ILOG
5031 || dep_class
== ITANIUM_CLASS_LD
)
5032 && (insn_class
== ITANIUM_CLASS_LD
5033 || insn_class
== ITANIUM_CLASS_ST
))
5035 if (! addr
|| ! set
)
5037 /* This isn't completely correct - an IALU that feeds an address has
5038 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5039 otherwise. Unfortunately there's no good way to describe this. */
5040 if (reg_overlap_mentioned_p (SET_DEST (set
), addr
))
5043 if ((dep_class
== ITANIUM_CLASS_IALU
5044 || dep_class
== ITANIUM_CLASS_ILOG
5045 || dep_class
== ITANIUM_CLASS_LD
)
5046 && (insn_class
== ITANIUM_CLASS_MMMUL
5047 || insn_class
== ITANIUM_CLASS_MMSHF
5048 || insn_class
== ITANIUM_CLASS_MMSHFI
))
5050 if (dep_class
== ITANIUM_CLASS_FMAC
5051 && (insn_class
== ITANIUM_CLASS_FMISC
5052 || insn_class
== ITANIUM_CLASS_FCVTFX
5053 || insn_class
== ITANIUM_CLASS_XMPY
))
5055 if ((dep_class
== ITANIUM_CLASS_FMAC
5056 || dep_class
== ITANIUM_CLASS_FMISC
5057 || dep_class
== ITANIUM_CLASS_FCVTFX
5058 || dep_class
== ITANIUM_CLASS_XMPY
)
5059 && insn_class
== ITANIUM_CLASS_STF
)
5061 if ((dep_class
== ITANIUM_CLASS_MMMUL
5062 || dep_class
== ITANIUM_CLASS_MMSHF
5063 || dep_class
== ITANIUM_CLASS_MMSHFI
)
5064 && (insn_class
== ITANIUM_CLASS_LD
5065 || insn_class
== ITANIUM_CLASS_ST
5066 || insn_class
== ITANIUM_CLASS_IALU
5067 || insn_class
== ITANIUM_CLASS_ILOG
5068 || insn_class
== ITANIUM_CLASS_ISHF
))
5074 /* Describe the current state of the Itanium pipeline. */
5077 /* The first slot that is used in the current cycle. */
5079 /* The next slot to fill. */
5081 /* The packet we have selected for the current issue window. */
5082 const struct ia64_packet
*packet
;
5083 /* The position of the split issue that occurs due to issue width
5084 limitations (6 if there's no split issue). */
5086 /* Record data about the insns scheduled so far in the same issue
5087 window. The elements up to but not including FIRST_SLOT belong
5088 to the previous cycle, the ones starting with FIRST_SLOT belong
5089 to the current cycle. */
5090 enum attr_type types
[6];
5093 /* Nonzero if we decided to schedule a stop bit. */
5097 /* Temporary arrays; they have enough elements to hold all insns that
5098 can be ready at the same time while scheduling of the current block.
5099 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5100 static rtx
*sched_ready
;
5101 static enum attr_type
*sched_types
;
5103 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5107 insn_matches_slot (p
, itype
, slot
, insn
)
5108 const struct ia64_packet
*p
;
5109 enum attr_type itype
;
5113 enum attr_itanium_requires_unit0 u0
;
5114 enum attr_type stype
= p
->t
[slot
];
5118 u0
= ia64_safe_itanium_requires_unit0 (insn
);
5119 if (u0
== ITANIUM_REQUIRES_UNIT0_YES
)
5122 for (i
= sched_data
.first_slot
; i
< slot
; i
++)
5123 if (p
->t
[i
] == stype
)
5126 if (GET_CODE (insn
) == CALL_INSN
)
5128 /* Reject calls in multiway branch packets. We want to limit
5129 the number of multiway branches we generate (since the branch
5130 predictor is limited), and this seems to work fairly well.
5131 (If we didn't do this, we'd have to add another test here to
5132 force calls into the third slot of the bundle.) */
5135 if (p
->t
[1] == TYPE_B
)
5140 if (p
->t
[4] == TYPE_B
)
5148 if (itype
== TYPE_A
)
5149 return stype
== TYPE_M
|| stype
== TYPE_I
;
5153 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5154 assembly output a bit prettier. */
5157 ia64_emit_insn_before (insn
, before
)
5160 rtx prev
= PREV_INSN (before
);
5161 if (prev
&& GET_CODE (prev
) == INSN
5162 && GET_CODE (PATTERN (prev
)) == UNSPEC
5163 && XINT (PATTERN (prev
), 1) == 23)
5165 emit_insn_before (insn
, before
);
5169 /* Generate a nop insn of the given type. Note we never generate L type
5179 return gen_nop_m ();
5181 return gen_nop_i ();
5183 return gen_nop_b ();
5185 return gen_nop_f ();
5187 return gen_nop_x ();
5194 /* When rotating a bundle out of the issue window, insert a bundle selector
5195 insn in front of it. DUMP is the scheduling dump file or NULL. START
5196 is either 0 or 3, depending on whether we want to emit a bundle selector
5197 for the first bundle or the second bundle in the current issue window.
5199 The selector insns are emitted this late because the selected packet can
5200 be changed until parts of it get rotated out. */
5203 finish_last_head (dump
, start
)
5207 const struct ia64_packet
*p
= sched_data
.packet
;
5208 const struct bundle
*b
= start
== 0 ? p
->t1
: p
->t2
;
5209 int bundle_type
= b
- bundle
;
5213 if (! ia64_final_schedule
)
5216 for (i
= start
; sched_data
.insns
[i
] == 0; i
++)
5219 insn
= sched_data
.insns
[i
];
5222 fprintf (dump
, "// Emitting template before %d: %s\n",
5223 INSN_UID (insn
), b
->name
);
5225 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type
)), insn
);
5228 /* We can't schedule more insns this cycle. Fix up the scheduling state
5229 and advance FIRST_SLOT and CUR.
5230 We have to distribute the insns that are currently found between
5231 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5232 far, they are stored successively in the fields starting at FIRST_SLOT;
5233 now they must be moved to the correct slots.
5234 DUMP is the current scheduling dump file, or NULL. */
5237 cycle_end_fill_slots (dump
)
5240 const struct ia64_packet
*packet
= sched_data
.packet
;
5242 enum attr_type tmp_types
[6];
5245 memcpy (tmp_types
, sched_data
.types
, 6 * sizeof (enum attr_type
));
5246 memcpy (tmp_insns
, sched_data
.insns
, 6 * sizeof (rtx
));
5248 for (i
= slot
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
5250 enum attr_type t
= tmp_types
[i
];
5251 if (t
!= ia64_safe_type (tmp_insns
[i
]))
5253 while (! insn_matches_slot (packet
, t
, slot
, tmp_insns
[i
]))
5255 if (slot
> sched_data
.split
)
5258 fprintf (dump
, "// Packet needs %s, have %s\n", type_names
[packet
->t
[slot
]],
5260 sched_data
.types
[slot
] = packet
->t
[slot
];
5261 sched_data
.insns
[slot
] = 0;
5262 sched_data
.stopbit
[slot
] = 0;
5265 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5266 actual slot type later. */
5267 sched_data
.types
[slot
] = packet
->t
[slot
];
5268 sched_data
.insns
[slot
] = tmp_insns
[i
];
5269 sched_data
.stopbit
[slot
] = 0;
5273 /* This isn't right - there's no need to pad out until the forced split;
5274 the CPU will automatically split if an insn isn't ready. */
5276 while (slot
< sched_data
.split
)
5278 sched_data
.types
[slot
] = packet
->t
[slot
];
5279 sched_data
.insns
[slot
] = 0;
5280 sched_data
.stopbit
[slot
] = 0;
5285 sched_data
.first_slot
= sched_data
.cur
= slot
;
5288 /* Bundle rotations, as described in the Itanium optimization manual.
5289 We can rotate either one or both bundles out of the issue window.
5290 DUMP is the current scheduling dump file, or NULL. */
5293 rotate_one_bundle (dump
)
5297 fprintf (dump
, "// Rotating one bundle.\n");
5299 finish_last_head (dump
, 0);
5300 if (sched_data
.cur
> 3)
5302 sched_data
.cur
-= 3;
5303 sched_data
.first_slot
-= 3;
5304 memmove (sched_data
.types
,
5305 sched_data
.types
+ 3,
5306 sched_data
.cur
* sizeof *sched_data
.types
);
5307 memmove (sched_data
.stopbit
,
5308 sched_data
.stopbit
+ 3,
5309 sched_data
.cur
* sizeof *sched_data
.stopbit
);
5310 memmove (sched_data
.insns
,
5311 sched_data
.insns
+ 3,
5312 sched_data
.cur
* sizeof *sched_data
.insns
);
5317 sched_data
.first_slot
= 0;
5322 rotate_two_bundles (dump
)
5326 fprintf (dump
, "// Rotating two bundles.\n");
5328 if (sched_data
.cur
== 0)
5331 finish_last_head (dump
, 0);
5332 if (sched_data
.cur
> 3)
5333 finish_last_head (dump
, 3);
5335 sched_data
.first_slot
= 0;
5338 /* We're beginning a new block. Initialize data structures as necessary. */
5341 ia64_sched_init (dump
, sched_verbose
, max_ready
)
5342 FILE *dump ATTRIBUTE_UNUSED
;
5343 int sched_verbose ATTRIBUTE_UNUSED
;
5346 static int initialized
= 0;
5354 for (i
= b1
= 0; b1
< NR_BUNDLES
; b1
++)
5356 const struct bundle
*t1
= bundle
+ b1
;
5357 for (b2
= 0; b2
< NR_BUNDLES
; b2
++, i
++)
5359 const struct bundle
*t2
= bundle
+ b2
;
5365 for (i
= 0; i
< NR_PACKETS
; i
++)
5368 for (j
= 0; j
< 3; j
++)
5369 packets
[i
].t
[j
] = packets
[i
].t1
->t
[j
];
5370 for (j
= 0; j
< 3; j
++)
5371 packets
[i
].t
[j
+ 3] = packets
[i
].t2
->t
[j
];
5372 packets
[i
].first_split
= itanium_split_issue (packets
+ i
, 0);
5377 init_insn_group_barriers ();
5379 memset (&sched_data
, 0, sizeof sched_data
);
5380 sched_types
= (enum attr_type
*) xmalloc (max_ready
5381 * sizeof (enum attr_type
));
5382 sched_ready
= (rtx
*) xmalloc (max_ready
* sizeof (rtx
));
5385 /* See if the packet P can match the insns we have already scheduled. Return
5386 nonzero if so. In *PSLOT, we store the first slot that is available for
5387 more instructions if we choose this packet.
5388 SPLIT holds the last slot we can use, there's a split issue after it so
5389 scheduling beyond it would cause us to use more than one cycle. */
5392 packet_matches_p (p
, split
, pslot
)
5393 const struct ia64_packet
*p
;
5397 int filled
= sched_data
.cur
;
5398 int first
= sched_data
.first_slot
;
5401 /* First, check if the first of the two bundles must be a specific one (due
5403 if (first
> 0 && sched_data
.stopbit
[0] && p
->t1
->possible_stop
!= 1)
5405 if (first
> 1 && sched_data
.stopbit
[1] && p
->t1
->possible_stop
!= 2)
5408 for (i
= 0; i
< first
; i
++)
5409 if (! insn_matches_slot (p
, sched_data
.types
[i
], i
,
5410 sched_data
.insns
[i
]))
5412 for (i
= slot
= first
; i
< filled
; i
++)
5414 while (slot
< split
)
5416 if (insn_matches_slot (p
, sched_data
.types
[i
], slot
,
5417 sched_data
.insns
[i
]))
5431 /* A frontend for itanium_split_issue. For a packet P and a slot
5432 number FIRST that describes the start of the current clock cycle,
5433 return the slot number of the first split issue. This function
5434 uses the cached number found in P if possible. */
5437 get_split (p
, first
)
5438 const struct ia64_packet
*p
;
5442 return p
->first_split
;
5443 return itanium_split_issue (p
, first
);
5446 /* Given N_READY insns in the array READY, whose types are found in the
5447 corresponding array TYPES, return the insn that is best suited to be
5448 scheduled in slot SLOT of packet P. */
5451 find_best_insn (ready
, types
, n_ready
, p
, slot
)
5453 enum attr_type
*types
;
5455 const struct ia64_packet
*p
;
5460 while (n_ready
-- > 0)
5462 rtx insn
= ready
[n_ready
];
5465 if (best
>= 0 && INSN_PRIORITY (ready
[n_ready
]) < best_pri
)
5467 /* If we have equally good insns, one of which has a stricter
5468 slot requirement, prefer the one with the stricter requirement. */
5469 if (best
>= 0 && types
[n_ready
] == TYPE_A
)
5471 if (insn_matches_slot (p
, types
[n_ready
], slot
, insn
))
5474 best_pri
= INSN_PRIORITY (ready
[best
]);
5476 /* If there's no way we could get a stricter requirement, stop
5478 if (types
[n_ready
] != TYPE_A
5479 && ia64_safe_itanium_requires_unit0 (ready
[n_ready
]))
5487 /* Select the best packet to use given the current scheduler state and the
5489 READY is an array holding N_READY ready insns; TYPES is a corresponding
5490 array that holds their types. Store the best packet in *PPACKET and the
5491 number of insns that can be scheduled in the current cycle in *PBEST. */
5494 find_best_packet (pbest
, ppacket
, ready
, types
, n_ready
)
5496 const struct ia64_packet
**ppacket
;
5498 enum attr_type
*types
;
5501 int first
= sched_data
.first_slot
;
5504 const struct ia64_packet
*best_packet
= NULL
;
5507 for (i
= 0; i
< NR_PACKETS
; i
++)
5509 const struct ia64_packet
*p
= packets
+ i
;
5511 int split
= get_split (p
, first
);
5513 int first_slot
, last_slot
;
5516 if (! packet_matches_p (p
, split
, &first_slot
))
5519 memcpy (sched_ready
, ready
, n_ready
* sizeof (rtx
));
5523 for (slot
= first_slot
; slot
< split
; slot
++)
5527 /* Disallow a degenerate case where the first bundle doesn't
5528 contain anything but NOPs! */
5529 if (first_slot
== 0 && win
== 0 && slot
== 3)
5535 insn_nr
= find_best_insn (sched_ready
, types
, n_ready
, p
, slot
);
5538 sched_ready
[insn_nr
] = 0;
5542 else if (p
->t
[slot
] == TYPE_B
)
5545 /* We must disallow MBB/BBB packets if any of their B slots would be
5546 filled with nops. */
5549 if (p
->t
[1] == TYPE_B
&& (b_nops
|| last_slot
< 2))
5554 if (p
->t
[4] == TYPE_B
&& (b_nops
|| last_slot
< 5))
5559 || (win
== best
&& last_slot
< lowest_end
))
5562 lowest_end
= last_slot
;
5567 *ppacket
= best_packet
;
5570 /* Reorder the ready list so that the insns that can be issued in this cycle
5571 are found in the correct order at the end of the list.
5572 DUMP is the scheduling dump file, or NULL. READY points to the start,
5573 E_READY to the end of the ready list. MAY_FAIL determines what should be
5574 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5575 otherwise we return 0.
5576 Return 1 if any insns can be scheduled in this cycle. */
5579 itanium_reorder (dump
, ready
, e_ready
, may_fail
)
5585 const struct ia64_packet
*best_packet
;
5586 int n_ready
= e_ready
- ready
;
5587 int first
= sched_data
.first_slot
;
5588 int i
, best
, best_split
, filled
;
5590 for (i
= 0; i
< n_ready
; i
++)
5591 sched_types
[i
] = ia64_safe_type (ready
[i
]);
5593 find_best_packet (&best
, &best_packet
, ready
, sched_types
, n_ready
);
5604 fprintf (dump
, "// Selected bundles: %s %s (%d insns)\n",
5605 best_packet
->t1
->name
,
5606 best_packet
->t2
? best_packet
->t2
->name
: NULL
, best
);
5609 best_split
= itanium_split_issue (best_packet
, first
);
5610 packet_matches_p (best_packet
, best_split
, &filled
);
5612 for (i
= filled
; i
< best_split
; i
++)
5616 insn_nr
= find_best_insn (ready
, sched_types
, n_ready
, best_packet
, i
);
5619 rtx insn
= ready
[insn_nr
];
5620 memmove (ready
+ insn_nr
, ready
+ insn_nr
+ 1,
5621 (n_ready
- insn_nr
- 1) * sizeof (rtx
));
5622 memmove (sched_types
+ insn_nr
, sched_types
+ insn_nr
+ 1,
5623 (n_ready
- insn_nr
- 1) * sizeof (enum attr_type
));
5624 ready
[--n_ready
] = insn
;
5628 sched_data
.packet
= best_packet
;
5629 sched_data
.split
= best_split
;
5633 /* Dump information about the current scheduling state to file DUMP. */
5636 dump_current_packet (dump
)
5640 fprintf (dump
, "// %d slots filled:", sched_data
.cur
);
5641 for (i
= 0; i
< sched_data
.first_slot
; i
++)
5643 rtx insn
= sched_data
.insns
[i
];
5644 fprintf (dump
, " %s", type_names
[sched_data
.types
[i
]]);
5646 fprintf (dump
, "/%s", type_names
[ia64_safe_type (insn
)]);
5647 if (sched_data
.stopbit
[i
])
5648 fprintf (dump
, " ;;");
5650 fprintf (dump
, " :::");
5651 for (i
= sched_data
.first_slot
; i
< sched_data
.cur
; i
++)
5653 rtx insn
= sched_data
.insns
[i
];
5654 enum attr_type t
= ia64_safe_type (insn
);
5655 fprintf (dump
, " (%d) %s", INSN_UID (insn
), type_names
[t
]);
5657 fprintf (dump
, "\n");
5660 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5664 schedule_stop (dump
)
5667 const struct ia64_packet
*best
= sched_data
.packet
;
5672 fprintf (dump
, "// Stop bit, cur = %d.\n", sched_data
.cur
);
5674 if (sched_data
.cur
== 0)
5677 fprintf (dump
, "// At start of bundle, so nothing to do.\n");
5679 rotate_two_bundles (NULL
);
5683 for (i
= -1; i
< NR_PACKETS
; i
++)
5685 /* This is a slight hack to give the current packet the first chance.
5686 This is done to avoid e.g. switching from MIB to MBB bundles. */
5687 const struct ia64_packet
*p
= (i
>= 0 ? packets
+ i
: sched_data
.packet
);
5688 int split
= get_split (p
, sched_data
.first_slot
);
5689 const struct bundle
*compare
;
5692 if (! packet_matches_p (p
, split
, &next
))
5695 compare
= next
> 3 ? p
->t2
: p
->t1
;
5698 if (compare
->possible_stop
)
5699 stoppos
= compare
->possible_stop
;
5703 if (stoppos
< next
|| stoppos
>= best_stop
)
5705 if (compare
->possible_stop
== 0)
5707 stoppos
= (next
> 3 ? 6 : 3);
5709 if (stoppos
< next
|| stoppos
>= best_stop
)
5713 fprintf (dump
, "// switching from %s %s to %s %s (stop at %d)\n",
5714 best
->t1
->name
, best
->t2
->name
, p
->t1
->name
, p
->t2
->name
,
5717 best_stop
= stoppos
;
5721 sched_data
.packet
= best
;
5722 cycle_end_fill_slots (dump
);
5723 while (sched_data
.cur
< best_stop
)
5725 sched_data
.types
[sched_data
.cur
] = best
->t
[sched_data
.cur
];
5726 sched_data
.insns
[sched_data
.cur
] = 0;
5727 sched_data
.stopbit
[sched_data
.cur
] = 0;
5730 sched_data
.stopbit
[sched_data
.cur
- 1] = 1;
5731 sched_data
.first_slot
= best_stop
;
5734 dump_current_packet (dump
);
5737 /* If necessary, perform one or two rotations on the scheduling state.
5738 This should only be called if we are starting a new cycle. */
5744 if (sched_data
.cur
== 6)
5745 rotate_two_bundles (dump
);
5746 else if (sched_data
.cur
>= 3)
5747 rotate_one_bundle (dump
);
5748 sched_data
.first_slot
= sched_data
.cur
;
5751 /* We are about to being issuing insns for this clock cycle.
5752 Override the default sort algorithm to better slot instructions. */
5755 ia64_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
, reorder_type
)
5756 FILE *dump ATTRIBUTE_UNUSED
;
5757 int sched_verbose ATTRIBUTE_UNUSED
;
5762 int n_ready
= *pn_ready
;
5763 rtx
*e_ready
= ready
+ n_ready
;
5769 fprintf (dump
, "// ia64_sched_reorder (type %d):\n", reorder_type
);
5770 dump_current_packet (dump
);
5773 if (reorder_type
== 0)
5774 maybe_rotate (sched_verbose
? dump
: NULL
);
5776 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5777 highest
= ready
[n_ready
- 1];
5778 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
5779 if (insnp
< e_ready
)
5782 enum attr_type t
= ia64_safe_type (insn
);
5783 if (t
== TYPE_UNKNOWN
)
5785 highest
= ready
[n_ready
- 1];
5786 ready
[n_ready
- 1] = insn
;
5788 if (ia64_final_schedule
&& group_barrier_needed_p (insn
))
5790 schedule_stop (sched_verbose
? dump
: NULL
);
5791 sched_data
.last_was_stop
= 1;
5792 maybe_rotate (sched_verbose
? dump
: NULL
);
5794 else if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
5795 || asm_noperands (PATTERN (insn
)) >= 0)
5797 /* It must be an asm of some kind. */
5798 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
5804 if (ia64_final_schedule
)
5806 int nr_need_stop
= 0;
5808 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
5809 if (safe_group_barrier_needed_p (*insnp
))
5812 /* Schedule a stop bit if
5813 - all insns require a stop bit, or
5814 - we are starting a new cycle and _any_ insns require a stop bit.
5815 The reason for the latter is that if our schedule is accurate, then
5816 the additional stop won't decrease performance at this point (since
5817 there's a split issue at this point anyway), but it gives us more
5818 freedom when scheduling the currently ready insns. */
5819 if ((reorder_type
== 0 && nr_need_stop
)
5820 || (reorder_type
== 1 && n_ready
== nr_need_stop
))
5822 schedule_stop (sched_verbose
? dump
: NULL
);
5823 sched_data
.last_was_stop
= 1;
5824 maybe_rotate (sched_verbose
? dump
: NULL
);
5825 if (reorder_type
== 1)
5832 /* Move down everything that needs a stop bit, preserving relative
5834 while (insnp
-- > ready
+ deleted
)
5835 while (insnp
>= ready
+ deleted
)
5838 if (! safe_group_barrier_needed_p (insn
))
5840 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
5846 if (deleted
!= nr_need_stop
)
5851 return itanium_reorder (sched_verbose
? dump
: NULL
,
5852 ready
, e_ready
, reorder_type
== 1);
5855 /* Like ia64_sched_reorder, but called after issuing each insn.
5856 Override the default sort algorithm to better slot instructions. */
5859 ia64_sched_reorder2 (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
5860 FILE *dump ATTRIBUTE_UNUSED
;
5861 int sched_verbose ATTRIBUTE_UNUSED
;
5864 int clock_var ATTRIBUTE_UNUSED
;
5866 if (sched_data
.last_was_stop
)
5869 /* Detect one special case and try to optimize it.
5870 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
5871 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
5872 if (sched_data
.first_slot
== 1
5873 && sched_data
.stopbit
[0]
5874 && ((sched_data
.cur
== 4
5875 && (sched_data
.types
[1] == TYPE_M
|| sched_data
.types
[1] == TYPE_A
)
5876 && (sched_data
.types
[2] == TYPE_I
|| sched_data
.types
[2] == TYPE_A
)
5877 && (sched_data
.types
[3] != TYPE_M
&& sched_data
.types
[3] != TYPE_A
))
5878 || (sched_data
.cur
== 3
5879 && (sched_data
.types
[1] == TYPE_M
|| sched_data
.types
[1] == TYPE_A
)
5880 && (sched_data
.types
[2] != TYPE_M
&& sched_data
.types
[2] != TYPE_I
5881 && sched_data
.types
[2] != TYPE_A
))))
5885 rtx stop
= PREV_INSN (sched_data
.insns
[1]);
5888 sched_data
.stopbit
[0] = 0;
5889 sched_data
.stopbit
[2] = 1;
5890 if (GET_CODE (stop
) != INSN
)
5893 pat
= PATTERN (stop
);
5894 /* Ignore cycle displays. */
5895 if (GET_CODE (pat
) == UNSPEC
&& XINT (pat
, 1) == 23)
5896 stop
= PREV_INSN (stop
);
5897 pat
= PATTERN (stop
);
5898 if (GET_CODE (pat
) != UNSPEC_VOLATILE
5899 || XINT (pat
, 1) != 2
5900 || INTVAL (XVECEXP (pat
, 0, 0)) != 1)
5902 XVECEXP (pat
, 0, 0) = GEN_INT (3);
5904 sched_data
.types
[5] = sched_data
.types
[3];
5905 sched_data
.types
[4] = sched_data
.types
[2];
5906 sched_data
.types
[3] = sched_data
.types
[1];
5907 sched_data
.insns
[5] = sched_data
.insns
[3];
5908 sched_data
.insns
[4] = sched_data
.insns
[2];
5909 sched_data
.insns
[3] = sched_data
.insns
[1];
5910 sched_data
.stopbit
[5] = sched_data
.stopbit
[4] = sched_data
.stopbit
[3] = 0;
5911 sched_data
.cur
+= 2;
5912 sched_data
.first_slot
= 3;
5913 for (i
= 0; i
< NR_PACKETS
; i
++)
5915 const struct ia64_packet
*p
= packets
+ i
;
5916 if (p
->t
[0] == TYPE_M
&& p
->t
[1] == TYPE_F
&& p
->t
[2] == TYPE_B
)
5918 sched_data
.packet
= p
;
5922 rotate_one_bundle (sched_verbose
? dump
: NULL
);
5925 for (i
= 0; i
< NR_PACKETS
; i
++)
5927 const struct ia64_packet
*p
= packets
+ i
;
5928 int split
= get_split (p
, sched_data
.first_slot
);
5931 /* Disallow multiway branches here. */
5932 if (p
->t
[1] == TYPE_B
)
5935 if (packet_matches_p (p
, split
, &next
) && next
< best
)
5938 sched_data
.packet
= p
;
5939 sched_data
.split
= split
;
5948 int more
= ia64_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
, 1);
5951 /* Did we schedule a stop? If so, finish this cycle. */
5952 if (sched_data
.cur
== sched_data
.first_slot
)
5957 fprintf (dump
, "// Can't issue more this cycle; updating type array.\n");
5959 cycle_end_fill_slots (sched_verbose
? dump
: NULL
);
5961 dump_current_packet (dump
);
5965 /* We are about to issue INSN. Return the number of insns left on the
5966 ready queue that can be issued this cycle. */
5969 ia64_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
5973 int can_issue_more ATTRIBUTE_UNUSED
;
5975 enum attr_type t
= ia64_safe_type (insn
);
5977 if (sched_data
.last_was_stop
)
5979 int t
= sched_data
.first_slot
;
5982 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t
)), insn
);
5983 init_insn_group_barriers ();
5984 sched_data
.last_was_stop
= 0;
5987 if (t
== TYPE_UNKNOWN
)
5990 fprintf (dump
, "// Ignoring type %s\n", type_names
[t
]);
5991 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
5992 || asm_noperands (PATTERN (insn
)) >= 0)
5994 /* This must be some kind of asm. Clear the scheduling state. */
5995 rotate_two_bundles (sched_verbose
? dump
: NULL
);
5996 if (ia64_final_schedule
)
5997 group_barrier_needed_p (insn
);
6002 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6003 important state info. Don't delete this test. */
6004 if (ia64_final_schedule
6005 && group_barrier_needed_p (insn
))
6008 sched_data
.stopbit
[sched_data
.cur
] = 0;
6009 sched_data
.insns
[sched_data
.cur
] = insn
;
6010 sched_data
.types
[sched_data
.cur
] = t
;
6014 fprintf (dump
, "// Scheduling insn %d of type %s\n",
6015 INSN_UID (insn
), type_names
[t
]);
6017 if (GET_CODE (insn
) == CALL_INSN
&& ia64_final_schedule
)
6019 schedule_stop (sched_verbose
? dump
: NULL
);
6020 sched_data
.last_was_stop
= 1;
6026 /* Free data allocated by ia64_sched_init. */
6029 ia64_sched_finish (dump
, sched_verbose
)
6034 fprintf (dump
, "// Finishing schedule.\n");
6035 rotate_two_bundles (NULL
);
6040 /* Emit pseudo-ops for the assembler to describe predicate relations.
6041 At present this assumes that we only consider predicate pairs to
6042 be mutex, and that the assembler can deduce proper values from
6043 straight-line code. */
6046 emit_predicate_relation_info ()
6050 for (i
= n_basic_blocks
- 1; i
>= 0; --i
)
6052 basic_block bb
= BASIC_BLOCK (i
);
6054 rtx head
= bb
->head
;
6056 /* We only need such notes at code labels. */
6057 if (GET_CODE (head
) != CODE_LABEL
)
6059 if (GET_CODE (NEXT_INSN (head
)) == NOTE
6060 && NOTE_LINE_NUMBER (NEXT_INSN (head
)) == NOTE_INSN_BASIC_BLOCK
)
6061 head
= NEXT_INSN (head
);
6063 for (r
= PR_REG (0); r
< PR_REG (64); r
+= 2)
6064 if (REGNO_REG_SET_P (bb
->global_live_at_start
, r
))
6066 rtx p
= gen_rtx_REG (BImode
, r
);
6067 rtx n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
6068 if (head
== bb
->end
)
6074 /* Look for conditional calls that do not return, and protect predicate
6075 relations around them. Otherwise the assembler will assume the call
6076 returns, and complain about uses of call-clobbered predicates after
6078 for (i
= n_basic_blocks
- 1; i
>= 0; --i
)
6080 basic_block bb
= BASIC_BLOCK (i
);
6081 rtx insn
= bb
->head
;
6085 if (GET_CODE (insn
) == CALL_INSN
6086 && GET_CODE (PATTERN (insn
)) == COND_EXEC
6087 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
6089 rtx b
= emit_insn_before (gen_safe_across_calls_all (), insn
);
6090 rtx a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
6091 if (bb
->head
== insn
)
6093 if (bb
->end
== insn
)
6097 if (insn
== bb
->end
)
6099 insn
= NEXT_INSN (insn
);
6104 /* Generate a NOP instruction of type T. We will never generate L type
6114 return gen_nop_m ();
6116 return gen_nop_i ();
6118 return gen_nop_b ();
6120 return gen_nop_f ();
6122 return gen_nop_x ();
6128 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6129 here than while scheduling. */
6135 const struct bundle
*b
= 0;
6138 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6142 pat
= INSN_P (insn
) ? PATTERN (insn
) : const0_rtx
;
6143 if (GET_CODE (pat
) == USE
|| GET_CODE (pat
) == CLOBBER
)
6145 if ((GET_CODE (pat
) == UNSPEC
&& XINT (pat
, 1) == 22)
6146 || GET_CODE (insn
) == CODE_LABEL
)
6149 while (bundle_pos
< 3)
6151 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6154 if (GET_CODE (insn
) != CODE_LABEL
)
6155 b
= bundle
+ INTVAL (XVECEXP (pat
, 0, 0));
6161 else if (GET_CODE (pat
) == UNSPEC_VOLATILE
&& XINT (pat
, 1) == 2)
6163 int t
= INTVAL (XVECEXP (pat
, 0, 0));
6165 while (bundle_pos
< t
)
6167 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6173 if (bundle_pos
== 3)
6176 if (b
&& INSN_P (insn
))
6178 t
= ia64_safe_type (insn
);
6179 if (asm_noperands (PATTERN (insn
)) >= 0
6180 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)
6182 while (bundle_pos
< 3)
6184 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6190 if (t
== TYPE_UNKNOWN
)
6192 while (bundle_pos
< 3)
6194 if (t
== b
->t
[bundle_pos
]
6195 || (t
== TYPE_A
&& (b
->t
[bundle_pos
] == TYPE_M
6196 || b
->t
[bundle_pos
] == TYPE_I
)))
6199 emit_insn_before (gen_nop_type (b
->t
[bundle_pos
]), insn
);
6208 /* Perform machine dependent operations on the rtl chain INSNS. */
6214 /* If optimizing, we'll have split before scheduling. */
6216 split_all_insns (0);
6218 /* Make sure the CFG and global_live_at_start are correct
6219 for emit_predicate_relation_info. */
6220 find_basic_blocks (insns
, max_reg_num (), NULL
);
6221 life_analysis (insns
, NULL
, PROP_DEATH_NOTES
);
6223 if (ia64_flag_schedule_insns2
)
6225 ia64_final_schedule
= 1;
6226 schedule_ebbs (rtl_dump_file
);
6227 ia64_final_schedule
= 0;
6229 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6230 place as they were during scheduling. */
6231 emit_insn_group_barriers (rtl_dump_file
, insns
);
6235 emit_all_insn_group_barriers (rtl_dump_file
, insns
);
6238 emit_predicate_relation_info ();
6241 /* Return true if REGNO is used by the epilogue. */
6244 ia64_epilogue_uses (regno
)
6247 /* When a function makes a call through a function descriptor, we
6248 will write a (potentially) new value to "gp". After returning
6249 from such a call, we need to make sure the function restores the
6250 original gp-value, even if the function itself does not use the
6252 if (regno
== R_GR (1)
6254 && !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
))
6257 /* For functions defined with the syscall_linkage attribute, all input
6258 registers are marked as live at all function exits. This prevents the
6259 register allocator from using the input registers, which in turn makes it
6260 possible to restart a system call after an interrupt without having to
6261 save/restore the input registers. This also prevents kernel data from
6262 leaking to application code. */
6264 if (IN_REGNO_P (regno
)
6265 && lookup_attribute ("syscall_linkage",
6266 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
6269 /* Conditional return patterns can't represent the use of `b0' as
6270 the return address, so we force the value live this way. */
6271 if (regno
== R_BR (0))
6274 if (regs_ever_live
[AR_LC_REGNUM
] && regno
== AR_LC_REGNUM
)
6276 if (! current_function_is_leaf
&& regno
== AR_PFS_REGNUM
)
6278 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
6279 && regno
== AR_UNAT_REGNUM
)
6285 /* Return true if IDENTIFIER is a valid attribute for TYPE. */
6288 ia64_valid_type_attribute (type
, attributes
, identifier
, args
)
6290 tree attributes ATTRIBUTE_UNUSED
;
6294 /* We only support an attribute for function calls. */
6296 if (TREE_CODE (type
) != FUNCTION_TYPE
6297 && TREE_CODE (type
) != METHOD_TYPE
)
6300 /* The "syscall_linkage" attribute says the callee is a system call entry
6301 point. This affects ia64_epilogue_uses. */
6303 if (is_attribute_p ("syscall_linkage", identifier
))
6304 return args
== NULL_TREE
;
6309 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6311 We add @ to the name if this goes in small data/bss. We can only put
6312 a variable in small data/bss if it is defined in this module or a module
6313 that we are statically linked with. We can't check the second condition,
6314 but TREE_STATIC gives us the first one. */
6316 /* ??? If we had IPA, we could check the second condition. We could support
6317 programmer added section attributes if the variable is not defined in this
6320 /* ??? See the v850 port for a cleaner way to do this. */
6322 /* ??? We could also support own long data here. Generating movl/add/ld8
6323 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6324 code faster because there is one less load. This also includes incomplete
6325 types which can't go in sdata/sbss. */
6327 /* ??? See select_section. We must put short own readonly variables in
6328 sdata/sbss instead of the more natural rodata, because we can't perform
6329 the DECL_READONLY_SECTION test here. */
6331 extern struct obstack
* saveable_obstack
;
6334 ia64_encode_section_info (decl
)
6337 const char *symbol_str
;
6339 if (TREE_CODE (decl
) == FUNCTION_DECL
)
6341 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl
), 0)) = 1;
6345 /* Careful not to prod global register variables. */
6346 if (TREE_CODE (decl
) != VAR_DECL
6347 || GET_CODE (DECL_RTL (decl
)) != MEM
6348 || GET_CODE (XEXP (DECL_RTL (decl
), 0)) != SYMBOL_REF
)
6351 symbol_str
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
6353 /* We assume that -fpic is used only to create a shared library (dso).
6354 With -fpic, no global data can ever be sdata.
6355 Without -fpic, global common uninitialized data can never be sdata, since
6356 it can unify with a real definition in a dso. */
6357 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6358 to access them. The linker may then be able to do linker relaxation to
6359 optimize references to them. Currently sdata implies use of gprel. */
6360 /* We need the DECL_EXTERNAL check for C++. static class data members get
6361 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6362 statically allocated, but the space is allocated somewhere else. Such
6363 decls can not be own data. */
6364 if (! TARGET_NO_SDATA
6365 && TREE_STATIC (decl
) && ! DECL_EXTERNAL (decl
)
6366 && ! (DECL_ONE_ONLY (decl
) || DECL_WEAK (decl
))
6367 && ! (TREE_PUBLIC (decl
)
6369 || (DECL_COMMON (decl
)
6370 && (DECL_INITIAL (decl
) == 0
6371 || DECL_INITIAL (decl
) == error_mark_node
))))
6372 /* Either the variable must be declared without a section attribute,
6373 or the section must be sdata or sbss. */
6374 && (DECL_SECTION_NAME (decl
) == 0
6375 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl
)),
6377 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl
)),
6380 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
6382 /* If the variable has already been defined in the output file, then it
6383 is too late to put it in sdata if it wasn't put there in the first
6384 place. The test is here rather than above, because if it is already
6385 in sdata, then it can stay there. */
6387 if (TREE_ASM_WRITTEN (decl
))
6390 /* If this is an incomplete type with size 0, then we can't put it in
6391 sdata because it might be too big when completed. */
6393 && size
<= (HOST_WIDE_INT
) ia64_section_threshold
6394 && symbol_str
[0] != SDATA_NAME_FLAG_CHAR
)
6396 size_t len
= strlen (symbol_str
);
6397 char *newstr
= alloca (len
+ 1);
6400 *newstr
= SDATA_NAME_FLAG_CHAR
;
6401 memcpy (newstr
+ 1, symbol_str
, len
+ 1);
6403 string
= ggc_alloc_string (newstr
, len
+ 1);
6404 XSTR (XEXP (DECL_RTL (decl
), 0), 0) = string
;
6407 /* This decl is marked as being in small data/bss but it shouldn't
6408 be; one likely explanation for this is that the decl has been
6409 moved into a different section from the one it was in when
6410 ENCODE_SECTION_INFO was first called. Remove the '@'.*/
6411 else if (symbol_str
[0] == SDATA_NAME_FLAG_CHAR
)
6413 XSTR (XEXP (DECL_RTL (decl
), 0), 0)
6414 = ggc_strdup (symbol_str
+ 1);
6418 /* Output assembly directives for prologue regions. */
6420 /* The current basic block number. */
6422 static int block_num
;
6424 /* True if we need a copy_state command at the start of the next block. */
6426 static int need_copy_state
;
6428 /* The function emits unwind directives for the start of an epilogue. */
6433 /* If this isn't the last block of the function, then we need to label the
6434 current state, and copy it back in at the start of the next block. */
6436 if (block_num
!= n_basic_blocks
- 1)
6438 fprintf (asm_out_file
, "\t.label_state 1\n");
6439 need_copy_state
= 1;
6442 fprintf (asm_out_file
, "\t.restore sp\n");
6445 /* This function processes a SET pattern looking for specific patterns
6446 which result in emitting an assembly directive required for unwinding. */
6449 process_set (asm_out_file
, pat
)
6453 rtx src
= SET_SRC (pat
);
6454 rtx dest
= SET_DEST (pat
);
6455 int src_regno
, dest_regno
;
6457 /* Look for the ALLOC insn. */
6458 if (GET_CODE (src
) == UNSPEC_VOLATILE
6459 && XINT (src
, 1) == 0
6460 && GET_CODE (dest
) == REG
)
6462 dest_regno
= REGNO (dest
);
6464 /* If this isn't the final destination for ar.pfs, the alloc
6465 shouldn't have been marked frame related. */
6466 if (dest_regno
!= current_frame_info
.reg_save_ar_pfs
)
6469 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
6470 ia64_dbx_register_number (dest_regno
));
6474 /* Look for SP = .... */
6475 if (GET_CODE (dest
) == REG
&& REGNO (dest
) == STACK_POINTER_REGNUM
)
6477 if (GET_CODE (src
) == PLUS
)
6479 rtx op0
= XEXP (src
, 0);
6480 rtx op1
= XEXP (src
, 1);
6481 if (op0
== dest
&& GET_CODE (op1
) == CONST_INT
)
6483 if (INTVAL (op1
) < 0)
6485 fputs ("\t.fframe ", asm_out_file
);
6486 fprintf (asm_out_file
, HOST_WIDE_INT_PRINT_DEC
,
6488 fputc ('\n', asm_out_file
);
6491 process_epilogue ();
6496 else if (GET_CODE (src
) == REG
6497 && REGNO (src
) == HARD_FRAME_POINTER_REGNUM
)
6498 process_epilogue ();
6505 /* Register move we need to look at. */
6506 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == REG
)
6508 src_regno
= REGNO (src
);
6509 dest_regno
= REGNO (dest
);
6514 /* Saving return address pointer. */
6515 if (dest_regno
!= current_frame_info
.reg_save_b0
)
6517 fprintf (asm_out_file
, "\t.save rp, r%d\n",
6518 ia64_dbx_register_number (dest_regno
));
6522 if (dest_regno
!= current_frame_info
.reg_save_pr
)
6524 fprintf (asm_out_file
, "\t.save pr, r%d\n",
6525 ia64_dbx_register_number (dest_regno
));
6528 case AR_UNAT_REGNUM
:
6529 if (dest_regno
!= current_frame_info
.reg_save_ar_unat
)
6531 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
6532 ia64_dbx_register_number (dest_regno
));
6536 if (dest_regno
!= current_frame_info
.reg_save_ar_lc
)
6538 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
6539 ia64_dbx_register_number (dest_regno
));
6542 case STACK_POINTER_REGNUM
:
6543 if (dest_regno
!= HARD_FRAME_POINTER_REGNUM
6544 || ! frame_pointer_needed
)
6546 fprintf (asm_out_file
, "\t.vframe r%d\n",
6547 ia64_dbx_register_number (dest_regno
));
6551 /* Everything else should indicate being stored to memory. */
6556 /* Memory store we need to look at. */
6557 if (GET_CODE (dest
) == MEM
&& GET_CODE (src
) == REG
)
6563 if (GET_CODE (XEXP (dest
, 0)) == REG
)
6565 base
= XEXP (dest
, 0);
6568 else if (GET_CODE (XEXP (dest
, 0)) == PLUS
6569 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
)
6571 base
= XEXP (XEXP (dest
, 0), 0);
6572 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
6577 if (base
== hard_frame_pointer_rtx
)
6579 saveop
= ".savepsp";
6582 else if (base
== stack_pointer_rtx
)
6587 src_regno
= REGNO (src
);
6591 if (current_frame_info
.reg_save_b0
!= 0)
6593 fprintf (asm_out_file
, "\t%s rp, %ld\n", saveop
, off
);
6597 if (current_frame_info
.reg_save_pr
!= 0)
6599 fprintf (asm_out_file
, "\t%s pr, %ld\n", saveop
, off
);
6603 if (current_frame_info
.reg_save_ar_lc
!= 0)
6605 fprintf (asm_out_file
, "\t%s ar.lc, %ld\n", saveop
, off
);
6609 if (current_frame_info
.reg_save_ar_pfs
!= 0)
6611 fprintf (asm_out_file
, "\t%s ar.pfs, %ld\n", saveop
, off
);
6614 case AR_UNAT_REGNUM
:
6615 if (current_frame_info
.reg_save_ar_unat
!= 0)
6617 fprintf (asm_out_file
, "\t%s ar.unat, %ld\n", saveop
, off
);
6624 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
6625 1 << (src_regno
- GR_REG (4)));
6633 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
6634 1 << (src_regno
- BR_REG (1)));
6641 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
6642 1 << (src_regno
- FR_REG (2)));
6645 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
6646 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
6647 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
6648 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
6649 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
6650 1 << (src_regno
- FR_REG (12)));
6662 /* This function looks at a single insn and emits any directives
6663 required to unwind this insn. */
6665 process_for_unwind_directive (asm_out_file
, insn
)
6669 if (flag_unwind_tables
6670 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
6674 if (GET_CODE (insn
) == NOTE
6675 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
6677 block_num
= NOTE_BASIC_BLOCK (insn
)->index
;
6679 /* Restore unwind state from immediately before the epilogue. */
6680 if (need_copy_state
)
6682 fprintf (asm_out_file
, "\t.body\n");
6683 fprintf (asm_out_file
, "\t.copy_state 1\n");
6684 need_copy_state
= 0;
6688 if (! RTX_FRAME_RELATED_P (insn
))
6691 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
6693 pat
= XEXP (pat
, 0);
6695 pat
= PATTERN (insn
);
6697 switch (GET_CODE (pat
))
6700 process_set (asm_out_file
, pat
);
6706 int limit
= XVECLEN (pat
, 0);
6707 for (par_index
= 0; par_index
< limit
; par_index
++)
6709 rtx x
= XVECEXP (pat
, 0, par_index
);
6710 if (GET_CODE (x
) == SET
)
6711 process_set (asm_out_file
, x
);
6724 ia64_init_builtins ()
6726 tree psi_type_node
= build_pointer_type (integer_type_node
);
6727 tree pdi_type_node
= build_pointer_type (long_integer_type_node
);
6728 tree endlink
= void_list_node
;
6730 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
6731 tree si_ftype_psi_si_si
6732 = build_function_type (integer_type_node
,
6733 tree_cons (NULL_TREE
, psi_type_node
,
6734 tree_cons (NULL_TREE
, integer_type_node
,
6735 tree_cons (NULL_TREE
,
6739 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
6740 tree di_ftype_pdi_di_di
6741 = build_function_type (long_integer_type_node
,
6742 tree_cons (NULL_TREE
, pdi_type_node
,
6743 tree_cons (NULL_TREE
,
6744 long_integer_type_node
,
6745 tree_cons (NULL_TREE
,
6746 long_integer_type_node
,
6748 /* __sync_synchronize */
6749 tree void_ftype_void
6750 = build_function_type (void_type_node
, endlink
);
6752 /* __sync_lock_test_and_set_si */
6753 tree si_ftype_psi_si
6754 = build_function_type (integer_type_node
,
6755 tree_cons (NULL_TREE
, psi_type_node
,
6756 tree_cons (NULL_TREE
, integer_type_node
, endlink
)));
6758 /* __sync_lock_test_and_set_di */
6759 tree di_ftype_pdi_di
6760 = build_function_type (long_integer_type_node
,
6761 tree_cons (NULL_TREE
, pdi_type_node
,
6762 tree_cons (NULL_TREE
, long_integer_type_node
,
6765 /* __sync_lock_release_si */
6767 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, psi_type_node
,
6770 /* __sync_lock_release_di */
6772 = build_function_type (void_type_node
, tree_cons (NULL_TREE
, pdi_type_node
,
6775 #define def_builtin(name, type, code) \
6776 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR)
6778 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si
,
6779 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
);
6780 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di
,
6781 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
);
6782 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si
,
6783 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
);
6784 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di
,
6785 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
);
6787 def_builtin ("__sync_synchronize", void_ftype_void
,
6788 IA64_BUILTIN_SYNCHRONIZE
);
6790 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si
,
6791 IA64_BUILTIN_LOCK_TEST_AND_SET_SI
);
6792 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di
,
6793 IA64_BUILTIN_LOCK_TEST_AND_SET_DI
);
6794 def_builtin ("__sync_lock_release_si", void_ftype_psi
,
6795 IA64_BUILTIN_LOCK_RELEASE_SI
);
6796 def_builtin ("__sync_lock_release_di", void_ftype_pdi
,
6797 IA64_BUILTIN_LOCK_RELEASE_DI
);
6799 def_builtin ("__builtin_ia64_bsp",
6800 build_function_type (ptr_type_node
, endlink
),
6803 def_builtin ("__builtin_ia64_flushrs",
6804 build_function_type (void_type_node
, endlink
),
6805 IA64_BUILTIN_FLUSHRS
);
6807 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si
,
6808 IA64_BUILTIN_FETCH_AND_ADD_SI
);
6809 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si
,
6810 IA64_BUILTIN_FETCH_AND_SUB_SI
);
6811 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si
,
6812 IA64_BUILTIN_FETCH_AND_OR_SI
);
6813 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si
,
6814 IA64_BUILTIN_FETCH_AND_AND_SI
);
6815 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si
,
6816 IA64_BUILTIN_FETCH_AND_XOR_SI
);
6817 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si
,
6818 IA64_BUILTIN_FETCH_AND_NAND_SI
);
6820 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si
,
6821 IA64_BUILTIN_ADD_AND_FETCH_SI
);
6822 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si
,
6823 IA64_BUILTIN_SUB_AND_FETCH_SI
);
6824 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si
,
6825 IA64_BUILTIN_OR_AND_FETCH_SI
);
6826 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si
,
6827 IA64_BUILTIN_AND_AND_FETCH_SI
);
6828 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si
,
6829 IA64_BUILTIN_XOR_AND_FETCH_SI
);
6830 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si
,
6831 IA64_BUILTIN_NAND_AND_FETCH_SI
);
6833 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di
,
6834 IA64_BUILTIN_FETCH_AND_ADD_DI
);
6835 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di
,
6836 IA64_BUILTIN_FETCH_AND_SUB_DI
);
6837 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di
,
6838 IA64_BUILTIN_FETCH_AND_OR_DI
);
6839 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di
,
6840 IA64_BUILTIN_FETCH_AND_AND_DI
);
6841 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di
,
6842 IA64_BUILTIN_FETCH_AND_XOR_DI
);
6843 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di
,
6844 IA64_BUILTIN_FETCH_AND_NAND_DI
);
6846 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di
,
6847 IA64_BUILTIN_ADD_AND_FETCH_DI
);
6848 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di
,
6849 IA64_BUILTIN_SUB_AND_FETCH_DI
);
6850 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di
,
6851 IA64_BUILTIN_OR_AND_FETCH_DI
);
6852 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di
,
6853 IA64_BUILTIN_AND_AND_FETCH_DI
);
6854 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di
,
6855 IA64_BUILTIN_XOR_AND_FETCH_DI
);
6856 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di
,
6857 IA64_BUILTIN_NAND_AND_FETCH_DI
);
6862 /* Expand fetch_and_op intrinsics. The basic code sequence is:
6870 cmpxchgsz.acq tmp = [ptr], tmp
6871 } while (tmp != ret)
6875 ia64_expand_fetch_and_op (binoptab
, mode
, arglist
, target
)
6877 enum machine_mode mode
;
6881 rtx ret
, label
, tmp
, ccv
, insn
, mem
, value
;
6884 arg0
= TREE_VALUE (arglist
);
6885 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
6886 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
6887 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
6889 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
6890 MEM_VOLATILE_P (mem
) = 1;
6892 if (target
&& register_operand (target
, mode
))
6895 ret
= gen_reg_rtx (mode
);
6897 emit_insn (gen_mf ());
6899 /* Special case for fetchadd instructions. */
6900 if (binoptab
== add_optab
&& fetchadd_operand (value
, VOIDmode
))
6903 insn
= gen_fetchadd_acq_si (ret
, mem
, value
);
6905 insn
= gen_fetchadd_acq_di (ret
, mem
, value
);
6910 tmp
= gen_reg_rtx (mode
);
6911 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
6912 emit_move_insn (tmp
, mem
);
6914 label
= gen_label_rtx ();
6916 emit_move_insn (ret
, tmp
);
6917 emit_move_insn (ccv
, tmp
);
6919 /* Perform the specific operation. Special case NAND by noticing
6920 one_cmpl_optab instead. */
6921 if (binoptab
== one_cmpl_optab
)
6923 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
6924 binoptab
= and_optab
;
6926 tmp
= expand_binop (mode
, binoptab
, tmp
, value
, tmp
, 1, OPTAB_WIDEN
);
6929 insn
= gen_cmpxchg_acq_si (tmp
, mem
, tmp
, ccv
);
6931 insn
= gen_cmpxchg_acq_di (tmp
, mem
, tmp
, ccv
);
6934 emit_cmp_and_jump_insns (tmp
, ret
, NE
, 0, mode
, 1, 0, label
);
6939 /* Expand op_and_fetch intrinsics. The basic code sequence is:
6947 cmpxchgsz.acq tmp = [ptr], ret
6948 } while (tmp != old)
6952 ia64_expand_op_and_fetch (binoptab
, mode
, arglist
, target
)
6954 enum machine_mode mode
;
6958 rtx old
, label
, tmp
, ret
, ccv
, insn
, mem
, value
;
6961 arg0
= TREE_VALUE (arglist
);
6962 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
6963 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
6964 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
6966 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
6967 MEM_VOLATILE_P (mem
) = 1;
6969 if (target
&& ! register_operand (target
, mode
))
6972 emit_insn (gen_mf ());
6973 tmp
= gen_reg_rtx (mode
);
6974 old
= gen_reg_rtx (mode
);
6975 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
6977 emit_move_insn (tmp
, mem
);
6979 label
= gen_label_rtx ();
6981 emit_move_insn (old
, tmp
);
6982 emit_move_insn (ccv
, tmp
);
6984 /* Perform the specific operation. Special case NAND by noticing
6985 one_cmpl_optab instead. */
6986 if (binoptab
== one_cmpl_optab
)
6988 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
6989 binoptab
= and_optab
;
6991 ret
= expand_binop (mode
, binoptab
, tmp
, value
, target
, 1, OPTAB_WIDEN
);
6994 insn
= gen_cmpxchg_acq_si (tmp
, mem
, ret
, ccv
);
6996 insn
= gen_cmpxchg_acq_di (tmp
, mem
, ret
, ccv
);
6999 emit_cmp_and_jump_insns (tmp
, old
, NE
, 0, mode
, 1, 0, label
);
7004 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7008 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7011 For bool_ it's the same except return ret == oldval.
7015 ia64_expand_compare_and_swap (mode
, boolp
, arglist
, target
)
7016 enum machine_mode mode
;
7021 tree arg0
, arg1
, arg2
;
7022 rtx mem
, old
, new, ccv
, tmp
, insn
;
7024 arg0
= TREE_VALUE (arglist
);
7025 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7026 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
7027 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7028 old
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7029 new = expand_expr (arg2
, NULL_RTX
, mode
, 0);
7031 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7032 MEM_VOLATILE_P (mem
) = 1;
7034 if (! register_operand (old
, mode
))
7035 old
= copy_to_mode_reg (mode
, old
);
7036 if (! register_operand (new, mode
))
7037 new = copy_to_mode_reg (mode
, new);
7039 if (! boolp
&& target
&& register_operand (target
, mode
))
7042 tmp
= gen_reg_rtx (mode
);
7044 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7045 emit_move_insn (ccv
, old
);
7046 emit_insn (gen_mf ());
7048 insn
= gen_cmpxchg_acq_si (tmp
, mem
, new, ccv
);
7050 insn
= gen_cmpxchg_acq_di (tmp
, mem
, new, ccv
);
7056 target
= gen_reg_rtx (mode
);
7057 return emit_store_flag_force (target
, EQ
, tmp
, old
, mode
, 1, 1);
7063 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7066 ia64_expand_lock_test_and_set (mode
, arglist
, target
)
7067 enum machine_mode mode
;
7072 rtx mem
, new, ret
, insn
;
7074 arg0
= TREE_VALUE (arglist
);
7075 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7076 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7077 new = expand_expr (arg1
, NULL_RTX
, mode
, 0);
7079 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7080 MEM_VOLATILE_P (mem
) = 1;
7081 if (! register_operand (new, mode
))
7082 new = copy_to_mode_reg (mode
, new);
7084 if (target
&& register_operand (target
, mode
))
7087 ret
= gen_reg_rtx (mode
);
7090 insn
= gen_xchgsi (ret
, mem
, new);
7092 insn
= gen_xchgdi (ret
, mem
, new);
7098 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7101 ia64_expand_lock_release (mode
, arglist
, target
)
7102 enum machine_mode mode
;
7104 rtx target ATTRIBUTE_UNUSED
;
7109 arg0
= TREE_VALUE (arglist
);
7110 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7112 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7113 MEM_VOLATILE_P (mem
) = 1;
7115 emit_move_insn (mem
, const0_rtx
);
7121 ia64_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
7124 rtx subtarget ATTRIBUTE_UNUSED
;
7125 enum machine_mode mode ATTRIBUTE_UNUSED
;
7126 int ignore ATTRIBUTE_UNUSED
;
7128 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
7129 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
7130 tree arglist
= TREE_OPERAND (exp
, 1);
7134 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
7135 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
7136 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
7137 case IA64_BUILTIN_LOCK_RELEASE_SI
:
7138 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
7139 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
7140 case IA64_BUILTIN_FETCH_AND_OR_SI
:
7141 case IA64_BUILTIN_FETCH_AND_AND_SI
:
7142 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
7143 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
7144 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
7145 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
7146 case IA64_BUILTIN_OR_AND_FETCH_SI
:
7147 case IA64_BUILTIN_AND_AND_FETCH_SI
:
7148 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
7149 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
7153 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
7154 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
7155 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
7156 case IA64_BUILTIN_LOCK_RELEASE_DI
:
7157 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
7158 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
7159 case IA64_BUILTIN_FETCH_AND_OR_DI
:
7160 case IA64_BUILTIN_FETCH_AND_AND_DI
:
7161 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
7162 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
7163 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
7164 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
7165 case IA64_BUILTIN_OR_AND_FETCH_DI
:
7166 case IA64_BUILTIN_AND_AND_FETCH_DI
:
7167 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
7168 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
7178 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
7179 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
7180 return ia64_expand_compare_and_swap (mode
, 1, arglist
, target
);
7182 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
7183 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
7184 return ia64_expand_compare_and_swap (mode
, 0, arglist
, target
);
7186 case IA64_BUILTIN_SYNCHRONIZE
:
7187 emit_insn (gen_mf ());
7190 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
7191 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
7192 return ia64_expand_lock_test_and_set (mode
, arglist
, target
);
7194 case IA64_BUILTIN_LOCK_RELEASE_SI
:
7195 case IA64_BUILTIN_LOCK_RELEASE_DI
:
7196 return ia64_expand_lock_release (mode
, arglist
, target
);
7198 case IA64_BUILTIN_BSP
:
7199 if (! target
|| ! register_operand (target
, DImode
))
7200 target
= gen_reg_rtx (DImode
);
7201 emit_insn (gen_bsp_value (target
));
7204 case IA64_BUILTIN_FLUSHRS
:
7205 emit_insn (gen_flushrs ());
7208 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
7209 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
7210 return ia64_expand_fetch_and_op (add_optab
, mode
, arglist
, target
);
7212 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
7213 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
7214 return ia64_expand_fetch_and_op (sub_optab
, mode
, arglist
, target
);
7216 case IA64_BUILTIN_FETCH_AND_OR_SI
:
7217 case IA64_BUILTIN_FETCH_AND_OR_DI
:
7218 return ia64_expand_fetch_and_op (ior_optab
, mode
, arglist
, target
);
7220 case IA64_BUILTIN_FETCH_AND_AND_SI
:
7221 case IA64_BUILTIN_FETCH_AND_AND_DI
:
7222 return ia64_expand_fetch_and_op (and_optab
, mode
, arglist
, target
);
7224 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
7225 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
7226 return ia64_expand_fetch_and_op (xor_optab
, mode
, arglist
, target
);
7228 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
7229 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
7230 return ia64_expand_fetch_and_op (one_cmpl_optab
, mode
, arglist
, target
);
7232 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
7233 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
7234 return ia64_expand_op_and_fetch (add_optab
, mode
, arglist
, target
);
7236 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
7237 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
7238 return ia64_expand_op_and_fetch (sub_optab
, mode
, arglist
, target
);
7240 case IA64_BUILTIN_OR_AND_FETCH_SI
:
7241 case IA64_BUILTIN_OR_AND_FETCH_DI
:
7242 return ia64_expand_op_and_fetch (ior_optab
, mode
, arglist
, target
);
7244 case IA64_BUILTIN_AND_AND_FETCH_SI
:
7245 case IA64_BUILTIN_AND_AND_FETCH_DI
:
7246 return ia64_expand_op_and_fetch (and_optab
, mode
, arglist
, target
);
7248 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
7249 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
7250 return ia64_expand_op_and_fetch (xor_optab
, mode
, arglist
, target
);
7252 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
7253 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
7254 return ia64_expand_op_and_fetch (one_cmpl_optab
, mode
, arglist
, target
);