1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-attr.h"
35 #include "flags.h"
36 #include "recog.h"
37 #include "expr.h"
38 #include "obstack.h"
39 #include "except.h"
40 #include "function.h"
41 #include "ggc.h"
42 #include "basic-block.h"
43 #include "toplev.h"
44 #include "sched-int.h"
46 /* This is used for communication between ASM_OUTPUT_LABEL and
47 ASM_OUTPUT_LABELREF. */
48 int ia64_asm_output_label = 0;
50 /* Define the information needed to generate branch and scc insns. This is
51 stored from the compare operation. */
52 struct rtx_def * ia64_compare_op0;
53 struct rtx_def * ia64_compare_op1;
55 /* Register names for ia64_expand_prologue. */
56 static const char * const ia64_reg_numbers[96] =
57 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
58 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
59 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
60 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
61 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
62 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
63 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
64 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
65 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
66 "r104","r105","r106","r107","r108","r109","r110","r111",
67 "r112","r113","r114","r115","r116","r117","r118","r119",
68 "r120","r121","r122","r123","r124","r125","r126","r127"};
70 /* ??? These strings could be shared with REGISTER_NAMES. */
71 static const char * const ia64_input_reg_names[8] =
72 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
74 /* ??? These strings could be shared with REGISTER_NAMES. */
75 static const char * const ia64_local_reg_names[80] =
76 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
77 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
78 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
79 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
80 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
81 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
82 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
83 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
84 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
85 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
87 /* ??? These strings could be shared with REGISTER_NAMES. */
88 static const char * const ia64_output_reg_names[8] =
89 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
91 /* String used with the -mfixed-range= option. */
92 const char *ia64_fixed_range_string;
94 /* Determines whether we run our final scheduling pass or not. We always
95 avoid the normal second scheduling pass. */
96 static int ia64_flag_schedule_insns2;
98 /* Variables which are this size or smaller are put in the sdata/sbss
99 sections. */
101 unsigned int ia64_section_threshold;
103 static int find_gr_spill PARAMS ((int));
104 static int next_scratch_gr_reg PARAMS ((void));
105 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
106 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
107 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
108 static void finish_spill_pointers PARAMS ((void));
109 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
110 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
111 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
112 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
113 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
114 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
116 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
117 static void fix_range PARAMS ((const char *));
118 static void ia64_add_gc_roots PARAMS ((void));
119 static void ia64_init_machine_status PARAMS ((struct function *));
120 static void ia64_mark_machine_status PARAMS ((struct function *));
121 static void ia64_free_machine_status PARAMS ((struct function *));
122 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
123 static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
124 static void emit_predicate_relation_info PARAMS ((void));
125 static void process_epilogue PARAMS ((void));
126 static int process_set PARAMS ((FILE *, rtx));
128 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
129 tree, rtx));
130 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
131 tree, rtx));
132 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
133 tree, rtx));
134 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
135 tree, rtx));
136 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
138 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
141 call_operand (op, mode)
142 rtx op;
143 enum machine_mode mode;
145 if (mode != GET_MODE (op))
146 return 0;
148 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
149 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
152 /* Return 1 if OP refers to a symbol in the sdata section. */
155 sdata_symbolic_operand (op, mode)
156 rtx op;
157 enum machine_mode mode ATTRIBUTE_UNUSED;
159 switch (GET_CODE (op))
161 case CONST:
162 if (GET_CODE (XEXP (op, 0)) != PLUS
163 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
164 break;
165 op = XEXP (XEXP (op, 0), 0);
166 /* FALLTHRU */
168 case SYMBOL_REF:
169 if (CONSTANT_POOL_ADDRESS_P (op))
170 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
171 else
172 return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;
174 default:
175 break;
178 return 0;
181 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
184 got_symbolic_operand (op, mode)
185 rtx op;
186 enum machine_mode mode ATTRIBUTE_UNUSED;
188 switch (GET_CODE (op))
190 case CONST:
191 op = XEXP (op, 0);
192 if (GET_CODE (op) != PLUS)
193 return 0;
194 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
195 return 0;
196 op = XEXP (op, 1);
197 if (GET_CODE (op) != CONST_INT)
198 return 0;
200 return 1;
202 /* Ok if we're not using GOT entries at all. */
203 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
204 return 1;
206 /* "Ok" while emitting rtl, since otherwise we won't be provided
207 with the entire offset during emission, which makes it very
208 hard to split the offset into high and low parts. */
209 if (rtx_equal_function_value_matters)
210 return 1;
212 /* Force the low 14 bits of the constant to zero so that we do not
213 use up so many GOT entries. */
214 return (INTVAL (op) & 0x3fff) == 0;
216 case SYMBOL_REF:
217 case LABEL_REF:
218 return 1;
220 default:
221 break;
223 return 0;
226 /* Return 1 if OP refers to a symbol. */
229 symbolic_operand (op, mode)
230 rtx op;
231 enum machine_mode mode ATTRIBUTE_UNUSED;
233 switch (GET_CODE (op))
235 case CONST:
236 case SYMBOL_REF:
237 case LABEL_REF:
238 return 1;
240 default:
241 break;
243 return 0;
246 /* Return 1 if OP refers to a function. */
249 function_operand (op, mode)
250 rtx op;
251 enum machine_mode mode ATTRIBUTE_UNUSED;
253 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
254 return 1;
255 else
256 return 0;
259 /* Return 1 if OP is setjmp or a similar function. */
261 /* ??? This is an unsatisfying solution. Should rethink. */
264 setjmp_operand (op, mode)
265 rtx op;
266 enum machine_mode mode ATTRIBUTE_UNUSED;
268 const char *name;
269 int retval = 0;
271 if (GET_CODE (op) != SYMBOL_REF)
272 return 0;
274 name = XSTR (op, 0);
276 /* The following code is borrowed from special_function_p in calls.c. */
278 /* Disregard prefix _, __ or __x. */
279 if (name[0] == '_')
281 if (name[1] == '_' && name[2] == 'x')
282 name += 3;
283 else if (name[1] == '_')
284 name += 2;
285 else
286 name += 1;
289 if (name[0] == 's')
291 retval
292 = ((name[1] == 'e'
293 && (! strcmp (name, "setjmp")
294 || ! strcmp (name, "setjmp_syscall")))
295 || (name[1] == 'i'
296 && ! strcmp (name, "sigsetjmp"))
297 || (name[1] == 'a'
298 && ! strcmp (name, "savectx")));
300 else if ((name[0] == 'q' && name[1] == 's'
301 && ! strcmp (name, "qsetjmp"))
302 || (name[0] == 'v' && name[1] == 'f'
303 && ! strcmp (name, "vfork")))
304 retval = 1;
306 return retval;
309 /* Return 1 if OP is a general operand, excluding symbolic operands when
310 generating PIC. */
312 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
313 from PREDICATE_CODES. */
316 move_operand (op, mode)
317 rtx op;
318 enum machine_mode mode;
320 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
321 return 0;
323 return general_operand (op, mode);
326 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
329 gr_register_operand (op, mode)
330 rtx op;
331 enum machine_mode mode;
333 if (! register_operand (op, mode))
334 return 0;
335 if (GET_CODE (op) == SUBREG)
336 op = SUBREG_REG (op);
337 if (GET_CODE (op) == REG)
339 unsigned int regno = REGNO (op);
340 if (regno < FIRST_PSEUDO_REGISTER)
341 return GENERAL_REGNO_P (regno);
343 return 1;
346 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
349 fr_register_operand (op, mode)
350 rtx op;
351 enum machine_mode mode;
353 if (! register_operand (op, mode))
354 return 0;
355 if (GET_CODE (op) == SUBREG)
356 op = SUBREG_REG (op);
357 if (GET_CODE (op) == REG)
359 unsigned int regno = REGNO (op);
360 if (regno < FIRST_PSEUDO_REGISTER)
361 return FR_REGNO_P (regno);
363 return 1;
366 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
369 grfr_register_operand (op, mode)
370 rtx op;
371 enum machine_mode mode;
373 if (! register_operand (op, mode))
374 return 0;
375 if (GET_CODE (op) == SUBREG)
376 op = SUBREG_REG (op);
377 if (GET_CODE (op) == REG)
379 unsigned int regno = REGNO (op);
380 if (regno < FIRST_PSEUDO_REGISTER)
381 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
383 return 1;
386 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
389 gr_nonimmediate_operand (op, mode)
390 rtx op;
391 enum machine_mode mode;
393 if (! nonimmediate_operand (op, mode))
394 return 0;
395 if (GET_CODE (op) == SUBREG)
396 op = SUBREG_REG (op);
397 if (GET_CODE (op) == REG)
399 unsigned int regno = REGNO (op);
400 if (regno < FIRST_PSEUDO_REGISTER)
401 return GENERAL_REGNO_P (regno);
403 return 1;
406 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
409 fr_nonimmediate_operand (op, mode)
410 rtx op;
411 enum machine_mode mode;
413 if (! nonimmediate_operand (op, mode))
414 return 0;
415 if (GET_CODE (op) == SUBREG)
416 op = SUBREG_REG (op);
417 if (GET_CODE (op) == REG)
419 unsigned int regno = REGNO (op);
420 if (regno < FIRST_PSEUDO_REGISTER)
421 return FR_REGNO_P (regno);
423 return 1;
426 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
429 grfr_nonimmediate_operand (op, mode)
430 rtx op;
431 enum machine_mode mode;
433 if (! nonimmediate_operand (op, mode))
434 return 0;
435 if (GET_CODE (op) == SUBREG)
436 op = SUBREG_REG (op);
437 if (GET_CODE (op) == REG)
439 unsigned int regno = REGNO (op);
440 if (regno < FIRST_PSEUDO_REGISTER)
441 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
443 return 1;
446 /* Return 1 if OP is a GR register operand, or zero. */
449 gr_reg_or_0_operand (op, mode)
450 rtx op;
451 enum machine_mode mode;
453 return (op == const0_rtx || gr_register_operand (op, mode));
456 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
459 gr_reg_or_5bit_operand (op, mode)
460 rtx op;
461 enum machine_mode mode;
463 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
464 || GET_CODE (op) == CONSTANT_P_RTX
465 || gr_register_operand (op, mode));
468 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
471 gr_reg_or_6bit_operand (op, mode)
472 rtx op;
473 enum machine_mode mode;
475 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
476 || GET_CODE (op) == CONSTANT_P_RTX
477 || gr_register_operand (op, mode));
480 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
483 gr_reg_or_8bit_operand (op, mode)
484 rtx op;
485 enum machine_mode mode;
487 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
488 || GET_CODE (op) == CONSTANT_P_RTX
489 || gr_register_operand (op, mode));
492 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
495 grfr_reg_or_8bit_operand (op, mode)
496 rtx op;
497 enum machine_mode mode;
499 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
500 || GET_CODE (op) == CONSTANT_P_RTX
501 || grfr_register_operand (op, mode));
504 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
505 operand. */
508 gr_reg_or_8bit_adjusted_operand (op, mode)
509 rtx op;
510 enum machine_mode mode;
512 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
513 || GET_CODE (op) == CONSTANT_P_RTX
514 || gr_register_operand (op, mode));
517 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
518 immediate and an 8 bit adjusted immediate operand. This is necessary
519 because when we emit a compare, we don't know what the condition will be,
520 so we need the intersection of the immediates accepted by GT and LT. */
523 gr_reg_or_8bit_and_adjusted_operand (op, mode)
524 rtx op;
525 enum machine_mode mode;
527 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
528 && CONST_OK_FOR_L (INTVAL (op)))
529 || GET_CODE (op) == CONSTANT_P_RTX
530 || gr_register_operand (op, mode));
533 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
536 gr_reg_or_14bit_operand (op, mode)
537 rtx op;
538 enum machine_mode mode;
540 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
541 || GET_CODE (op) == CONSTANT_P_RTX
542 || gr_register_operand (op, mode));
545 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
548 gr_reg_or_22bit_operand (op, mode)
549 rtx op;
550 enum machine_mode mode;
552 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
553 || GET_CODE (op) == CONSTANT_P_RTX
554 || gr_register_operand (op, mode));
557 /* Return 1 if OP is a 6 bit immediate operand. */
560 shift_count_operand (op, mode)
561 rtx op;
562 enum machine_mode mode ATTRIBUTE_UNUSED;
564 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
565 || GET_CODE (op) == CONSTANT_P_RTX);
568 /* Return 1 if OP is a 5 bit immediate operand. */
571 shift_32bit_count_operand (op, mode)
572 rtx op;
573 enum machine_mode mode ATTRIBUTE_UNUSED;
575 return ((GET_CODE (op) == CONST_INT
576 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
577 || GET_CODE (op) == CONSTANT_P_RTX);
580 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
583 shladd_operand (op, mode)
584 rtx op;
585 enum machine_mode mode ATTRIBUTE_UNUSED;
587 return (GET_CODE (op) == CONST_INT
588 && (INTVAL (op) == 2 || INTVAL (op) == 4
589 || INTVAL (op) == 8 || INTVAL (op) == 16));
592 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
595 fetchadd_operand (op, mode)
596 rtx op;
597 enum machine_mode mode ATTRIBUTE_UNUSED;
599 return (GET_CODE (op) == CONST_INT
600 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
601 INTVAL (op) == -4 || INTVAL (op) == -1 ||
602 INTVAL (op) == 1 || INTVAL (op) == 4 ||
603 INTVAL (op) == 8 || INTVAL (op) == 16));
606 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
609 fr_reg_or_fp01_operand (op, mode)
610 rtx op;
611 enum machine_mode mode;
613 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
614 || fr_register_operand (op, mode));
617 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
618 POST_MODIFY with a REG as displacement. */
621 destination_operand (op, mode)
622 rtx op;
623 enum machine_mode mode;
625 if (! nonimmediate_operand (op, mode))
626 return 0;
627 if (GET_CODE (op) == MEM
628 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
629 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
630 return 0;
631 return 1;
634 /* Like memory_operand, but don't allow post-increments. */
637 not_postinc_memory_operand (op, mode)
638 rtx op;
639 enum machine_mode mode;
641 return (memory_operand (op, mode)
642 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
645 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
646 signed immediate operand. */
649 normal_comparison_operator (op, mode)
650 register rtx op;
651 enum machine_mode mode;
653 enum rtx_code code = GET_CODE (op);
654 return ((mode == VOIDmode || GET_MODE (op) == mode)
655 && (code == EQ || code == NE
656 || code == GT || code == LE || code == GTU || code == LEU));
659 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
660 signed immediate operand. */
663 adjusted_comparison_operator (op, mode)
664 register rtx op;
665 enum machine_mode mode;
667 enum rtx_code code = GET_CODE (op);
668 return ((mode == VOIDmode || GET_MODE (op) == mode)
669 && (code == LT || code == GE || code == LTU || code == GEU));
672 /* Return 1 if this is a signed inequality operator. */
675 signed_inequality_operator (op, mode)
676 register rtx op;
677 enum machine_mode mode;
679 enum rtx_code code = GET_CODE (op);
680 return ((mode == VOIDmode || GET_MODE (op) == mode)
681 && (code == GE || code == GT
682 || code == LE || code == LT));
685 /* Return 1 if this operator is valid for predication. */
688 predicate_operator (op, mode)
689 register rtx op;
690 enum machine_mode mode;
692 enum rtx_code code = GET_CODE (op);
693 return ((GET_MODE (op) == mode || mode == VOIDmode)
694 && (code == EQ || code == NE));
697 /* Return 1 if this is the ar.lc register. */
700 ar_lc_reg_operand (op, mode)
701 register rtx op;
702 enum machine_mode mode;
704 return (GET_MODE (op) == DImode
705 && (mode == DImode || mode == VOIDmode)
706 && GET_CODE (op) == REG
707 && REGNO (op) == AR_LC_REGNUM);
710 /* Return 1 if this is the ar.ccv register. */
713 ar_ccv_reg_operand (op, mode)
714 register rtx op;
715 enum machine_mode mode;
717 return ((GET_MODE (op) == mode || mode == VOIDmode)
718 && GET_CODE (op) == REG
719 && REGNO (op) == AR_CCV_REGNUM);
722 /* Like general_operand, but don't allow (mem (addressof)). */
725 general_tfmode_operand (op, mode)
726 rtx op;
727 enum machine_mode mode;
729 if (! general_operand (op, mode))
730 return 0;
731 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
732 return 0;
733 return 1;
736 /* Similarly. */
739 destination_tfmode_operand (op, mode)
740 rtx op;
741 enum machine_mode mode;
743 if (! destination_operand (op, mode))
744 return 0;
745 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
746 return 0;
747 return 1;
750 /* Similarly. */
753 tfreg_or_fp01_operand (op, mode)
754 rtx op;
755 enum machine_mode mode;
757 if (GET_CODE (op) == SUBREG)
758 return 0;
759 return fr_reg_or_fp01_operand (op, mode);
762 /* Return 1 if the operands of a move are ok. */
765 ia64_move_ok (dst, src)
766 rtx dst, src;
768 /* If we're under init_recog_no_volatile, we'll not be able to use
769 memory_operand. So check the code directly and don't worry about
770 the validity of the underlying address, which should have been
771 checked elsewhere anyway. */
772 if (GET_CODE (dst) != MEM)
773 return 1;
774 if (GET_CODE (src) == MEM)
775 return 0;
776 if (register_operand (src, VOIDmode))
777 return 1;
779 /* Otherwise, this must be a constant, one that is either 0, 0.0, or 1.0. */
780 if (INTEGRAL_MODE_P (GET_MODE (dst)))
781 return src == const0_rtx;
782 else
783 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
786 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
787 Return the length of the field, or <= 0 on failure. */
790 ia64_depz_field_mask (rop, rshift)
791 rtx rop, rshift;
793 unsigned HOST_WIDE_INT op = INTVAL (rop);
794 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
796 /* Get rid of the zero bits we're shifting in. */
797 op >>= shift;
799 /* We must now have a solid block of 1's at bit 0. */
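     /* A valid mask is therefore 2^LEN - 1 after the shift, so exact_log2 of
        op + 1 gives LEN, or -1 if the 1's are not contiguous.  For example,
        a mask of 0x1f8 with SHIFT 3 becomes 0x3f, and exact_log2 (0x40)
        yields a field length of 6.  */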
800 return exact_log2 (op + 1);
803 /* Expand a symbolic constant load. */
804 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
806 void
807 ia64_expand_load_address (dest, src, scratch)
808 rtx dest, src, scratch;
810 rtx temp;
812 /* The destination could be a MEM during initial rtl generation,
813 which isn't a valid destination for the PIC load address patterns. */
814 if (! register_operand (dest, DImode))
815 temp = gen_reg_rtx (DImode);
816 else
817 temp = dest;
819 if (TARGET_AUTO_PIC)
820 emit_insn (gen_load_gprel64 (temp, src));
821 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
822 emit_insn (gen_load_fptr (temp, src));
823 else if (sdata_symbolic_operand (src, DImode))
824 emit_insn (gen_load_gprel (temp, src));
825 else if (GET_CODE (src) == CONST
826 && GET_CODE (XEXP (src, 0)) == PLUS
827 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
828 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
830 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
831 rtx sym = XEXP (XEXP (src, 0), 0);
832 HOST_WIDE_INT ofs, hi, lo;
834 /* Split the offset into a sign extended 14-bit low part
835 and a complementary high part. */
836 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
837 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
838 hi = ofs - lo;
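     /* For example, with ofs = 0x12345: lo = ((0x2345 ^ 0x2000) - 0x2000)
        = -0x1cbb and hi = 0x14000, so hi + lo == ofs, lo fits in a signed
        14-bit immediate, and hi has its low 14 bits clear.  */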
840 if (! scratch)
841 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
843 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
844 scratch));
845 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
847 else
849 rtx insn;
850 if (! scratch)
851 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
853 insn = emit_insn (gen_load_symptr (temp, src, scratch));
854 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
857 if (temp != dest)
858 emit_move_insn (dest, temp);
862 ia64_gp_save_reg (setjmp_p)
863 int setjmp_p;
865 rtx save = cfun->machine->ia64_gp_save;
867 if (save != NULL)
869 /* We can't save GP in a pseudo if we are calling setjmp, because
870 pseudos won't be restored by longjmp. For now, we save it in r4. */
871 /* ??? It would be more efficient to save this directly into a stack
872 slot. Unfortunately, the stack slot address gets cse'd across
873 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
874 place. */
876 /* ??? Get the barf bag, Virginia. We've got to replace this thing
877 in place, since this rtx is used in exception handling receivers.
878 Moreover, we must get this rtx out of regno_reg_rtx or reload
879 will do the wrong thing. */
880 unsigned int old_regno = REGNO (save);
881 if (setjmp_p && old_regno != GR_REG (4))
883 REGNO (save) = GR_REG (4);
884 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
887 else
889 if (setjmp_p)
890 save = gen_rtx_REG (DImode, GR_REG (4));
891 else if (! optimize)
892 save = gen_rtx_REG (DImode, LOC_REG (0));
893 else
894 save = gen_reg_rtx (DImode);
895 cfun->machine->ia64_gp_save = save;
898 return save;
901 /* Split a post-reload TImode reference into two DImode components. */
904 ia64_split_timode (out, in, scratch)
905 rtx out[2];
906 rtx in, scratch;
908 switch (GET_CODE (in))
910 case REG:
911 out[0] = gen_rtx_REG (DImode, REGNO (in));
912 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
913 return NULL_RTX;
915 case MEM:
917 rtx base = XEXP (in, 0);
919 switch (GET_CODE (base))
921 case REG:
922 out[0] = change_address (in, DImode, NULL_RTX);
923 break;
924 case POST_MODIFY:
925 base = XEXP (base, 0);
926 out[0] = change_address (in, DImode, NULL_RTX);
927 break;
929 /* Since we're changing the mode, we need to change to POST_MODIFY
930 as well to preserve the size of the increment. Either that or
931 do the update in two steps, but we've already got this scratch
932 register handy so let's use it. */
933 case POST_INC:
934 base = XEXP (base, 0);
935 out[0] = change_address (in, DImode,
936 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, 16)));
937 break;
938 case POST_DEC:
939 base = XEXP (base, 0);
940 out[0] = change_address (in, DImode,
941 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -16)));
942 break;
943 default:
944 abort ();
947 if (scratch == NULL_RTX)
948 abort ();
949 out[1] = change_address (in, DImode, scratch);
950 return gen_adddi3 (scratch, base, GEN_INT (8));
953 case CONST_INT:
954 case CONST_DOUBLE:
955 split_double (in, &out[0], &out[1]);
956 return NULL_RTX;
958 default:
959 abort ();
963 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
964 through memory plus an extra GR scratch register. Except that you can
965 either get the first from SECONDARY_MEMORY_NEEDED or the second from
966 SECONDARY_RELOAD_CLASS, but not both.
968 We got into problems in the first place by allowing a construct like
969 (subreg:TF (reg:TI)), which we got from a union containing a long double.
970 This solution attempts to prevent this situation from occurring. When
971 we see something like the above, we spill the inner register to memory. */
974 spill_tfmode_operand (in, force)
975 rtx in;
976 int force;
978 if (GET_CODE (in) == SUBREG
979 && GET_MODE (SUBREG_REG (in)) == TImode
980 && GET_CODE (SUBREG_REG (in)) == REG)
982 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
983 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
985 else if (force && GET_CODE (in) == REG)
987 rtx mem = gen_mem_addressof (in, NULL_TREE);
988 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
990 else if (GET_CODE (in) == MEM
991 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
993 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
995 else
996 return in;
999 /* Emit comparison instruction if necessary, returning the expression
1000 that holds the compare result in the proper mode. */
1003 ia64_expand_compare (code, mode)
1004 enum rtx_code code;
1005 enum machine_mode mode;
1007 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1008 rtx cmp;
1010 /* If we have a BImode input, then we already have a compare result, and
1011 do not need to emit another comparison. */
1012 if (GET_MODE (op0) == BImode)
1014 if ((code == NE || code == EQ) && op1 == const0_rtx)
1015 cmp = op0;
1016 else
1017 abort ();
1019 else
1021 cmp = gen_reg_rtx (BImode);
1022 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1023 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1024 code = NE;
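     /* Having materialized the comparison into the BImode predicate CMP,
        the expression returned below is simply (ne CMP 0).  */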
1027 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1030 /* Emit the appropriate sequence for a call. */
1032 void
1033 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1034 rtx retval;
1035 rtx addr;
1036 rtx nextarg;
1037 int sibcall_p;
1039 rtx insn, b0, gp_save, narg_rtx;
1040 int narg;
1042 addr = XEXP (addr, 0);
1043 b0 = gen_rtx_REG (DImode, R_BR (0));
1045 if (! nextarg)
1046 narg = 0;
1047 else if (IN_REGNO_P (REGNO (nextarg)))
1048 narg = REGNO (nextarg) - IN_REG (0);
1049 else
1050 narg = REGNO (nextarg) - OUT_REG (0);
1051 narg_rtx = GEN_INT (narg);
1053 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1055 if (sibcall_p)
1056 insn = gen_sibcall_nopic (addr, narg_rtx, b0);
1057 else if (! retval)
1058 insn = gen_call_nopic (addr, narg_rtx, b0);
1059 else
1060 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1061 emit_call_insn (insn);
1062 return;
1065 if (sibcall_p)
1066 gp_save = NULL_RTX;
1067 else
1068 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1070 /* If this is an indirect call, then we have the address of a descriptor. */
1071 if (! symbolic_operand (addr, VOIDmode))
1073 rtx dest;
1075 if (! sibcall_p)
1076 emit_move_insn (gp_save, pic_offset_table_rtx);
1078 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1079 emit_move_insn (pic_offset_table_rtx,
1080 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1082 if (sibcall_p)
1083 insn = gen_sibcall_pic (dest, narg_rtx, b0);
1084 else if (! retval)
1085 insn = gen_call_pic (dest, narg_rtx, b0);
1086 else
1087 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1088 emit_call_insn (insn);
1090 if (! sibcall_p)
1091 emit_move_insn (pic_offset_table_rtx, gp_save);
1093 else if (TARGET_CONST_GP)
1095 if (sibcall_p)
1096 insn = gen_sibcall_nopic (addr, narg_rtx, b0);
1097 else if (! retval)
1098 insn = gen_call_nopic (addr, narg_rtx, b0);
1099 else
1100 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1101 emit_call_insn (insn);
1103 else
1105 if (sibcall_p)
1106 emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
1107 else
1109 emit_move_insn (gp_save, pic_offset_table_rtx);
1111 if (! retval)
1112 insn = gen_call_pic (addr, narg_rtx, b0);
1113 else
1114 insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
1115 emit_call_insn (insn);
1117 emit_move_insn (pic_offset_table_rtx, gp_save);
1122 /* Begin the assembly file. */
1124 void
1125 emit_safe_across_calls (f)
1126 FILE *f;
1128 unsigned int rs, re;
1129 int out_state;
1131 rs = 1;
1132 out_state = 0;
1133 while (1)
1135 while (rs < 64 && call_used_regs[PR_REG (rs)])
1136 rs++;
1137 if (rs >= 64)
1138 break;
1139 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1140 continue;
1141 if (out_state == 0)
1143 fputs ("\t.pred.safe_across_calls ", f);
1144 out_state = 1;
1146 else
1147 fputc (',', f);
1148 if (re == rs + 1)
1149 fprintf (f, "p%u", rs);
1150 else
1151 fprintf (f, "p%u-p%u", rs, re - 1);
1152 rs = re + 1;
1154 if (out_state)
1155 fputc ('\n', f);
1159 /* Structure to be filled in by ia64_compute_frame_size with register
1160 save masks and offsets for the current function. */
1162 struct ia64_frame_info
1164 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1165 the caller's scratch area. */
1166 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1167 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1168 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
1169 HARD_REG_SET mask; /* mask of saved registers. */
1170 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1171 registers or long-term scratches. */
1172 int n_spilled; /* number of spilled registers. */
1173 int reg_fp; /* register for fp. */
1174 int reg_save_b0; /* save register for b0. */
1175 int reg_save_pr; /* save register for prs. */
1176 int reg_save_ar_pfs; /* save register for ar.pfs. */
1177 int reg_save_ar_unat; /* save register for ar.unat. */
1178 int reg_save_ar_lc; /* save register for ar.lc. */
1179 int n_input_regs; /* number of input registers used. */
1180 int n_local_regs; /* number of local registers used. */
1181 int n_output_regs; /* number of output registers used. */
1182 int n_rotate_regs; /* number of rotating registers used. */
1184 char need_regstk; /* true if a .regstk directive needed. */
1185 char initialized; /* true if the data is finalized. */
1188 /* Current frame information calculated by ia64_compute_frame_size. */
1189 static struct ia64_frame_info current_frame_info;
1191 /* Helper function for ia64_compute_frame_size: find an appropriate general
1192 register to spill some special register to. current_frame_info.gr_used_mask
1193 records which of GR0 to GR31 have already been allocated by this routine.
1194 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1196 static int
1197 find_gr_spill (try_locals)
1198 int try_locals;
1200 int regno;
1202 /* If this is a leaf function, first try an otherwise unused
1203 call-clobbered register. */
1204 if (current_function_is_leaf)
1206 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1207 if (! regs_ever_live[regno]
1208 && call_used_regs[regno]
1209 && ! fixed_regs[regno]
1210 && ! global_regs[regno]
1211 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1213 current_frame_info.gr_used_mask |= 1 << regno;
1214 return regno;
1218 if (try_locals)
1220 regno = current_frame_info.n_local_regs;
1221 /* If there is a frame pointer, then we can't use loc79, because
1222 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1223 reg_name switching code in ia64_expand_prologue. */
1224 if (regno < (80 - frame_pointer_needed))
1226 current_frame_info.n_local_regs = regno + 1;
1227 return LOC_REG (0) + regno;
1231 /* Failed to find a general register to spill to. Must use stack. */
1232 return 0;
1235 /* In order to make for nice schedules, we try to allocate every temporary
1236 to a different register. We must of course stay away from call-saved,
1237 fixed, and global registers. We must also stay away from registers
1238 allocated in current_frame_info.gr_used_mask, since those include regs
1239 used all through the prologue.
1241 Any register allocated here must be used immediately. The idea is to
1242 aid scheduling, not to solve data flow problems. */
1244 static int last_scratch_gr_reg;
1246 static int
1247 next_scratch_gr_reg ()
1249 int i, regno;
1251 for (i = 0; i < 32; ++i)
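     /* Scan round-robin, starting just past the register handed out last
        time, so that consecutive calls tend to pick different registers.  */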
1253 regno = (last_scratch_gr_reg + i + 1) & 31;
1254 if (call_used_regs[regno]
1255 && ! fixed_regs[regno]
1256 && ! global_regs[regno]
1257 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1259 last_scratch_gr_reg = regno;
1260 return regno;
1264 /* There must be _something_ available. */
1265 abort ();
1268 /* Helper function for ia64_compute_frame_size, called through
1269 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1271 static void
1272 mark_reg_gr_used_mask (reg, data)
1273 rtx reg;
1274 void *data ATTRIBUTE_UNUSED;
1276 unsigned int regno = REGNO (reg);
1277 if (regno < 32)
1278 current_frame_info.gr_used_mask |= 1 << regno;
1281 /* Compute the register save masks and offsets for the current function,
1282 storing them in current_frame_info. SIZE is the number of bytes of space
1283 needed for local variables. */
1285 static void
1286 ia64_compute_frame_size (size)
1287 HOST_WIDE_INT size;
1289 HOST_WIDE_INT total_size;
1290 HOST_WIDE_INT spill_size = 0;
1291 HOST_WIDE_INT extra_spill_size = 0;
1292 HOST_WIDE_INT pretend_args_size;
1293 HARD_REG_SET mask;
1294 int n_spilled = 0;
1295 int spilled_gr_p = 0;
1296 int spilled_fr_p = 0;
1297 unsigned int regno;
1298 int i;
1300 if (current_frame_info.initialized)
1301 return;
1303 memset (&current_frame_info, 0, sizeof current_frame_info);
1304 CLEAR_HARD_REG_SET (mask);
1306 /* Don't allocate scratches to the return register. */
1307 diddle_return_value (mark_reg_gr_used_mask, NULL);
1309 /* Don't allocate scratches to the EH scratch registers. */
1310 if (cfun->machine->ia64_eh_epilogue_sp)
1311 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1312 if (cfun->machine->ia64_eh_epilogue_bsp)
1313 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1315 /* Find the size of the register stack frame. We have only 80 local
1316 registers, because we reserve 8 for the inputs and 8 for the
1317 outputs. */
1319 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1320 since we'll be adjusting that down later. */
1321 regno = LOC_REG (78) + ! frame_pointer_needed;
1322 for (; regno >= LOC_REG (0); regno--)
1323 if (regs_ever_live[regno])
1324 break;
1325 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1327 /* For functions marked with the syscall_linkage attribute, we must mark
1328 all eight input registers as in use, so that locals aren't visible to
1329 the caller. */
1331 if (cfun->machine->n_varargs > 0
1332 || lookup_attribute ("syscall_linkage",
1333 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1334 current_frame_info.n_input_regs = 8;
1335 else
1337 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1338 if (regs_ever_live[regno])
1339 break;
1340 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1343 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1344 if (regs_ever_live[regno])
1345 break;
1346 i = regno - OUT_REG (0) + 1;
1348 /* When -p profiling, we need one output register for the mcount argument.
1349 Likewise for -a profiling for the bb_init_func argument. For -ax
1350 profiling, we need two output registers for the two bb_init_trace_func
1351 arguments. */
1352 if (profile_flag || profile_block_flag == 1)
1353 i = MAX (i, 1);
1354 else if (profile_block_flag == 2)
1355 i = MAX (i, 2);
1356 current_frame_info.n_output_regs = i;
1358 /* ??? No rotating register support yet. */
1359 current_frame_info.n_rotate_regs = 0;
1361 /* Discover which registers need spilling, and how much room that
1362 will take. Begin with floating point and general registers,
1363 which will always wind up on the stack. */
1365 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1366 if (regs_ever_live[regno] && ! call_used_regs[regno])
1368 SET_HARD_REG_BIT (mask, regno);
1369 spill_size += 16;
1370 n_spilled += 1;
1371 spilled_fr_p = 1;
1374 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1375 if (regs_ever_live[regno] && ! call_used_regs[regno])
1377 SET_HARD_REG_BIT (mask, regno);
1378 spill_size += 8;
1379 n_spilled += 1;
1380 spilled_gr_p = 1;
1383 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1384 if (regs_ever_live[regno] && ! call_used_regs[regno])
1386 SET_HARD_REG_BIT (mask, regno);
1387 spill_size += 8;
1388 n_spilled += 1;
1391 /* Now come all special registers that might get saved in other
1392 general registers. */
1394 if (frame_pointer_needed)
1396 current_frame_info.reg_fp = find_gr_spill (1);
1397 /* If we did not get a register, then we take LOC79. This is guaranteed
1398 to be free, even if regs_ever_live is already set, because this is
1399 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1400 as we don't count loc79 above. */
1401 if (current_frame_info.reg_fp == 0)
1403 current_frame_info.reg_fp = LOC_REG (79);
1404 current_frame_info.n_local_regs++;
1408 if (! current_function_is_leaf)
1410 /* Emit a save of BR0 if we call other functions. Do this even
1411 if this function doesn't return, as EH depends on this to be
1412 able to unwind the stack. */
1413 SET_HARD_REG_BIT (mask, BR_REG (0));
1415 current_frame_info.reg_save_b0 = find_gr_spill (1);
1416 if (current_frame_info.reg_save_b0 == 0)
1418 spill_size += 8;
1419 n_spilled += 1;
1422 /* Similarly for ar.pfs. */
1423 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1424 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1425 if (current_frame_info.reg_save_ar_pfs == 0)
1427 extra_spill_size += 8;
1428 n_spilled += 1;
1431 else
1433 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1435 SET_HARD_REG_BIT (mask, BR_REG (0));
1436 spill_size += 8;
1437 n_spilled += 1;
1441 /* Unwind descriptor hackery: things are most efficient if we allocate
1442 consecutive GR save registers for RP, PFS, FP in that order. However,
1443 it is absolutely critical that FP get the only hard register that's
1444 guaranteed to be free, so we allocated it first. If all three did
1445 happen to be allocated hard regs, and are consecutive, rearrange them
1446 into the preferred order now. */
1447 if (current_frame_info.reg_fp != 0
1448 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1449 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1451 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1452 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1453 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1456 /* See if we need to store the predicate register block. */
1457 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1458 if (regs_ever_live[regno] && ! call_used_regs[regno])
1459 break;
1460 if (regno <= PR_REG (63))
1462 SET_HARD_REG_BIT (mask, PR_REG (0));
1463 current_frame_info.reg_save_pr = find_gr_spill (1);
1464 if (current_frame_info.reg_save_pr == 0)
1466 extra_spill_size += 8;
1467 n_spilled += 1;
1470 /* ??? Mark them all as used so that register renaming and such
1471 are free to use them. */
1472 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1473 regs_ever_live[regno] = 1;
1476 /* If we're forced to use st8.spill, we're forced to save and restore
1477 ar.unat as well. */
1478 if (spilled_gr_p || cfun->machine->n_varargs)
1480 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1481 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1482 if (current_frame_info.reg_save_ar_unat == 0)
1484 extra_spill_size += 8;
1485 n_spilled += 1;
1489 if (regs_ever_live[AR_LC_REGNUM])
1491 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1492 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1493 if (current_frame_info.reg_save_ar_lc == 0)
1495 extra_spill_size += 8;
1496 n_spilled += 1;
1500 /* If we have an odd number of words of pretend arguments written to
1501 the stack, then the FR save area will be unaligned. We round the
1502 size of this area up to keep things 16 byte aligned. */
1503 if (spilled_fr_p)
1504 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1505 else
1506 pretend_args_size = current_function_pretend_args_size;
1508 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1509 + current_function_outgoing_args_size);
1510 total_size = IA64_STACK_ALIGN (total_size);
1512 /* We always use the 16-byte scratch area provided by the caller, but
1513 if we are a leaf function, there's no one to which we need to provide
1514 a scratch area. */
1515 if (current_function_is_leaf)
1516 total_size = MAX (0, total_size - 16);
1518 current_frame_info.total_size = total_size;
1519 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1520 current_frame_info.spill_size = spill_size;
1521 current_frame_info.extra_spill_size = extra_spill_size;
1522 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1523 current_frame_info.n_spilled = n_spilled;
1524 current_frame_info.initialized = reload_completed;
1527 /* Compute the initial difference between the specified pair of registers. */
1529 HOST_WIDE_INT
1530 ia64_initial_elimination_offset (from, to)
1531 int from, to;
1533 HOST_WIDE_INT offset;
1535 ia64_compute_frame_size (get_frame_size ());
1536 switch (from)
1538 case FRAME_POINTER_REGNUM:
1539 if (to == HARD_FRAME_POINTER_REGNUM)
1541 if (current_function_is_leaf)
1542 offset = -current_frame_info.total_size;
1543 else
1544 offset = -(current_frame_info.total_size
1545 - current_function_outgoing_args_size - 16);
1547 else if (to == STACK_POINTER_REGNUM)
1549 if (current_function_is_leaf)
1550 offset = 0;
1551 else
1552 offset = 16 + current_function_outgoing_args_size;
1554 else
1555 abort ();
1556 break;
1558 case ARG_POINTER_REGNUM:
1559 /* Arguments start above the 16 byte save area, unless stdarg
1560 in which case we store through the 16 byte save area. */
1561 if (to == HARD_FRAME_POINTER_REGNUM)
1562 offset = 16 - current_function_pretend_args_size;
1563 else if (to == STACK_POINTER_REGNUM)
1564 offset = (current_frame_info.total_size
1565 + 16 - current_function_pretend_args_size);
1566 else
1567 abort ();
1568 break;
1570 case RETURN_ADDRESS_POINTER_REGNUM:
1571 offset = 0;
1572 break;
1574 default:
1575 abort ();
1578 return offset;
1581 /* If there are more than a trivial number of register spills, we use
1582 two interleaved iterators so that we can get two memory references
1583 per insn group.
1585 In order to simplify things in the prologue and epilogue expanders,
1586 we use helper functions to fix up the memory references after the
1587 fact with the appropriate offsets to a POST_MODIFY memory mode.
1588 The following data structure tracks the state of the two iterators
1589 while insns are being emitted. */
1591 struct spill_fill_data
1593 rtx init_after; /* point at which to emit initializations */
1594 rtx init_reg[2]; /* initial base register */
1595 rtx iter_reg[2]; /* the iterator registers */
1596 rtx *prev_addr[2]; /* address of last memory use */
1597 HOST_WIDE_INT prev_off[2]; /* last offset */
1598 int n_iter; /* number of iterators in use */
1599 int next_iter; /* next iterator to use */
1600 unsigned int save_gr_used_mask;
1603 static struct spill_fill_data spill_fill_data;
1605 static void
1606 setup_spill_pointers (n_spills, init_reg, cfa_off)
1607 int n_spills;
1608 rtx init_reg;
1609 HOST_WIDE_INT cfa_off;
1611 int i;
1613 spill_fill_data.init_after = get_last_insn ();
1614 spill_fill_data.init_reg[0] = init_reg;
1615 spill_fill_data.init_reg[1] = init_reg;
1616 spill_fill_data.prev_addr[0] = NULL;
1617 spill_fill_data.prev_addr[1] = NULL;
1618 spill_fill_data.prev_off[0] = cfa_off;
1619 spill_fill_data.prev_off[1] = cfa_off;
1620 spill_fill_data.next_iter = 0;
1621 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1623 spill_fill_data.n_iter = 1 + (n_spills > 2);
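     /* A second interleaved iterator is used only when there are more than
        two spills; otherwise a single pointer register suffices.  */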
1624 for (i = 0; i < spill_fill_data.n_iter; ++i)
1626 int regno = next_scratch_gr_reg ();
1627 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1628 current_frame_info.gr_used_mask |= 1 << regno;
1632 static void
1633 finish_spill_pointers ()
1635 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1638 static rtx
1639 spill_restore_mem (reg, cfa_off)
1640 rtx reg;
1641 HOST_WIDE_INT cfa_off;
1643 int iter = spill_fill_data.next_iter;
1644 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1645 rtx disp_rtx = GEN_INT (disp);
1646 rtx mem;
1648 if (spill_fill_data.prev_addr[iter])
1650 if (CONST_OK_FOR_N (disp))
1651 *spill_fill_data.prev_addr[iter]
1652 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1653 gen_rtx_PLUS (DImode,
1654 spill_fill_data.iter_reg[iter],
1655 disp_rtx));
1656 else
1658 /* ??? Could use register post_modify for loads. */
1659 if (! CONST_OK_FOR_I (disp))
1661 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1662 emit_move_insn (tmp, disp_rtx);
1663 disp_rtx = tmp;
1665 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1666 spill_fill_data.iter_reg[iter], disp_rtx));
1669 /* Micro-optimization: if we've created a frame pointer, it's at
1670 CFA 0, which may allow the real iterator to be initialized lower,
1671 slightly increasing parallelism. Also, if there are few saves
1672 it may eliminate the iterator entirely. */
1673 else if (disp == 0
1674 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1675 && frame_pointer_needed)
1677 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1678 MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
1679 return mem;
1681 else
1683 rtx seq;
1685 if (disp == 0)
1686 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1687 spill_fill_data.init_reg[iter]);
1688 else
1690 start_sequence ();
1692 if (! CONST_OK_FOR_I (disp))
1694 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1695 emit_move_insn (tmp, disp_rtx);
1696 disp_rtx = tmp;
1699 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1700 spill_fill_data.init_reg[iter],
1701 disp_rtx));
1703 seq = gen_sequence ();
1704 end_sequence ();
1707 /* Be careful about being the first insn in a sequence. */
1708 if (spill_fill_data.init_after)
1709 spill_fill_data.init_after
1710 = emit_insn_after (seq, spill_fill_data.init_after);
1711 else
1713 rtx first = get_insns ();
1714 if (first)
1715 spill_fill_data.init_after
1716 = emit_insn_before (seq, first);
1717 else
1718 spill_fill_data.init_after = emit_insn (seq);
1722 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1724 /* ??? Not all of the spills are for varargs, but some of them are.
1725 The rest of the spills belong in an alias set of their own. But
1726 it doesn't actually hurt to include them here. */
1727 MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
1729 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1730 spill_fill_data.prev_off[iter] = cfa_off;
1732 if (++iter >= spill_fill_data.n_iter)
1733 iter = 0;
1734 spill_fill_data.next_iter = iter;
1736 return mem;
1739 static void
1740 do_spill (move_fn, reg, cfa_off, frame_reg)
1741 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1742 rtx reg, frame_reg;
1743 HOST_WIDE_INT cfa_off;
1745 rtx mem, insn;
1747 mem = spill_restore_mem (reg, cfa_off);
1748 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1750 if (frame_reg)
1752 rtx base;
1753 HOST_WIDE_INT off;
1755 RTX_FRAME_RELATED_P (insn) = 1;
1757 /* Don't even pretend that the unwind code can intuit its way
1758 through a pair of interleaved post_modify iterators. Just
1759 provide the correct answer. */
1761 if (frame_pointer_needed)
1763 base = hard_frame_pointer_rtx;
1764 off = - cfa_off;
1766 else
1768 base = stack_pointer_rtx;
1769 off = current_frame_info.total_size - cfa_off;
1772 REG_NOTES (insn)
1773 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1774 gen_rtx_SET (VOIDmode,
1775 gen_rtx_MEM (GET_MODE (reg),
1776 plus_constant (base, off)),
1777 frame_reg),
1778 REG_NOTES (insn));
1782 static void
1783 do_restore (move_fn, reg, cfa_off)
1784 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1785 rtx reg;
1786 HOST_WIDE_INT cfa_off;
1788 emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1789 GEN_INT (cfa_off)));
1792 /* Wrapper functions that discard the CONST_INT spill offset. These
1793 exist so that we can give gr_spill/gr_fill the offset they need and
1794 use a consistent function interface. */
1796 static rtx
1797 gen_movdi_x (dest, src, offset)
1798 rtx dest, src;
1799 rtx offset ATTRIBUTE_UNUSED;
1801 return gen_movdi (dest, src);
1804 static rtx
1805 gen_fr_spill_x (dest, src, offset)
1806 rtx dest, src;
1807 rtx offset ATTRIBUTE_UNUSED;
1809 return gen_fr_spill (dest, src);
1812 static rtx
1813 gen_fr_restore_x (dest, src, offset)
1814 rtx dest, src;
1815 rtx offset ATTRIBUTE_UNUSED;
1817 return gen_fr_restore (dest, src);
1820 /* Called after register allocation to add any instructions needed for the
1821 prologue. Using a prologue insn is favored compared to putting all of the
1822 instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
1823 to intermix instructions with the saves of the caller saved registers. In
1824 some cases, it might be necessary to emit a barrier instruction as the last
1825 insn to prevent such scheduling.
1827 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1828 so that the debug info generation code can handle them properly.
1830 The register save area is laid out like so:
1831 cfa+16
1832 [ varargs spill area ]
1833 [ fr register spill area ]
1834 [ br register spill area ]
1835 [ ar register spill area ]
1836 [ pr register spill area ]
1837 [ gr register spill area ] */
1839 /* ??? Get inefficient code when the frame size is larger than can fit in an
1840 adds instruction. */
1842 void
1843 ia64_expand_prologue ()
1845 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1846 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1847 rtx reg, alt_reg;
1849 ia64_compute_frame_size (get_frame_size ());
1850 last_scratch_gr_reg = 15;
1852 /* If there is no epilogue, then we don't need some prologue insns.
1853 We need to avoid emitting the dead prologue insns, because flow
1854 will complain about them. */
1855 if (optimize)
1857 edge e;
1859 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1860 if ((e->flags & EDGE_FAKE) == 0
1861 && (e->flags & EDGE_FALLTHRU) != 0)
1862 break;
1863 epilogue_p = (e != NULL);
1865 else
1866 epilogue_p = 1;
1868 /* Set the local, input, and output register names. We need to do this
1869 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1870 half. If we use in/loc/out register names, then we get assembler errors
1871 in crtn.S because there is no alloc insn or regstk directive in there. */
1872 if (! TARGET_REG_NAMES)
1874 int inputs = current_frame_info.n_input_regs;
1875 int locals = current_frame_info.n_local_regs;
1876 int outputs = current_frame_info.n_output_regs;
1878 for (i = 0; i < inputs; i++)
1879 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
1880 for (i = 0; i < locals; i++)
1881 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
1882 for (i = 0; i < outputs; i++)
1883 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
1886 /* Set the frame pointer register name. The regnum is logically loc79,
1887 but of course we'll not have allocated that many locals. Rather than
1888 worrying about renumbering the existing rtxs, we adjust the name. */
1889 /* ??? This code means that we can never use one local register when
1890 there is a frame pointer. loc79 gets wasted in this case, as it is
1891 renamed to a register that will never be used. See also the try_locals
1892 code in find_gr_spill. */
1893 if (current_frame_info.reg_fp)
1895 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
1896 reg_names[HARD_FRAME_POINTER_REGNUM]
1897 = reg_names[current_frame_info.reg_fp];
1898 reg_names[current_frame_info.reg_fp] = tmp;
1901 /* Fix up the return address placeholder. */
1902 /* ??? We can fail if __builtin_return_address is used, and we didn't
1903 allocate a register in which to save b0. I can't think of a way to
1904 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
1905 then be sure that I got the right one. Further, reload doesn't seem
1906 to care if an eliminable register isn't used, and "eliminates" it
1907 anyway. */
1908 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
1909 && current_frame_info.reg_save_b0 != 0)
1910 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
1912 /* We don't need an alloc instruction if we've used no outputs or locals. */
1913 if (current_frame_info.n_local_regs == 0
1914 && current_frame_info.n_output_regs == 0
1915 && current_frame_info.n_input_regs <= current_function_args_info.words)
1917 /* If there is no alloc, but there are input registers used, then we
1918 need a .regstk directive. */
1919 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
1920 ar_pfs_save_reg = NULL_RTX;
1922 else
1924 current_frame_info.need_regstk = 0;
1926 if (current_frame_info.reg_save_ar_pfs)
1927 regno = current_frame_info.reg_save_ar_pfs;
1928 else
1929 regno = next_scratch_gr_reg ();
1930 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
1932 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
1933 GEN_INT (current_frame_info.n_input_regs),
1934 GEN_INT (current_frame_info.n_local_regs),
1935 GEN_INT (current_frame_info.n_output_regs),
1936 GEN_INT (current_frame_info.n_rotate_regs)));
1937 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
1940 /* Set up frame pointer, stack pointer, and spill iterators. */
1942 n_varargs = cfun->machine->n_varargs;
1943 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
1944 stack_pointer_rtx, 0);
1946 if (frame_pointer_needed)
1948 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1949 RTX_FRAME_RELATED_P (insn) = 1;
1952 if (current_frame_info.total_size != 0)
1954 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
1955 rtx offset;
1957 if (CONST_OK_FOR_I (- current_frame_info.total_size))
1958 offset = frame_size_rtx;
1959 else
1961 regno = next_scratch_gr_reg ();
1962 offset = gen_rtx_REG (DImode, regno);
1963 emit_move_insn (offset, frame_size_rtx);
1966 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
1967 stack_pointer_rtx, offset));
1969 if (! frame_pointer_needed)
1971 RTX_FRAME_RELATED_P (insn) = 1;
1972 if (GET_CODE (offset) != CONST_INT)
1974 REG_NOTES (insn)
1975 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1976 gen_rtx_SET (VOIDmode,
1977 stack_pointer_rtx,
1978 gen_rtx_PLUS (DImode,
1979 stack_pointer_rtx,
1980 frame_size_rtx)),
1981 REG_NOTES (insn));
1985 /* ??? At this point we must generate a magic insn that appears to
1986 modify the stack pointer, the frame pointer, and all spill
1987 iterators. This would allow the most scheduling freedom. For
1988 now, just hard stop. */
1989 emit_insn (gen_blockage ());
1992 /* Must copy out ar.unat before doing any integer spills. */
1993 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
1995 if (current_frame_info.reg_save_ar_unat)
1996 ar_unat_save_reg
1997 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
1998 else
2000 alt_regno = next_scratch_gr_reg ();
2001 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2002 current_frame_info.gr_used_mask |= 1 << alt_regno;
2005 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2006 insn = emit_move_insn (ar_unat_save_reg, reg);
2007 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2009 /* Even if we're not going to generate an epilogue, we still
2010 need to save the register so that EH works. */
2011 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2012 emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
2014 else
2015 ar_unat_save_reg = NULL_RTX;
2017 /* Spill all varargs registers. Do this before spilling any GR registers,
2018 since we want the UNAT bits for the GR registers to override the UNAT
2019 bits from varargs, which we don't care about. */
2021 cfa_off = -16;
2022 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2024 reg = gen_rtx_REG (DImode, regno);
2025 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
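/* Illustration: for a variadic function such as

        int f (const char *fmt, ...);

   only one argument slot is named, so n_varargs is 7 (see
   ia64_setup_incoming_varargs below) and the loop above spills the
   remaining seven GR argument registers, last one first, into
   consecutive 8-byte slots of the register save area.  */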
2028 /* Locate the bottom of the register save area. */
2029 cfa_off = (current_frame_info.spill_cfa_off
2030 + current_frame_info.spill_size
2031 + current_frame_info.extra_spill_size);
2033 /* Save the predicate register block either in a register or in memory. */
2034 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2036 reg = gen_rtx_REG (DImode, PR_REG (0));
2037 if (current_frame_info.reg_save_pr != 0)
2039 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2040 insn = emit_move_insn (alt_reg, reg);
2042 /* ??? Denote pr spill/fill by a DImode move that modifies all
2043 64 hard registers. */
2044 RTX_FRAME_RELATED_P (insn) = 1;
2045 REG_NOTES (insn)
2046 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2047 gen_rtx_SET (VOIDmode, alt_reg, reg),
2048 REG_NOTES (insn));
2050 /* Even if we're not going to generate an epilogue, we still
2051 need to save the register so that EH works. */
2052 if (! epilogue_p)
2053 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2055 else
2057 alt_regno = next_scratch_gr_reg ();
2058 alt_reg = gen_rtx_REG (DImode, alt_regno);
2059 insn = emit_move_insn (alt_reg, reg);
2060 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2061 cfa_off -= 8;
2065 /* Handle AR regs in numerical order. All of them get special handling. */
2066 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2067 && current_frame_info.reg_save_ar_unat == 0)
2069 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2070 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2071 cfa_off -= 8;
2074 /* The alloc insn already copied ar.pfs into a general register. The
2075 only thing we have to do now is copy that register to a stack slot
2076 if we'd not allocated a local register for the job. */
2077 if (current_frame_info.reg_save_ar_pfs == 0
2078 && ! current_function_is_leaf)
2080 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2081 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2082 cfa_off -= 8;
2085 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2087 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2088 if (current_frame_info.reg_save_ar_lc != 0)
2090 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2091 insn = emit_move_insn (alt_reg, reg);
2092 RTX_FRAME_RELATED_P (insn) = 1;
2094 /* Even if we're not going to generate an epilogue, we still
2095 need to save the register so that EH works. */
2096 if (! epilogue_p)
2097 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2099 else
2101 alt_regno = next_scratch_gr_reg ();
2102 alt_reg = gen_rtx_REG (DImode, alt_regno);
2103 emit_move_insn (alt_reg, reg);
2104 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2105 cfa_off -= 8;
2109 /* We should now be at the base of the gr/br/fr spill area. */
2110 if (cfa_off != (current_frame_info.spill_cfa_off
2111 + current_frame_info.spill_size))
2112 abort ();
2114 /* Spill all general registers. */
2115 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2116 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2118 reg = gen_rtx_REG (DImode, regno);
2119 do_spill (gen_gr_spill, reg, cfa_off, reg);
2120 cfa_off -= 8;
2123 /* Handle BR0 specially -- it may be getting stored permanently in
2124 some GR register. */
2125 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2127 reg = gen_rtx_REG (DImode, BR_REG (0));
2128 if (current_frame_info.reg_save_b0 != 0)
2130 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2131 insn = emit_move_insn (alt_reg, reg);
2132 RTX_FRAME_RELATED_P (insn) = 1;
2134 /* Even if we're not going to generate an epilogue, we still
2135 need to save the register so that EH works. */
2136 if (! epilogue_p)
2137 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2139 else
2141 alt_regno = next_scratch_gr_reg ();
2142 alt_reg = gen_rtx_REG (DImode, alt_regno);
2143 emit_move_insn (alt_reg, reg);
2144 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2145 cfa_off -= 8;
2149 /* Spill the rest of the BR registers. */
2150 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2151 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2153 alt_regno = next_scratch_gr_reg ();
2154 alt_reg = gen_rtx_REG (DImode, alt_regno);
2155 reg = gen_rtx_REG (DImode, regno);
2156 emit_move_insn (alt_reg, reg);
2157 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2158 cfa_off -= 8;
2161 /* Align the frame and spill all FR registers. */
2162 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2163 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2165 if (cfa_off & 15)
2166 abort ();
2167 reg = gen_rtx_REG (TFmode, regno);
2168 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2169 cfa_off -= 16;
2172 if (cfa_off != current_frame_info.spill_cfa_off)
2173 abort ();
2175 finish_spill_pointers ();
2178 /* Called after register allocation to add any instructions needed for the
2179 epilogue. Using an epilogue insn is favored over putting all of the
2180 instructions in the FUNCTION_EPILOGUE macro, since it allows the scheduler
2181 to intermix instructions with the restores of the caller saved registers. In
2182 some cases, it might be necessary to emit a barrier instruction as the last
2183 insn to prevent such scheduling. */
2185 void
2186 ia64_expand_epilogue (sibcall_p)
2187 int sibcall_p;
2189 rtx insn, reg, alt_reg, ar_unat_save_reg;
2190 int regno, alt_regno, cfa_off;
2192 ia64_compute_frame_size (get_frame_size ());
2194 /* If there is a frame pointer, then we use it instead of the stack
2195 pointer, so that the stack pointer does not need to be valid when
2196 the epilogue starts. See EXIT_IGNORE_STACK. */
2197 if (frame_pointer_needed)
2198 setup_spill_pointers (current_frame_info.n_spilled,
2199 hard_frame_pointer_rtx, 0);
2200 else
2201 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2202 current_frame_info.total_size);
2204 if (current_frame_info.total_size != 0)
2206 /* ??? At this point we must generate a magic insn that appears to
2207 modify the spill iterators and the frame pointer. This would
2208 allow the most scheduling freedom. For now, just hard stop. */
2209 emit_insn (gen_blockage ());
2212 /* Locate the bottom of the register save area. */
2213 cfa_off = (current_frame_info.spill_cfa_off
2214 + current_frame_info.spill_size
2215 + current_frame_info.extra_spill_size);
2217 /* Restore the predicate registers. */
2218 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2220 if (current_frame_info.reg_save_pr != 0)
2221 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2222 else
2224 alt_regno = next_scratch_gr_reg ();
2225 alt_reg = gen_rtx_REG (DImode, alt_regno);
2226 do_restore (gen_movdi_x, alt_reg, cfa_off);
2227 cfa_off -= 8;
2229 reg = gen_rtx_REG (DImode, PR_REG (0));
2230 emit_move_insn (reg, alt_reg);
2233 /* Restore the application registers. */
2235 /* Load the saved unat from the stack, but do not restore it until
2236 after the GRs have been restored. */
2237 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2239 if (current_frame_info.reg_save_ar_unat != 0)
2240 ar_unat_save_reg
2241 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2242 else
2244 alt_regno = next_scratch_gr_reg ();
2245 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2246 current_frame_info.gr_used_mask |= 1 << alt_regno;
2247 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2248 cfa_off -= 8;
2251 else
2252 ar_unat_save_reg = NULL_RTX;
2254 if (current_frame_info.reg_save_ar_pfs != 0)
2256 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2257 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2258 emit_move_insn (reg, alt_reg);
2260 else if (! current_function_is_leaf)
2262 alt_regno = next_scratch_gr_reg ();
2263 alt_reg = gen_rtx_REG (DImode, alt_regno);
2264 do_restore (gen_movdi_x, alt_reg, cfa_off);
2265 cfa_off -= 8;
2266 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2267 emit_move_insn (reg, alt_reg);
2270 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2272 if (current_frame_info.reg_save_ar_lc != 0)
2273 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2274 else
2276 alt_regno = next_scratch_gr_reg ();
2277 alt_reg = gen_rtx_REG (DImode, alt_regno);
2278 do_restore (gen_movdi_x, alt_reg, cfa_off);
2279 cfa_off -= 8;
2281 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2282 emit_move_insn (reg, alt_reg);
2285 /* We should now be at the base of the gr/br/fr spill area. */
2286 if (cfa_off != (current_frame_info.spill_cfa_off
2287 + current_frame_info.spill_size))
2288 abort ();
2290 /* Restore all general registers. */
2291 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2292 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2294 reg = gen_rtx_REG (DImode, regno);
2295 do_restore (gen_gr_restore, reg, cfa_off);
2296 cfa_off -= 8;
2299 /* Restore the branch registers. Handle B0 specially, as it may
2300 have gotten stored in some GR register. */
2301 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2303 if (current_frame_info.reg_save_b0 != 0)
2304 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2305 else
2307 alt_regno = next_scratch_gr_reg ();
2308 alt_reg = gen_rtx_REG (DImode, alt_regno);
2309 do_restore (gen_movdi_x, alt_reg, cfa_off);
2310 cfa_off -= 8;
2312 reg = gen_rtx_REG (DImode, BR_REG (0));
2313 emit_move_insn (reg, alt_reg);
2316 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2317 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2319 alt_regno = next_scratch_gr_reg ();
2320 alt_reg = gen_rtx_REG (DImode, alt_regno);
2321 do_restore (gen_movdi_x, alt_reg, cfa_off);
2322 cfa_off -= 8;
2323 reg = gen_rtx_REG (DImode, regno);
2324 emit_move_insn (reg, alt_reg);
2327 /* Restore floating point registers. */
2328 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2329 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2331 if (cfa_off & 15)
2332 abort ();
2333 reg = gen_rtx_REG (TFmode, regno);
2334 do_restore (gen_fr_restore_x, reg, cfa_off);
2335 cfa_off -= 16;
2338 /* Restore ar.unat for real. */
2339 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2341 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2342 emit_move_insn (reg, ar_unat_save_reg);
2345 if (cfa_off != current_frame_info.spill_cfa_off)
2346 abort ();
2348 finish_spill_pointers ();
2350 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2352 /* ??? At this point we must generate a magic insn that appears to
2353 modify the spill iterators, the stack pointer, and the frame
2354 pointer. This would allow the most scheduling freedom. For now,
2355 just hard stop. */
2356 emit_insn (gen_blockage ());
2359 if (cfun->machine->ia64_eh_epilogue_sp)
2360 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2361 else if (frame_pointer_needed)
2363 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2364 RTX_FRAME_RELATED_P (insn) = 1;
2366 else if (current_frame_info.total_size)
2368 rtx offset, frame_size_rtx;
2370 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2371 if (CONST_OK_FOR_I (current_frame_info.total_size))
2372 offset = frame_size_rtx;
2373 else
2375 regno = next_scratch_gr_reg ();
2376 offset = gen_rtx_REG (DImode, regno);
2377 emit_move_insn (offset, frame_size_rtx);
2380 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2381 offset));
2383 RTX_FRAME_RELATED_P (insn) = 1;
2384 if (GET_CODE (offset) != CONST_INT)
2386 REG_NOTES (insn)
2387 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2388 gen_rtx_SET (VOIDmode,
2389 stack_pointer_rtx,
2390 gen_rtx_PLUS (DImode,
2391 stack_pointer_rtx,
2392 frame_size_rtx)),
2393 REG_NOTES (insn));
2397 if (cfun->machine->ia64_eh_epilogue_bsp)
2398 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2400 if (! sibcall_p)
2401 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2402 else
2403 /* We must emit an alloc to force the input registers to become output
2404 registers. Otherwise, if the callee tries to pass its parameters
2405 through to another call without an intervening alloc, then these
2406 values get lost. */
2407 /* ??? We don't need to preserve all input registers. We only need to
2408 preserve those input registers used as arguments to the sibling call.
2409 It is unclear how to compute that number here. */
2410 emit_insn (gen_alloc (gen_rtx_REG (DImode, GR_REG (2)),
2411 GEN_INT (0), GEN_INT (0),
2412 GEN_INT (current_frame_info.n_input_regs),
2413 GEN_INT (0)));
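/* Illustration: if the current function received three register
   arguments, the alloc emitted above amounts to

        alloc r2 = ar.pfs, 0, 0, 3, 0

   so the stacked registers that held in0-in2 become out0-out2 of a
   zero-local frame and survive as outgoing arguments of the sibling
   call.  */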
2416 /* Return 1 if br.ret can do all the work required to return from a
2417 function. */
2420 ia64_direct_return ()
2422 if (reload_completed && ! frame_pointer_needed)
2424 ia64_compute_frame_size (get_frame_size ());
2426 return (current_frame_info.total_size == 0
2427 && current_frame_info.n_spilled == 0
2428 && current_frame_info.reg_save_b0 == 0
2429 && current_frame_info.reg_save_pr == 0
2430 && current_frame_info.reg_save_ar_pfs == 0
2431 && current_frame_info.reg_save_ar_unat == 0
2432 && current_frame_info.reg_save_ar_lc == 0);
2434 return 0;
2438 ia64_hard_regno_rename_ok (from, to)
2439 int from;
2440 int to;
2442 /* Don't clobber any of the registers we reserved for the prologue. */
2443 if (to == current_frame_info.reg_fp
2444 || to == current_frame_info.reg_save_b0
2445 || to == current_frame_info.reg_save_pr
2446 || to == current_frame_info.reg_save_ar_pfs
2447 || to == current_frame_info.reg_save_ar_unat
2448 || to == current_frame_info.reg_save_ar_lc)
2449 return 0;
2451 if (from == current_frame_info.reg_fp
2452 || from == current_frame_info.reg_save_b0
2453 || from == current_frame_info.reg_save_pr
2454 || from == current_frame_info.reg_save_ar_pfs
2455 || from == current_frame_info.reg_save_ar_unat
2456 || from == current_frame_info.reg_save_ar_lc)
2457 return 0;
2459 /* Don't use output registers outside the register frame. */
2460 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2461 return 0;
2463 /* Retain even/oddness on predicate register pairs. */
2464 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2465 return (from & 1) == (to & 1);
2467 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2468 if (from == GR_REG (4) && current_function_calls_setjmp)
2469 return 0;
2471 return 1;
2474 /* Emit the function prologue. */
2476 void
2477 ia64_function_prologue (file, size)
2478 FILE *file;
2479 int size ATTRIBUTE_UNUSED;
2481 int mask, grsave, grsave_prev;
2483 if (current_frame_info.need_regstk)
2484 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2485 current_frame_info.n_input_regs,
2486 current_frame_info.n_local_regs,
2487 current_frame_info.n_output_regs,
2488 current_frame_info.n_rotate_regs);
2490 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2491 return;
2493 /* Emit the .prologue directive. */
2495 mask = 0;
2496 grsave = grsave_prev = 0;
2497 if (current_frame_info.reg_save_b0 != 0)
2499 mask |= 8;
2500 grsave = grsave_prev = current_frame_info.reg_save_b0;
2502 if (current_frame_info.reg_save_ar_pfs != 0
2503 && (grsave_prev == 0
2504 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2506 mask |= 4;
2507 if (grsave_prev == 0)
2508 grsave = current_frame_info.reg_save_ar_pfs;
2509 grsave_prev = current_frame_info.reg_save_ar_pfs;
2511 if (current_frame_info.reg_fp != 0
2512 && (grsave_prev == 0
2513 || current_frame_info.reg_fp == grsave_prev + 1))
2515 mask |= 2;
2516 if (grsave_prev == 0)
2517 grsave = HARD_FRAME_POINTER_REGNUM;
2518 grsave_prev = current_frame_info.reg_fp;
2520 if (current_frame_info.reg_save_pr != 0
2521 && (grsave_prev == 0
2522 || current_frame_info.reg_save_pr == grsave_prev + 1))
2524 mask |= 1;
2525 if (grsave_prev == 0)
2526 grsave = current_frame_info.reg_save_pr;
2529 if (mask)
2530 fprintf (file, "\t.prologue %d, %d\n", mask,
2531 ia64_dbx_register_number (grsave));
2532 else
2533 fputs ("\t.prologue\n", file);
2535 /* Emit a .spill directive, if necessary, to relocate the base of
2536 the register spill area. */
2537 if (current_frame_info.spill_cfa_off != -16)
2538 fprintf (file, "\t.spill %ld\n",
2539 (long) (current_frame_info.spill_cfa_off
2540 + current_frame_info.spill_size));
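/* Illustration: if b0 and ar.pfs were saved in consecutive general
   registers, the code above sets bits 8 and 4 of MASK and the
   directive comes out as something like

        .prologue 12, <grsave>

   where <grsave> is the debugger number (see ia64_dbx_register_number)
   of the first of those save registers.  */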
2543 /* Emit the .body directive at the scheduled end of the prologue. */
2545 void
2546 ia64_output_end_prologue (file)
2547 FILE *file;
2549 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2550 return;
2552 fputs ("\t.body\n", file);
2555 /* Emit the function epilogue. */
2557 void
2558 ia64_function_epilogue (file, size)
2559 FILE *file ATTRIBUTE_UNUSED;
2560 int size ATTRIBUTE_UNUSED;
2562 int i;
2564 /* Reset from the function's potential modifications. */
2565 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2567 if (current_frame_info.reg_fp)
2569 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2570 reg_names[HARD_FRAME_POINTER_REGNUM]
2571 = reg_names[current_frame_info.reg_fp];
2572 reg_names[current_frame_info.reg_fp] = tmp;
2574 if (! TARGET_REG_NAMES)
2576 for (i = 0; i < current_frame_info.n_input_regs; i++)
2577 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2578 for (i = 0; i < current_frame_info.n_local_regs; i++)
2579 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2580 for (i = 0; i < current_frame_info.n_output_regs; i++)
2581 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2584 current_frame_info.initialized = 0;
2588 ia64_dbx_register_number (regno)
2589 int regno;
2591 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2592 from its home at loc79 to something inside the register frame. We
2593 must perform the same renumbering here for the debug info. */
2594 if (current_frame_info.reg_fp)
2596 if (regno == HARD_FRAME_POINTER_REGNUM)
2597 regno = current_frame_info.reg_fp;
2598 else if (regno == current_frame_info.reg_fp)
2599 regno = HARD_FRAME_POINTER_REGNUM;
2602 if (IN_REGNO_P (regno))
2603 return 32 + regno - IN_REG (0);
2604 else if (LOC_REGNO_P (regno))
2605 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2606 else if (OUT_REGNO_P (regno))
2607 return (32 + current_frame_info.n_input_regs
2608 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2609 else
2610 return regno;
2613 void
2614 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2615 rtx addr, fnaddr, static_chain;
2617 rtx addr_reg, eight = GEN_INT (8);
2619 /* Load up our iterator. */
2620 addr_reg = gen_reg_rtx (Pmode);
2621 emit_move_insn (addr_reg, addr);
2623 /* The first two words are the fake descriptor:
2624 __ia64_trampoline, ADDR+16. */
2625 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2626 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2627 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2629 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2630 copy_to_reg (plus_constant (addr, 16)));
2631 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2633 /* The third word is the target descriptor. */
2634 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2635 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2637 /* The fourth word is the static chain. */
2638 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
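/* The 32-byte trampoline built above therefore looks like this
   (offsets relative to ADDR):

         0:  __ia64_trampoline      entry point of the fake descriptor
         8:  ADDR + 16              "gp" of the fake descriptor
        16:  FNADDR                 the real target descriptor
        24:  STATIC_CHAIN

   Calling through the descriptor at ADDR thus enters __ia64_trampoline
   with gp pointing at ADDR + 16, from where it can presumably recover
   the real target and the static chain.  */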
2641 /* Do any needed setup for a variadic function. CUM has not been updated
2642 for the last named argument which has type TYPE and mode MODE.
2644 We generate the actual spill instructions during prologue generation. */
2646 void
2647 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2648 CUMULATIVE_ARGS cum;
2649 int int_mode;
2650 tree type;
2651 int * pretend_size;
2652 int second_time ATTRIBUTE_UNUSED;
2654 /* If this is a stdarg function, then skip the current argument. */
2655 if (! current_function_varargs)
2656 ia64_function_arg_advance (&cum, int_mode, type, 1);
2658 if (cum.words < MAX_ARGUMENT_SLOTS)
2660 int n = MAX_ARGUMENT_SLOTS - cum.words;
2661 *pretend_size = n * UNITS_PER_WORD;
2662 cfun->machine->n_varargs = n;
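/* Illustration: for int f (const char *fmt, ...) compiled as stdarg,
   cum.words is 1 after skipping the named argument, so n is 7 and
   *pretend_size becomes 7 * UNITS_PER_WORD (56 bytes here) of register
   save area, to be filled by the varargs spills in the prologue.  */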
2666 /* Check whether TYPE is a homogeneous floating point aggregate. If
2667 it is, return the mode of the floating point type that appears
2668 in all leaves. If it is not, return VOIDmode.
2670 An aggregate is a homogeneous floating point aggregate if all
2671 fields/elements in it have the same floating point type (e.g.,
2672 SFmode). 128-bit quad-precision floats are excluded. */
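/* Some concrete examples of how this definition classifies types
   (illustrative declarations only):

        struct { float x, y, z; }          HFA, element mode SFmode
        struct { double re, im; }          HFA, element mode DFmode
        struct { float a; double b; }      not an HFA (mixed modes)
        struct { long double q; }          not an HFA (quad precision)

   assuming long double is the 128-bit quad type on this target.  */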
2674 static enum machine_mode
2675 hfa_element_mode (type, nested)
2676 tree type;
2677 int nested;
2679 enum machine_mode element_mode = VOIDmode;
2680 enum machine_mode mode;
2681 enum tree_code code = TREE_CODE (type);
2682 int know_element_mode = 0;
2683 tree t;
2685 switch (code)
2687 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2688 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2689 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2690 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2691 case FUNCTION_TYPE:
2692 return VOIDmode;
2694 /* Fortran complex types are supposed to be HFAs, so we need to handle
2695 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2696 types though. */
2697 case COMPLEX_TYPE:
2698 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2699 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2700 * BITS_PER_UNIT, MODE_FLOAT, 0);
2701 else
2702 return VOIDmode;
2704 case REAL_TYPE:
2705 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2706 mode if this is contained within an aggregate. */
2707 if (nested)
2708 return TYPE_MODE (type);
2709 else
2710 return VOIDmode;
2712 case ARRAY_TYPE:
2713 return TYPE_MODE (TREE_TYPE (type));
2715 case RECORD_TYPE:
2716 case UNION_TYPE:
2717 case QUAL_UNION_TYPE:
2718 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2720 if (TREE_CODE (t) != FIELD_DECL)
2721 continue;
2723 mode = hfa_element_mode (TREE_TYPE (t), 1);
2724 if (know_element_mode)
2726 if (mode != element_mode)
2727 return VOIDmode;
2729 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2730 return VOIDmode;
2731 else
2733 know_element_mode = 1;
2734 element_mode = mode;
2737 return element_mode;
2739 default:
2740 /* If we reach here, we probably have some front-end specific type
2741 that the backend doesn't know about. This can happen via the
2742 aggregate_value_p call in init_function_start. All we can do is
2743 ignore unknown tree types. */
2744 return VOIDmode;
2747 return VOIDmode;
2750 /* Return rtx for register where argument is passed, or zero if it is passed
2751 on the stack. */
2753 /* ??? 128-bit quad-precision floats are always passed in general
2754 registers. */
2757 ia64_function_arg (cum, mode, type, named, incoming)
2758 CUMULATIVE_ARGS *cum;
2759 enum machine_mode mode;
2760 tree type;
2761 int named;
2762 int incoming;
2764 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2765 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2766 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2767 / UNITS_PER_WORD);
2768 int offset = 0;
2769 enum machine_mode hfa_mode = VOIDmode;
2771 /* Integer and float arguments larger than 8 bytes start at the next even
2772 boundary. Aggregates larger than 8 bytes start at the next even boundary
2773 if the aggregate has 16 byte alignment. Net effect is that types with
2774 alignment greater than 8 start at the next even boundary. */
2775 /* ??? The ABI does not specify how to handle aggregates with alignment from
2776 9 to 15 bytes, or greater than 16. We handle them all as if they had
2777 16 byte alignment. Such aggregates can occur only if gcc extensions are
2778 used. */
2779 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2780 : (words > 1))
2781 && (cum->words & 1))
2782 offset = 1;
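/* Example: a 16-byte-aligned aggregate arriving when cum->words is odd
   (say slot 3) is pushed to slot 4 by OFFSET, so it always starts on
   an even slot and hence, with 8-byte slots, on a 16-byte boundary.  */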
2784 /* If all argument slots are used, then it must go on the stack. */
2785 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2786 return 0;
2788 /* Check for and handle homogeneous FP aggregates. */
2789 if (type)
2790 hfa_mode = hfa_element_mode (type, 0);
2792 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2793 and unprototyped hfas are passed specially. */
2794 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2796 rtx loc[16];
2797 int i = 0;
2798 int fp_regs = cum->fp_regs;
2799 int int_regs = cum->words + offset;
2800 int hfa_size = GET_MODE_SIZE (hfa_mode);
2801 int byte_size;
2802 int args_byte_size;
2804 /* If prototyped, pass it in FR regs then GR regs.
2805 If not prototyped, pass it in both FR and GR regs.
2807 If this is an SFmode aggregate, then it is possible to run out of
2808 FR regs while GR regs are still left. In that case, we pass the
2809 remaining part in the GR regs. */
2811 /* Fill the FP regs. We do this always. We stop if we reach the end
2812 of the argument, the last FP register, or the last argument slot. */
2814 byte_size = ((mode == BLKmode)
2815 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2816 args_byte_size = int_regs * UNITS_PER_WORD;
2817 offset = 0;
2818 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2819 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2821 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2822 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2823 + fp_regs)),
2824 GEN_INT (offset));
2825 offset += hfa_size;
2826 args_byte_size += hfa_size;
2827 fp_regs++;
2830 /* If no prototype, then the whole thing must go in GR regs. */
2831 if (! cum->prototype)
2832 offset = 0;
2833 /* If this is an SFmode aggregate, then we might have some left over
2834 that needs to go in GR regs. */
2835 else if (byte_size != offset)
2836 int_regs += offset / UNITS_PER_WORD;
2838 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2840 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
2842 enum machine_mode gr_mode = DImode;
2844 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2845 then this goes in a GR reg left adjusted/little endian, right
2846 adjusted/big endian. */
2847 /* ??? Currently this is handled wrong, because 4-byte hunks are
2848 always right adjusted/little endian. */
2849 if (offset & 0x4)
2850 gr_mode = SImode;
2851 /* If we have an even 4 byte hunk because the aggregate is a
2852 multiple of 4 bytes in size, then this goes in a GR reg right
2853 adjusted/little endian. */
2854 else if (byte_size - offset == 4)
2855 gr_mode = SImode;
2857 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2858 gen_rtx_REG (gr_mode, (basereg
2859 + int_regs)),
2860 GEN_INT (offset));
2861 offset += GET_MODE_SIZE (gr_mode);
2862 int_regs++;
2865 /* If we ended up using just one location, just return that one loc. */
2866 if (i == 1)
2867 return XEXP (loc[0], 0);
2868 else
2869 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
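/* Worked example (illustrative only): a named, prototyped argument of
   type struct { float f[10]; } with all slots free.  The FR loop above
   covers f[0]..f[7], one per FP argument register; since 8 bytes
   remain, int_regs is advanced by 4 and the GR loop packs f[8] and
   f[9] into the fifth general argument register as one DImode piece,
   so the PARALLEL returned has nine elements.  */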
2872 /* Integral and aggregates go in general registers. If we have run out of
2873 FR registers, then FP values must also go in general registers. This can
2874 happen when we have an SFmode HFA. */
2875 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
2876 return gen_rtx_REG (mode, basereg + cum->words + offset);
2878 /* If there is a prototype, then FP values go in a FR register when
2879 named, and in a GR register when unnamed. */
2880 else if (cum->prototype)
2882 if (! named)
2883 return gen_rtx_REG (mode, basereg + cum->words + offset);
2884 else
2885 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
2887 /* If there is no prototype, then FP values go in both FR and GR
2888 registers. */
2889 else
2891 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
2892 gen_rtx_REG (mode, (FR_ARG_FIRST
2893 + cum->fp_regs)),
2894 const0_rtx);
2895 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2896 gen_rtx_REG (mode,
2897 (basereg + cum->words
2898 + offset)),
2899 const0_rtx);
2901 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
2905 /* Return number of words, at the beginning of the argument, that must be
2906 put in registers. 0 if the argument is entirely in registers or entirely
2907 in memory. */
2910 ia64_function_arg_partial_nregs (cum, mode, type, named)
2911 CUMULATIVE_ARGS *cum;
2912 enum machine_mode mode;
2913 tree type;
2914 int named ATTRIBUTE_UNUSED;
2916 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2917 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2918 / UNITS_PER_WORD);
2919 int offset = 0;
2921 /* Arguments with alignment larger than 8 bytes start at the next even
2922 boundary. */
2923 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2924 : (words > 1))
2925 && (cum->words & 1))
2926 offset = 1;
2928 /* If all argument slots are used, then it must go on the stack. */
2929 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2930 return 0;
2932 /* It doesn't matter whether the argument goes in FR or GR regs. If
2933 it fits within the 8 argument slots, then it goes entirely in
2934 registers. If it extends past the last argument slot, then the rest
2935 goes on the stack. */
2937 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
2938 return 0;
2940 return MAX_ARGUMENT_SLOTS - cum->words - offset;
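/* Example: with five slots already used (cum->words == 5), a 40-byte
   aggregate with 8-byte alignment has words == 5, so it does not fit;
   the function returns 8 - 5 = 3, i.e. three words are passed in the
   remaining registers and the rest goes on the stack.  */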
2943 /* Update CUM to point after this argument. This is patterned after
2944 ia64_function_arg. */
2946 void
2947 ia64_function_arg_advance (cum, mode, type, named)
2948 CUMULATIVE_ARGS *cum;
2949 enum machine_mode mode;
2950 tree type;
2951 int named;
2953 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2954 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2955 / UNITS_PER_WORD);
2956 int offset = 0;
2957 enum machine_mode hfa_mode = VOIDmode;
2959 /* If all arg slots are already full, then there is nothing to do. */
2960 if (cum->words >= MAX_ARGUMENT_SLOTS)
2961 return;
2963 /* Arguments with alignment larger than 8 bytes start at the next even
2964 boundary. */
2965 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2966 : (words > 1))
2967 && (cum->words & 1))
2968 offset = 1;
2970 cum->words += words + offset;
2972 /* Check for and handle homogeneous FP aggregates. */
2973 if (type)
2974 hfa_mode = hfa_element_mode (type, 0);
2976 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2977 and unprototyped hfas are passed specially. */
2978 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2980 int fp_regs = cum->fp_regs;
2981 /* This is the original value of cum->words + offset. */
2982 int int_regs = cum->words - words;
2983 int hfa_size = GET_MODE_SIZE (hfa_mode);
2984 int byte_size;
2985 int args_byte_size;
2987 /* If prototyped, pass it in FR regs then GR regs.
2988 If not prototyped, pass it in both FR and GR regs.
2990 If this is an SFmode aggregate, then it is possible to run out of
2991 FR regs while GR regs are still left. In that case, we pass the
2992 remaining part in the GR regs. */
2994 /* Fill the FP regs. We do this always. We stop if we reach the end
2995 of the argument, the last FP register, or the last argument slot. */
2997 byte_size = ((mode == BLKmode)
2998 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2999 args_byte_size = int_regs * UNITS_PER_WORD;
3000 offset = 0;
3001 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3002 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3004 offset += hfa_size;
3005 args_byte_size += hfa_size;
3006 fp_regs++;
3009 cum->fp_regs = fp_regs;
3012 /* Integral and aggregates go in general registers. If we have run out of
3013 FR registers, then FP values must also go in general registers. This can
3014 happen when we have an SFmode HFA. */
3015 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3016 return;
3018 /* If there is a prototype, then FP values go in a FR register when
3019 named, and in a GR register when unnamed. */
3020 else if (cum->prototype)
3022 if (! named)
3023 return;
3024 else
3025 /* ??? Complex types should not reach here. */
3026 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3028 /* If there is no prototype, then FP values go in both FR and GR
3029 registers. */
3030 else
3031 /* ??? Complex types should not reach here. */
3032 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3034 return;
3037 /* Implement va_start. */
3039 void
3040 ia64_va_start (stdarg_p, valist, nextarg)
3041 int stdarg_p;
3042 tree valist;
3043 rtx nextarg;
3045 int arg_words;
3046 int ofs;
3048 arg_words = current_function_args_info.words;
3050 if (stdarg_p)
3051 ofs = 0;
3052 else
3053 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3055 nextarg = plus_constant (nextarg, ofs);
3056 std_expand_builtin_va_start (1, valist, nextarg);
3059 /* Implement va_arg. */
3062 ia64_va_arg (valist, type)
3063 tree valist, type;
3065 tree t;
3067 /* Arguments with alignment larger than 8 bytes start at the next even
3068 boundary. */
3069 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3071 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3072 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3073 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3074 build_int_2 (-2 * UNITS_PER_WORD, -1));
3075 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3076 TREE_SIDE_EFFECTS (t) = 1;
3077 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3080 return std_expand_builtin_va_arg (valist, type);
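/* Illustration: for a 16-byte-aligned type the statements above adjust
   the argument pointer roughly as

        valist = (valist + 15) & -16;

   before deferring to the generic va_arg expansion.  */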
3083 /* Return 1 if the function return value is returned in memory. Return 0 if it is
3084 in a register. */
3087 ia64_return_in_memory (valtype)
3088 tree valtype;
3090 enum machine_mode mode;
3091 enum machine_mode hfa_mode;
3092 int byte_size;
3094 mode = TYPE_MODE (valtype);
3095 byte_size = ((mode == BLKmode)
3096 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3098 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3100 hfa_mode = hfa_element_mode (valtype, 0);
3101 if (hfa_mode != VOIDmode)
3103 int hfa_size = GET_MODE_SIZE (hfa_mode);
3105 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3106 return 1;
3107 else
3108 return 0;
3111 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3112 return 1;
3113 else
3114 return 0;
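/* Examples: struct { float f[8]; } is an SFmode HFA of eight elements
   and is returned in FP registers (return 0); struct { float f[9]; }
   exceeds the eight available slots and is returned in memory (return
   1).  Non-HFA aggregates go to memory once they exceed
   MAX_INT_RETURN_SLOTS words.  */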
3117 /* Return rtx for register that holds the function return value. */
3120 ia64_function_value (valtype, func)
3121 tree valtype;
3122 tree func ATTRIBUTE_UNUSED;
3124 enum machine_mode mode;
3125 enum machine_mode hfa_mode;
3127 mode = TYPE_MODE (valtype);
3128 hfa_mode = hfa_element_mode (valtype, 0);
3130 if (hfa_mode != VOIDmode)
3132 rtx loc[8];
3133 int i;
3134 int hfa_size;
3135 int byte_size;
3136 int offset;
3138 hfa_size = GET_MODE_SIZE (hfa_mode);
3139 byte_size = ((mode == BLKmode)
3140 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3141 offset = 0;
3142 for (i = 0; offset < byte_size; i++)
3144 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3145 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3146 GEN_INT (offset));
3147 offset += hfa_size;
3150 if (i == 1)
3151 return XEXP (loc[0], 0);
3152 else
3153 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3155 else if (FLOAT_TYPE_P (valtype))
3156 return gen_rtx_REG (mode, FR_ARG_FIRST);
3157 else
3158 return gen_rtx_REG (mode, GR_RET_FIRST);
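/* Illustration (register names assume the usual ia64 conventions,
   where FR_ARG_FIRST is f8 and GR_RET_FIRST is r8): a double comes
   back in f8, a long in r8, and an HFA such as struct { float f[4]; }
   as a PARALLEL spanning f8-f11.  */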
3161 /* Print a memory address as an operand to reference that memory location. */
3163 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3164 also call this from ia64_print_operand for memory addresses. */
3166 void
3167 ia64_print_operand_address (stream, address)
3168 FILE * stream ATTRIBUTE_UNUSED;
3169 rtx address ATTRIBUTE_UNUSED;
3173 /* Print an operand to an assembler instruction.
3174 C Swap and print a comparison operator.
3175 D Print an FP comparison operator.
3176 E Print 32 - constant, for SImode shifts as extract.
3177 e Print 64 - constant, for DImode rotates.
3178 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3179 a floating point register emitted normally.
3180 I Invert a predicate register by adding 1.
3181 J Select the proper predicate register for a condition.
3182 j Select the inverse predicate register for a condition.
3183 O Append .acq for volatile load.
3184 P Postincrement of a MEM.
3185 Q Append .rel for volatile store.
3186 S Shift amount for shladd instruction.
3187 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3188 for Intel assembler.
3189 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3190 for Intel assembler.
3191 r Print register name, or constant 0 as r0. HP compatibility for
3192 Linux kernel. */
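/* Illustration of how these codes are used (hypothetical template, not
   lifted from ia64.md):

        "mov %0 = %r1"

   prints operand 1 as a register name, or as "r0" when it is the
   constant zero, via the 'r' case below.  */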
3193 void
3194 ia64_print_operand (file, x, code)
3195 FILE * file;
3196 rtx x;
3197 int code;
3199 const char *str;
3201 switch (code)
3203 case 0:
3204 /* Handled below. */
3205 break;
3207 case 'C':
3209 enum rtx_code c = swap_condition (GET_CODE (x));
3210 fputs (GET_RTX_NAME (c), file);
3211 return;
3214 case 'D':
3215 switch (GET_CODE (x))
3217 case NE:
3218 str = "neq";
3219 break;
3220 case UNORDERED:
3221 str = "unord";
3222 break;
3223 case ORDERED:
3224 str = "ord";
3225 break;
3226 default:
3227 str = GET_RTX_NAME (GET_CODE (x));
3228 break;
3230 fputs (str, file);
3231 return;
3233 case 'E':
3234 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3235 return;
3237 case 'e':
3238 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3239 return;
3241 case 'F':
3242 if (x == CONST0_RTX (GET_MODE (x)))
3243 str = reg_names [FR_REG (0)];
3244 else if (x == CONST1_RTX (GET_MODE (x)))
3245 str = reg_names [FR_REG (1)];
3246 else if (GET_CODE (x) == REG)
3247 str = reg_names [REGNO (x)];
3248 else
3249 abort ();
3250 fputs (str, file);
3251 return;
3253 case 'I':
3254 fputs (reg_names [REGNO (x) + 1], file);
3255 return;
3257 case 'J':
3258 case 'j':
3260 unsigned int regno = REGNO (XEXP (x, 0));
3261 if (GET_CODE (x) == EQ)
3262 regno += 1;
3263 if (code == 'j')
3264 regno ^= 1;
3265 fputs (reg_names [regno], file);
3267 return;
3269 case 'O':
3270 if (MEM_VOLATILE_P (x))
3271 fputs(".acq", file);
3272 return;
3274 case 'P':
3276 HOST_WIDE_INT value;
3278 switch (GET_CODE (XEXP (x, 0)))
3280 default:
3281 return;
3283 case POST_MODIFY:
3284 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3285 if (GET_CODE (x) == CONST_INT)
3286 value = INTVAL (x);
3287 else if (GET_CODE (x) == REG)
3289 fprintf (file, ", %s", reg_names[REGNO (x)]);
3290 return;
3292 else
3293 abort ();
3294 break;
3296 case POST_INC:
3297 value = GET_MODE_SIZE (GET_MODE (x));
3298 break;
3300 case POST_DEC:
3301 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3302 break;
3305 putc (',', file);
3306 putc (' ', file);
3307 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3308 return;
3311 case 'Q':
3312 if (MEM_VOLATILE_P (x))
3313 fputs(".rel", file);
3314 return;
3316 case 'S':
3317 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3318 return;
3320 case 'T':
3321 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3323 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3324 return;
3326 break;
3328 case 'U':
3329 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3331 const char *prefix = "0x";
3332 if (INTVAL (x) & 0x80000000)
3334 fprintf (file, "0xffffffff");
3335 prefix = "";
3337 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3338 return;
3340 break;
3342 case 'r':
3343 /* If this operand is the constant zero, write it as register zero.
3344 Any register, zero, or CONST_INT value is OK here. */
3345 if (GET_CODE (x) == REG)
3346 fputs (reg_names[REGNO (x)], file);
3347 else if (x == CONST0_RTX (GET_MODE (x)))
3348 fputs ("r0", file);
3349 else if (GET_CODE (x) == CONST_INT)
3350 output_addr_const (file, x);
3351 else
3352 output_operand_lossage ("invalid %%r value");
3353 return;
3355 case '+':
3357 const char *which;
3359 /* For conditional branches, returns or calls, substitute
3360 sptk, dptk, dpnt, or spnt for %s. */
3361 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3362 if (x)
3364 int pred_val = INTVAL (XEXP (x, 0));
3366 /* Guess top and bottom 10% statically predicted. */
3367 if (pred_val < REG_BR_PROB_BASE / 50)
3368 which = ".spnt";
3369 else if (pred_val < REG_BR_PROB_BASE / 2)
3370 which = ".dpnt";
3371 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3372 which = ".dptk";
3373 else
3374 which = ".sptk";
3376 else if (GET_CODE (current_output_insn) == CALL_INSN)
3377 which = ".sptk";
3378 else
3379 which = ".dptk";
3381 fputs (which, file);
3382 return;
3385 case ',':
3386 x = current_insn_predicate;
3387 if (x)
3389 unsigned int regno = REGNO (XEXP (x, 0));
3390 if (GET_CODE (x) == EQ)
3391 regno += 1;
3392 fprintf (file, "(%s) ", reg_names [regno]);
3394 return;
3396 default:
3397 output_operand_lossage ("ia64_print_operand: unknown code");
3398 return;
3401 switch (GET_CODE (x))
3403 /* This happens for the spill/restore instructions. */
3404 case POST_INC:
3405 case POST_DEC:
3406 case POST_MODIFY:
3407 x = XEXP (x, 0);
3408 /* ... fall through ... */
3410 case REG:
3411 fputs (reg_names [REGNO (x)], file);
3412 break;
3414 case MEM:
3416 rtx addr = XEXP (x, 0);
3417 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3418 addr = XEXP (addr, 0);
3419 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3420 break;
3423 default:
3424 output_addr_const (file, x);
3425 break;
3428 return;
3431 /* Calculate the cost of moving data from a register in class FROM to
3432 one in class TO. */
3435 ia64_register_move_cost (from, to)
3436 enum reg_class from, to;
3438 int from_hard, to_hard;
3439 int from_gr, to_gr;
3440 int from_fr, to_fr;
3441 int from_pr, to_pr;
3443 from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
3444 to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
3445 from_gr = (from == GENERAL_REGS);
3446 to_gr = (to == GENERAL_REGS);
3447 from_fr = (from == FR_REGS);
3448 to_fr = (to == FR_REGS);
3449 from_pr = (from == PR_REGS);
3450 to_pr = (to == PR_REGS);
3452 if (from_hard && to_hard)
3453 return 8;
3454 else if ((from_hard && !to_gr) || (!from_gr && to_hard))
3455 return 6;
3457 /* Moving between PR registers takes two insns. */
3458 else if (from_pr && to_pr)
3459 return 3;
3460 /* Moving between PR and anything but GR is impossible. */
3461 else if ((from_pr && !to_gr) || (!from_gr && to_pr))
3462 return 6;
3464 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3465 secondary memory reloads for TFmode moves. Unfortunately, we don't
3466 have the mode here, so we can't check that. */
3467 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3468 to avoid spectacularly poor register class preferencing for TFmode. */
3469 else if (from_fr != to_fr)
3470 return 5;
3472 return 2;
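/* Worked example: a copy from BR_REGS to FR_REGS satisfies
   (from_hard && !to_gr) and costs 6, while BR_REGS to GENERAL_REGS
   falls through all of the special cases and costs only 2, so reload
   is encouraged to route branch-register values through a GR.  */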
3475 /* This function returns the register class required for a secondary
3476 register when copying between one of the registers in CLASS, and X,
3477 using MODE. A return value of NO_REGS means that no secondary register
3478 is required. */
3480 enum reg_class
3481 ia64_secondary_reload_class (class, mode, x)
3482 enum reg_class class;
3483 enum machine_mode mode ATTRIBUTE_UNUSED;
3484 rtx x;
3486 int regno = -1;
3488 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3489 regno = true_regnum (x);
3491 switch (class)
3493 case BR_REGS:
3494 /* ??? This is required because of a bad gcse/cse/global interaction.
3495 We end up with two pseudos with overlapping lifetimes both of which
3496 are equiv to the same constant, and both which need to be in BR_REGS.
3497 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3498 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3499 This seems to be a cse bug. cse_basic_block_end changes depending
3500 on the path length, which means the qty_first_reg check in
3501 make_regs_eqv can give different answers at different times. */
3502 /* ??? At some point I'll probably need a reload_indi pattern to handle
3503 this. */
3504 if (BR_REGNO_P (regno))
3505 return GR_REGS;
3507 /* This is needed if a pseudo used as a call_operand gets spilled to a
3508 stack slot. */
3509 if (GET_CODE (x) == MEM)
3510 return GR_REGS;
3511 break;
3513 case FR_REGS:
3514 /* This can happen when a paradoxical subreg is an operand to the
3515 muldi3 pattern. */
3516 /* ??? This shouldn't be necessary after instruction scheduling is
3517 enabled, because paradoxical subregs are not accepted by
3518 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3519 stop the paradoxical subreg stupidity in the *_operand functions
3520 in recog.c. */
3521 if (GET_CODE (x) == MEM
3522 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3523 || GET_MODE (x) == QImode))
3524 return GR_REGS;
3526 /* This can happen because of the ior/and/etc patterns that accept FP
3527 registers as operands. If the third operand is a constant, then it
3528 needs to be reloaded into a FP register. */
3529 if (GET_CODE (x) == CONST_INT)
3530 return GR_REGS;
3532 /* This can happen because of register elimination in a muldi3 insn.
3533 E.g. `26107 * (unsigned long)&u'. */
3534 if (GET_CODE (x) == PLUS)
3535 return GR_REGS;
3536 break;
3538 case PR_REGS:
3539 /* ??? This happens if we cse/gcse a BImode value across a call,
3540 and the function has a nonlocal goto. This is because global
3541 does not allocate call crossing pseudos to hard registers when
3542 current_function_has_nonlocal_goto is true. This is relatively
3543 common for C++ programs that use exceptions. To reproduce,
3544 return NO_REGS and compile libstdc++. */
3545 if (GET_CODE (x) == MEM)
3546 return GR_REGS;
3548 /* This can happen when we take a BImode subreg of a DImode value,
3549 and that DImode value winds up in some non-GR register. */
3550 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3551 return GR_REGS;
3552 break;
3554 case GR_REGS:
3555 /* Since we have no offsettable memory addresses, we need a temporary
3556 to hold the address of the second word. */
3557 if (mode == TImode)
3558 return GR_REGS;
3559 break;
3561 default:
3562 break;
3565 return NO_REGS;
3569 /* Emit text to declare externally defined variables and functions, because
3570 the Intel assembler does not support undefined externals. */
3572 void
3573 ia64_asm_output_external (file, decl, name)
3574 FILE *file;
3575 tree decl;
3576 const char *name;
3578 int save_referenced;
3580 /* GNU as does not need anything here. */
3581 if (TARGET_GNU_AS)
3582 return;
3584 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3585 the linker when we do this, so we need to be careful not to do this for
3586 builtin functions which have no library equivalent. Unfortunately, we
3587 can't tell here whether or not a function will actually be called by
3588 expand_expr, so we pull in library functions even if we may not need
3589 them later. */
3590 if (! strcmp (name, "__builtin_next_arg")
3591 || ! strcmp (name, "alloca")
3592 || ! strcmp (name, "__builtin_constant_p")
3593 || ! strcmp (name, "__builtin_args_info"))
3594 return;
3596 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3597 restore it. */
3598 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3599 if (TREE_CODE (decl) == FUNCTION_DECL)
3601 fprintf (file, "%s", TYPE_ASM_OP);
3602 assemble_name (file, name);
3603 putc (',', file);
3604 fprintf (file, TYPE_OPERAND_FMT, "function");
3605 putc ('\n', file);
3607 ASM_GLOBALIZE_LABEL (file, name);
3608 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3611 /* Parse the -mfixed-range= option string. */
3613 static void
3614 fix_range (const_str)
3615 const char *const_str;
3617 int i, first, last;
3618 char *str, *dash, *comma;
3620 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3621 REG2 are either register names or register numbers. The effect
3622 of this option is to mark the registers in the range from REG1 to
3623 REG2 as ``fixed'' so they won't be used by the compiler. This is
3624 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3626 i = strlen (const_str);
3627 str = (char *) alloca (i + 1);
3628 memcpy (str, const_str, i + 1);
3630 while (1)
3632 dash = strchr (str, '-');
3633 if (!dash)
3635 warning ("value of -mfixed-range must have form REG1-REG2");
3636 return;
3638 *dash = '\0';
3640 comma = strchr (dash + 1, ',');
3641 if (comma)
3642 *comma = '\0';
3644 first = decode_reg_name (str);
3645 if (first < 0)
3647 warning ("unknown register name: %s", str);
3648 return;
3651 last = decode_reg_name (dash + 1);
3652 if (last < 0)
3654 warning ("unknown register name: %s", dash + 1);
3655 return;
3658 *dash = '-';
3660 if (first > last)
3662 warning ("%s-%s is an empty range", str, dash + 1);
3663 return;
3666 for (i = first; i <= last; ++i)
3667 fixed_regs[i] = call_used_regs[i] = 1;
3669 if (!comma)
3670 break;
3672 *comma = ',';
3673 str = comma + 1;
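/* Example: -mfixed-range=f32-f127 marks the upper half of the FP
   register file as fixed and call-used so the compiler never allocates
   it (useful for kernel code, per the comment above); several ranges
   may be given, e.g. -mfixed-range=f32-f63,f96-f127 (the exact ranges
   here are only an illustration).  */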
3677 /* Called to register all of our global variables with the garbage
3678 collector. */
3680 static void
3681 ia64_add_gc_roots ()
3683 ggc_add_rtx_root (&ia64_compare_op0, 1);
3684 ggc_add_rtx_root (&ia64_compare_op1, 1);
3687 static void
3688 ia64_init_machine_status (p)
3689 struct function *p;
3691 p->machine =
3692 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3695 static void
3696 ia64_mark_machine_status (p)
3697 struct function *p;
3699 struct machine_function *machine = p->machine;
3701 if (machine)
3703 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3704 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3705 ggc_mark_rtx (machine->ia64_gp_save);
3709 static void
3710 ia64_free_machine_status (p)
3711 struct function *p;
3713 free (p->machine);
3714 p->machine = NULL;
3717 /* Handle TARGET_OPTIONS switches. */
3719 void
3720 ia64_override_options ()
3722 if (TARGET_AUTO_PIC)
3723 target_flags |= MASK_CONST_GP;
3725 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3727 warning ("cannot optimize division for both latency and throughput");
3728 target_flags &= ~MASK_INLINE_DIV_THR;
3731 if (ia64_fixed_range_string)
3732 fix_range (ia64_fixed_range_string);
3734 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3735 flag_schedule_insns_after_reload = 0;
3737 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3739 init_machine_status = ia64_init_machine_status;
3740 mark_machine_status = ia64_mark_machine_status;
3741 free_machine_status = ia64_free_machine_status;
3743 ia64_add_gc_roots ();
3746 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3747 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3748 static enum attr_type ia64_safe_type PARAMS((rtx));
3750 static enum attr_itanium_requires_unit0
3751 ia64_safe_itanium_requires_unit0 (insn)
3752 rtx insn;
3754 if (recog_memoized (insn) >= 0)
3755 return get_attr_itanium_requires_unit0 (insn);
3756 else
3757 return ITANIUM_REQUIRES_UNIT0_NO;
3760 static enum attr_itanium_class
3761 ia64_safe_itanium_class (insn)
3762 rtx insn;
3764 if (recog_memoized (insn) >= 0)
3765 return get_attr_itanium_class (insn);
3766 else
3767 return ITANIUM_CLASS_UNKNOWN;
3770 static enum attr_type
3771 ia64_safe_type (insn)
3772 rtx insn;
3774 if (recog_memoized (insn) >= 0)
3775 return get_attr_type (insn);
3776 else
3777 return TYPE_UNKNOWN;
3780 /* The following collection of routines emit instruction group stop bits as
3781 necessary to avoid dependencies. */
3783 /* Need to track some additional registers as far as serialization is
3784 concerned so we can properly handle br.call and br.ret. We could
3785 make these registers visible to gcc, but since these registers are
3786 never explicitly used in gcc generated code, it seems wasteful to
3787 do so (plus it would make the call and return patterns needlessly
3788 complex). */
3789 #define REG_GP (GR_REG (1))
3790 #define REG_RP (BR_REG (0))
3791 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3792 /* This is used for volatile asms which may require a stop bit immediately
3793 before and after them. */
3794 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
3795 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3796 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
3798 /* For each register, we keep track of how it has been written in the
3799 current instruction group.
3801 If a register is written unconditionally (no qualifying predicate),
3802 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3804 If a register is written if its qualifying predicate P is true, we
3805 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3806 may be written again by the complement of P (P^1) and when this happens,
3807 WRITE_COUNT gets set to 2.
3809 The result of this is that whenever an insn attempts to write a register
3810 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
3812 If a predicate register is written by a floating-point insn, we set
3813 WRITTEN_BY_FP to true.
3815 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3816 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
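/* A concrete case (hand-written ia64 assembly, for illustration only):

        cmp.eq  p6, p7 = r14, r15
        (p6)    mov r8 = 1
        (p7)    mov r8 = 2

   r8 is written under complementary predicates, so its WRITE_COUNT
   goes from 1 to 2 without forcing a barrier; a further write to r8 in
   the same group would then require an insn group stop, which is the
   ";;" emitted into the assembly output.  */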
3818 struct reg_write_state
3820 unsigned int write_count : 2;
3821 unsigned int first_pred : 16;
3822 unsigned int written_by_fp : 1;
3823 unsigned int written_by_and : 1;
3824 unsigned int written_by_or : 1;
3827 /* Cumulative info for the current instruction group. */
3828 struct reg_write_state rws_sum[NUM_REGS];
3829 /* Info for the current instruction. This gets copied to rws_sum after a
3830 stop bit is emitted. */
3831 struct reg_write_state rws_insn[NUM_REGS];
3833 /* Indicates whether this is the first instruction after a stop bit,
3834 in which case we don't need another stop bit. Without this, we hit
3835 the abort in ia64_variable_issue when scheduling an alloc. */
3836 static int first_instruction;
3838 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3839 RTL for one instruction. */
3840 struct reg_flags
3842 unsigned int is_write : 1; /* Is register being written? */
3843 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
3844 unsigned int is_branch : 1; /* Is register used as part of a branch? */
3845 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
3846 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
3847 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
3850 static void rws_update PARAMS ((struct reg_write_state *, int,
3851 struct reg_flags, int));
3852 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
3853 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
3854 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
3855 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
3856 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
3857 static void init_insn_group_barriers PARAMS ((void));
3858 static int group_barrier_needed_p PARAMS ((rtx));
3859 static int safe_group_barrier_needed_p PARAMS ((rtx));
3861 /* Update *RWS for REGNO, which is being written by the current instruction,
3862 with predicate PRED, and associated register flags in FLAGS. */
3864 static void
3865 rws_update (rws, regno, flags, pred)
3866 struct reg_write_state *rws;
3867 int regno;
3868 struct reg_flags flags;
3869 int pred;
3871 rws[regno].write_count += pred ? 1 : 2;
3872 rws[regno].written_by_fp |= flags.is_fp;
3873 /* ??? Not tracking and/or across differing predicates. */
3874 rws[regno].written_by_and = flags.is_and;
3875 rws[regno].written_by_or = flags.is_or;
3876 rws[regno].first_pred = pred;
3879 /* Handle an access to register REGNO of type FLAGS using predicate register
3880 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3881 a dependency with an earlier instruction in the same group. */
3883 static int
3884 rws_access_regno (regno, flags, pred)
3885 int regno;
3886 struct reg_flags flags;
3887 int pred;
3889 int need_barrier = 0;
3891 if (regno >= NUM_REGS)
3892 abort ();
3894 if (! PR_REGNO_P (regno))
3895 flags.is_and = flags.is_or = 0;
3897 if (flags.is_write)
3899 int write_count;
3901 /* One insn writes same reg multiple times? */
3902 if (rws_insn[regno].write_count > 0)
3903 abort ();
3905 /* Update info for current instruction. */
3906 rws_update (rws_insn, regno, flags, pred);
3907 write_count = rws_sum[regno].write_count;
3909 switch (write_count)
3911 case 0:
3912 /* The register has not been written yet. */
3913 rws_update (rws_sum, regno, flags, pred);
3914 break;
3916 case 1:
3917 /* The register has been written via a predicate. If this is
3918 not a complementary predicate, then we need a barrier. */
3919 /* ??? This assumes that P and P+1 are always complementary
3920 predicates for P even. */
3921 if (flags.is_and && rws_sum[regno].written_by_and)
3923 else if (flags.is_or && rws_sum[regno].written_by_or)
3925 else if ((rws_sum[regno].first_pred ^ 1) != pred)
3926 need_barrier = 1;
3927 rws_update (rws_sum, regno, flags, pred);
3928 break;
3930 case 2:
3931 /* The register has been unconditionally written already. We
3932 need a barrier. */
3933 if (flags.is_and && rws_sum[regno].written_by_and)
3935 else if (flags.is_or && rws_sum[regno].written_by_or)
3937 else
3938 need_barrier = 1;
3939 rws_sum[regno].written_by_and = flags.is_and;
3940 rws_sum[regno].written_by_or = flags.is_or;
3941 break;
3943 default:
3944 abort ();
3947 else
3949 if (flags.is_branch)
3951 /* Branches have several RAW exceptions that allow us to avoid
3952 barriers. */
3954 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
3955 /* RAW dependencies on branch regs are permissible as long
3956 as the writer is a non-branch instruction. Since we
3957 never generate code that uses a branch register written
3958 by a branch instruction, handling this case is
3959 easy. */
3960 return 0;
3962 if (REGNO_REG_CLASS (regno) == PR_REGS
3963 && ! rws_sum[regno].written_by_fp)
3964 /* The predicates of a branch are available within the
3965 same insn group as long as the predicate was written by
3966 something other than a floating-point instruction. */
3967 return 0;
3970 if (flags.is_and && rws_sum[regno].written_by_and)
3971 return 0;
3972 if (flags.is_or && rws_sum[regno].written_by_or)
3973 return 0;
3975 switch (rws_sum[regno].write_count)
3977 case 0:
3978 /* The register has not been written yet. */
3979 break;
3981 case 1:
3982 /* The register has been written via a predicate. If this is
3983 not a complementary predicate, then we need a barrier. */
3984 /* ??? This assumes that P and P+1 are always complementary
3985 predicates for P even. */
3986 if ((rws_sum[regno].first_pred ^ 1) != pred)
3987 need_barrier = 1;
3988 break;
3990 case 2:
3991 /* The register has been unconditionally written already. We
3992 need a barrier. */
3993 need_barrier = 1;
3994 break;
3996 default:
3997 abort ();
4001 return need_barrier;
4004 static int
4005 rws_access_reg (reg, flags, pred)
4006 rtx reg;
4007 struct reg_flags flags;
4008 int pred;
4010 int regno = REGNO (reg);
4011 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4013 if (n == 1)
4014 return rws_access_regno (regno, flags, pred);
4015 else
4017 int need_barrier = 0;
4018 while (--n >= 0)
4019 need_barrier |= rws_access_regno (regno + n, flags, pred);
4020 return need_barrier;
4024 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4025 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4027 static void
4028 update_set_flags (x, pflags, ppred, pcond)
4029 rtx x;
4030 struct reg_flags *pflags;
4031 int *ppred;
4032 rtx *pcond;
4034 rtx src = SET_SRC (x);
4036 *pcond = 0;
4038 switch (GET_CODE (src))
4040 case CALL:
4041 return;
4043 case IF_THEN_ELSE:
4044 if (SET_DEST (x) == pc_rtx)
4045 /* X is a conditional branch. */
4046 return;
4047 else
4049 int is_complemented = 0;
4051 /* X is a conditional move. */
4052 rtx cond = XEXP (src, 0);
4053 if (GET_CODE (cond) == EQ)
4054 is_complemented = 1;
4055 cond = XEXP (cond, 0);
4056 if (GET_CODE (cond) != REG
4057 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4058 abort ();
4059 *pcond = cond;
4060 if (XEXP (src, 1) == SET_DEST (x)
4061 || XEXP (src, 2) == SET_DEST (x))
4063 /* X is a conditional move that conditionally writes the
4064 destination. */
4066 /* We need another complement in this case. */
4067 if (XEXP (src, 1) == SET_DEST (x))
4068 is_complemented = ! is_complemented;
4070 *ppred = REGNO (cond);
4071 if (is_complemented)
4072 ++*ppred;
4075 /* ??? If this is a conditional write to the dest, then this
4076 instruction does not actually read one source. This probably
4077 doesn't matter, because that source is also the dest. */
4078 /* ??? Multiple writes to predicate registers are allowed
4079 if they are all AND type compares, or if they are all OR
4080 type compares. We do not generate such instructions
4081 currently. */
4083 /* ... fall through ... */
4085 default:
4086 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4087 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4088 /* Set pflags->is_fp to 1 so that we know we're dealing
4089 with a floating point comparison when processing the
4090 destination of the SET. */
4091 pflags->is_fp = 1;
4093 /* Discover if this is a parallel comparison. We only handle
4094 and.orcm and or.andcm at present, since we must retain a
4095 strict inverse on the predicate pair. */
4096 else if (GET_CODE (src) == AND)
4097 pflags->is_and = 1;
4098 else if (GET_CODE (src) == IOR)
4099 pflags->is_or = 1;
4101 break;
4105 /* Subroutine of rtx_needs_barrier; this function determines whether the
4106 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4107 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4108 for this insn. */
4110 static int
4111 set_src_needs_barrier (x, flags, pred, cond)
4112 rtx x;
4113 struct reg_flags flags;
4114 int pred;
4115 rtx cond;
4117 int need_barrier = 0;
4118 rtx dst;
4119 rtx src = SET_SRC (x);
4121 if (GET_CODE (src) == CALL)
4122 /* We don't need to worry about the result registers that
4123 get written by a subroutine call. */
4124 return rtx_needs_barrier (src, flags, pred);
4125 else if (SET_DEST (x) == pc_rtx)
4127 /* X is a conditional branch. */
4128 /* ??? This seems redundant, as the caller sets this bit for
4129 all JUMP_INSNs. */
4130 flags.is_branch = 1;
4131 return rtx_needs_barrier (src, flags, pred);
4134 need_barrier = rtx_needs_barrier (src, flags, pred);
4136 /* This instruction unconditionally uses a predicate register. */
4137 if (cond)
4138 need_barrier |= rws_access_reg (cond, flags, 0);
4140 dst = SET_DEST (x);
4141 if (GET_CODE (dst) == ZERO_EXTRACT)
4143 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4144 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4145 dst = XEXP (dst, 0);
4147 return need_barrier;
4150 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4151 Return 1 if this access creates a dependency with an earlier instruction
4152 in the same group. */
4154 static int
4155 rtx_needs_barrier (x, flags, pred)
4156 rtx x;
4157 struct reg_flags flags;
4158 int pred;
4160 int i, j;
4161 int is_complemented = 0;
4162 int need_barrier = 0;
4163 const char *format_ptr;
4164 struct reg_flags new_flags;
4165 rtx cond = 0;
4167 if (! x)
4168 return 0;
4170 new_flags = flags;
4172 switch (GET_CODE (x))
4174 case SET:
4175 update_set_flags (x, &new_flags, &pred, &cond);
4176 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4177 if (GET_CODE (SET_SRC (x)) != CALL)
4179 new_flags.is_write = 1;
4180 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4182 break;
4184 case CALL:
4185 new_flags.is_write = 0;
4186 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4188 /* Avoid multiple register writes, in case this is a pattern with
4189 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4190 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4192 new_flags.is_write = 1;
4193 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4194 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4195 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4197 break;
4199 case COND_EXEC:
4200 /* X is a predicated instruction. */
4202 cond = COND_EXEC_TEST (x);
4203 if (pred)
4204 abort ();
4205 need_barrier = rtx_needs_barrier (cond, flags, 0);
4207 if (GET_CODE (cond) == EQ)
4208 is_complemented = 1;
4209 cond = XEXP (cond, 0);
4210 if (GET_CODE (cond) != REG
4211 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4212 abort ();
4213 pred = REGNO (cond);
4214 if (is_complemented)
4215 ++pred;
4217 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4218 return need_barrier;
4220 case CLOBBER:
4221 case USE:
4222 /* Clobber & use are for earlier compiler-phases only. */
4223 break;
4225 case ASM_OPERANDS:
4226 case ASM_INPUT:
4227 /* We always emit stop bits for traditional asms. We emit stop bits
4228 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4229 if (GET_CODE (x) != ASM_OPERANDS
4230 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4232 /* Avoid writing the register multiple times if we have multiple
4233 asm outputs. This avoids an abort in rws_access_reg. */
4234 if (! rws_insn[REG_VOLATILE].write_count)
4236 new_flags.is_write = 1;
4237 rws_access_regno (REG_VOLATILE, new_flags, pred);
4239 return 1;
4242 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4243 We cannot just fall through here, since then we would be confused
4244 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
4245 a traditional asm the way it normally would.
4247 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4248 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4249 need_barrier = 1;
4250 break;
4252 case PARALLEL:
4253 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4255 rtx pat = XVECEXP (x, 0, i);
4256 if (GET_CODE (pat) == SET)
4258 update_set_flags (pat, &new_flags, &pred, &cond);
4259 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4261 else if (GET_CODE (pat) == USE
4262 || GET_CODE (pat) == CALL
4263 || GET_CODE (pat) == ASM_OPERANDS)
4264 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4265 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4266 abort ();
4268 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4270 rtx pat = XVECEXP (x, 0, i);
4271 if (GET_CODE (pat) == SET)
4273 if (GET_CODE (SET_SRC (pat)) != CALL)
4275 new_flags.is_write = 1;
4276 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4277 pred);
4280 else if (GET_CODE (pat) == CLOBBER)
4281 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4283 break;
4285 case SUBREG:
4286 x = SUBREG_REG (x);
4287 /* FALLTHRU */
4288 case REG:
4289 if (REGNO (x) == AR_UNAT_REGNUM)
4291 for (i = 0; i < 64; ++i)
4292 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4294 else
4295 need_barrier = rws_access_reg (x, flags, pred);
4296 break;
4298 case MEM:
4299 /* Find the regs used in memory address computation. */
4300 new_flags.is_write = 0;
4301 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4302 break;
4304 case CONST_INT: case CONST_DOUBLE:
4305 case SYMBOL_REF: case LABEL_REF: case CONST:
4306 break;
4308 /* Operators with side-effects. */
4309 case POST_INC: case POST_DEC:
4310 if (GET_CODE (XEXP (x, 0)) != REG)
4311 abort ();
4313 new_flags.is_write = 0;
4314 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4315 new_flags.is_write = 1;
4316 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4317 break;
4319 case POST_MODIFY:
4320 if (GET_CODE (XEXP (x, 0)) != REG)
4321 abort ();
4323 new_flags.is_write = 0;
4324 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4325 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4326 new_flags.is_write = 1;
4327 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4328 break;
4330 /* Handle common unary and binary ops for efficiency. */
4331 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4332 case MOD: case UDIV: case UMOD: case AND: case IOR:
4333 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4334 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4335 case NE: case EQ: case GE: case GT: case LE:
4336 case LT: case GEU: case GTU: case LEU: case LTU:
4337 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4338 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4339 break;
4341 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4342 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4343 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4344 case SQRT: case FFS:
4345 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4346 break;
4348 case UNSPEC:
4349 switch (XINT (x, 1))
4351 case 1: /* st8.spill */
4352 case 2: /* ld8.fill */
4354 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4355 HOST_WIDE_INT bit = (offset >> 3) & 63;
4357 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4358 new_flags.is_write = (XINT (x, 1) == 1);
4359 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4360 new_flags, pred);
4361 break;
4364 case 3: /* stf.spill */
4365 case 4: /* ldf.spill */
4366 case 8: /* popcnt */
4367 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4368 break;
4370 case 7: /* pred_rel_mutex */
4371 case 9: /* pic call */
4372 case 12: /* mf */
4373 case 19: /* fetchadd_acq */
4374 case 20: /* mov = ar.bsp */
4375 case 21: /* flushrs */
4376 case 22: /* bundle selector */
4377 case 23: /* cycle display */
4378 break;
4380 case 5: /* recip_approx */
4381 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4382 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4383 break;
4385 case 13: /* cmpxchg_acq */
4386 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4387 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4388 break;
4390 default:
4391 abort ();
4393 break;
4395 case UNSPEC_VOLATILE:
4396 switch (XINT (x, 1))
4398 case 0: /* alloc */
4399 /* Alloc must always be the first instruction of a group.
4400 We force this by always returning true. */
4401 /* ??? We might get better scheduling if we explicitly check for
4402 input/local/output register dependencies, and modify the
4403 scheduler so that alloc is always reordered to the start of
4404 the current group. We could then eliminate all of the
4405 first_instruction code. */
4406 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4408 new_flags.is_write = 1;
4409 rws_access_regno (REG_AR_CFM, new_flags, pred);
4410 return 1;
4412 case 1: /* blockage */
4413 case 2: /* insn group barrier */
4414 return 0;
4416 case 5: /* set_bsp */
4417 need_barrier = 1;
4418 break;
4420 case 7: /* pred.rel.mutex */
4421 case 8: /* safe_across_calls all */
4422 case 9: /* safe_across_calls normal */
4423 return 0;
4425 default:
4426 abort ();
4428 break;
4430 case RETURN:
4431 new_flags.is_write = 0;
4432 need_barrier = rws_access_regno (REG_RP, flags, pred);
4433 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4435 new_flags.is_write = 1;
4436 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4437 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4438 break;
4440 default:
4441 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4442 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4443 switch (format_ptr[i])
4445 case '0': /* unused field */
4446 case 'i': /* integer */
4447 case 'n': /* note */
4448 case 'w': /* wide integer */
4449 case 's': /* pointer to string */
4450 case 'S': /* optional pointer to string */
4451 break;
4453 case 'e':
4454 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4455 need_barrier = 1;
4456 break;
4458 case 'E':
4459 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4460 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4461 need_barrier = 1;
4462 break;
4464 default:
4465 abort ();
4467 break;
4469 return need_barrier;
4472 /* Clear out the state for group_barrier_needed_p at the start of a
4473 sequence of insns. */
4475 static void
4476 init_insn_group_barriers ()
4478 memset (rws_sum, 0, sizeof (rws_sum));
4479 first_instruction = 1;
4482 /* Given the current state, recorded by previous calls to this function,
4483 determine whether a group barrier (a stop bit) is necessary before INSN.
4484 Return nonzero if so. */
4486 static int
4487 group_barrier_needed_p (insn)
4488 rtx insn;
4490 rtx pat;
4491 int need_barrier = 0;
4492 struct reg_flags flags;
4494 memset (&flags, 0, sizeof (flags));
4495 switch (GET_CODE (insn))
4497 case NOTE:
4498 break;
4500 case BARRIER:
4501 /* A barrier doesn't imply an instruction group boundary. */
4502 break;
4504 case CODE_LABEL:
4505 memset (rws_insn, 0, sizeof (rws_insn));
4506 return 1;
4508 case CALL_INSN:
4509 flags.is_branch = 1;
4510 flags.is_sibcall = SIBLING_CALL_P (insn);
4511 memset (rws_insn, 0, sizeof (rws_insn));
4512 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4513 break;
4515 case JUMP_INSN:
4516 flags.is_branch = 1;
4517 /* FALLTHRU */
4519 case INSN:
4520 if (GET_CODE (PATTERN (insn)) == USE
4521 || GET_CODE (PATTERN (insn)) == CLOBBER)
4522 /* Don't care about USE and CLOBBER "insns"---those are used to
4523 indicate to the optimizer that it shouldn't get rid of
4524 certain operations. */
4525 break;
4527 pat = PATTERN (insn);
4529 /* Ug. Hack hacks hacked elsewhere. */
4530 switch (recog_memoized (insn))
4532 /* We play dependency tricks with the epilogue in order
4533 to get proper schedules. Undo this for dv analysis. */
4534 case CODE_FOR_epilogue_deallocate_stack:
4535 pat = XVECEXP (pat, 0, 0);
4536 break;
4538 /* The pattern we use for br.cloop confuses the code above.
4539 The second element of the vector is representative. */
4540 case CODE_FOR_doloop_end_internal:
4541 pat = XVECEXP (pat, 0, 1);
4542 break;
4544 /* Doesn't generate code. */
4545 case CODE_FOR_pred_rel_mutex:
4546 return 0;
4548 default:
4549 break;
4552 memset (rws_insn, 0, sizeof (rws_insn));
4553 need_barrier = rtx_needs_barrier (pat, flags, 0);
4555 /* Check to see if the previous instruction was a volatile
4556 asm. */
4557 if (! need_barrier)
4558 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4559 break;
4561 default:
4562 abort ();
4565 if (first_instruction)
4567 need_barrier = 0;
4568 first_instruction = 0;
4571 return need_barrier;
4574 /* Like group_barrier_needed_p, but do not clobber the current state. */
4576 static int
4577 safe_group_barrier_needed_p (insn)
4578 rtx insn;
4580 struct reg_write_state rws_saved[NUM_REGS];
4581 int saved_first_instruction;
4582 int t;
4584 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4585 saved_first_instruction = first_instruction;
4587 t = group_barrier_needed_p (insn);
4589 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4590 first_instruction = saved_first_instruction;
4592 return t;
4595 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4596 as necessary to eliminate dependencies. This function assumes that
4597 a final instruction scheduling pass has been run which has already
4598 inserted most of the necessary stop bits. This function only inserts
4599 new ones at basic block boundaries, since these are invisible to the
4600 scheduler. */
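/* Note: the barriers emitted here are insn_group_barrier insns, which
   appear as ";;" stop bits in the assembly output.  */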
4602 static void
4603 emit_insn_group_barriers (dump, insns)
4604 FILE *dump;
4605 rtx insns;
4607 rtx insn;
4608 rtx last_label = 0;
4609 int insns_since_last_label = 0;
4611 init_insn_group_barriers ();
4613 for (insn = insns; insn; insn = NEXT_INSN (insn))
4615 if (GET_CODE (insn) == CODE_LABEL)
4617 if (insns_since_last_label)
4618 last_label = insn;
4619 insns_since_last_label = 0;
4621 else if (GET_CODE (insn) == NOTE
4622 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4624 if (insns_since_last_label)
4625 last_label = insn;
4626 insns_since_last_label = 0;
4628 else if (GET_CODE (insn) == INSN
4629 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4630 && XINT (PATTERN (insn), 1) == 2)
4632 init_insn_group_barriers ();
4633 last_label = 0;
4635 else if (INSN_P (insn))
4637 insns_since_last_label = 1;
4639 if (group_barrier_needed_p (insn))
4641 if (last_label)
4643 if (dump)
4644 fprintf (dump, "Emitting stop before label %d\n",
4645 INSN_UID (last_label));
4646 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4647 insn = last_label;
4649 init_insn_group_barriers ();
4650 last_label = 0;
4657 /* Like emit_insn_group_barriers, but used when no final scheduling pass was run.
4658 This function has to emit all necessary group barriers. */
4660 static void
4661 emit_all_insn_group_barriers (dump, insns)
4662 FILE *dump ATTRIBUTE_UNUSED;
4663 rtx insns;
4665 rtx insn;
4667 init_insn_group_barriers ();
4669 for (insn = insns; insn; insn = NEXT_INSN (insn))
4671 if (GET_CODE (insn) == INSN
4672 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4673 && XINT (PATTERN (insn), 1) == 2)
4674 init_insn_group_barriers ();
4675 else if (INSN_P (insn))
4677 if (group_barrier_needed_p (insn))
4679 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4680 init_insn_group_barriers ();
4681 group_barrier_needed_p (insn);
4687 static int errata_find_address_regs PARAMS ((rtx *, void *));
4688 static void errata_emit_nops PARAMS ((rtx));
4689 static void fixup_errata PARAMS ((void));
4691 /* This structure is used to track some details about the previous insn
4692 groups so we can determine if it may be necessary to insert NOPs to
4693 work around hardware errata. */
4694 static struct group
4696 HARD_REG_SET p_reg_set;
4697 HARD_REG_SET gr_reg_conditionally_set;
4698 } last_group[2];
4700 /* Index into the last_group array. */
4701 static int group_idx;
4703 /* Called through for_each_rtx; determines if a hard register that was
4704 conditionally set in the previous group is used as an address register.
4705 It ensures that for_each_rtx returns 1 in that case. */
4706 static int
4707 errata_find_address_regs (xp, data)
4708 rtx *xp;
4709 void *data ATTRIBUTE_UNUSED;
4711 rtx x = *xp;
4712 if (GET_CODE (x) != MEM)
4713 return 0;
4714 x = XEXP (x, 0);
4715 if (GET_CODE (x) == POST_MODIFY)
4716 x = XEXP (x, 0);
4717 if (GET_CODE (x) == REG)
4719 struct group *prev_group = last_group + (group_idx ^ 1);
4720 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4721 REGNO (x)))
4722 return 1;
4723 return -1;
4725 return 0;
4728 /* Called for each insn; this function keeps track of the state in
4729 last_group and emits additional NOPs if necessary to work around
4730 an Itanium A/B step erratum. */
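/* Roughly: a predicate written by an F-type insn, then used in a following
   group to guard a conditional write of a general register, followed by a
   use of that register as a memory address, is the sequence being avoided;
   when the address use is found below, a stop bit, a nop and another stop
   bit are emitted in front of the offending insn.  */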
4731 static void
4732 errata_emit_nops (insn)
4733 rtx insn;
4735 struct group *this_group = last_group + group_idx;
4736 struct group *prev_group = last_group + (group_idx ^ 1);
4737 rtx pat = PATTERN (insn);
4738 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4739 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4740 enum attr_type type;
4741 rtx set = real_pat;
4743 if (GET_CODE (real_pat) == USE
4744 || GET_CODE (real_pat) == CLOBBER
4745 || GET_CODE (real_pat) == ASM_INPUT
4746 || GET_CODE (real_pat) == ADDR_VEC
4747 || GET_CODE (real_pat) == ADDR_DIFF_VEC
4748 || asm_noperands (PATTERN (insn)) >= 0)
4749 return;
4751 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4752 parts of it. */
4754 if (GET_CODE (set) == PARALLEL)
4756 int i;
4757 set = XVECEXP (real_pat, 0, 0);
4758 for (i = 1; i < XVECLEN (real_pat, 0); i++)
4759 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
4760 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
4762 set = 0;
4763 break;
4767 if (set && GET_CODE (set) != SET)
4768 set = 0;
4770 type = get_attr_type (insn);
4772 if (type == TYPE_F
4773 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
4774 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
4776 if ((type == TYPE_M || type == TYPE_A) && cond && set
4777 && REG_P (SET_DEST (set))
4778 && GET_CODE (SET_SRC (set)) != PLUS
4779 && GET_CODE (SET_SRC (set)) != MINUS
4780 && (GET_CODE (SET_SRC (set)) != ASHIFT
4781 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
4782 && (GET_CODE (SET_SRC (set)) != MEM
4783 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
4784 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
4786 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
4787 || ! REG_P (XEXP (cond, 0)))
4788 abort ();
4790 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
4791 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
4793 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
4795 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4796 emit_insn_before (gen_nop (), insn);
4797 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4798 group_idx = 0;
4799 memset (last_group, 0, sizeof last_group);
4803 /* Emit extra nops if they are required to work around hardware errata. */
4805 static void
4806 fixup_errata ()
4808 rtx insn;
4810 if (! TARGET_B_STEP)
4811 return;
4813 group_idx = 0;
4814 memset (last_group, 0, sizeof last_group);
4816 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4818 if (!INSN_P (insn))
4819 continue;
4821 if (ia64_safe_type (insn) == TYPE_S)
4823 group_idx ^= 1;
4824 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
4826 else
4827 errata_emit_nops (insn);
4831 /* Instruction scheduling support. */
4832 /* Describe one bundle. */
4834 struct bundle
4836 /* Zero if there's no possibility of a stop in this bundle other than
4837 at the end, otherwise the position of the optional stop bit. */
4838 int possible_stop;
4839 /* The types of the three slots. */
4840 enum attr_type t[3];
4841 /* The pseudo op to be emitted into the assembler output. */
4842 const char *name;
4845 #define NR_BUNDLES 10
4847 /* A list of all available bundles. */
4849 static const struct bundle bundle[NR_BUNDLES] =
4851 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
4852 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
4853 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
4854 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
4855 #if NR_BUNDLES == 10
4856 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
4857 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
4858 #endif
4859 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
4860 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
4861 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
4862 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
4863 it matches an L type insn. Otherwise we'll try to generate L type
4864 nops. */
4865 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
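/* Each bundle carries three instruction slots plus a template field that
   fixes the slot types; the names above are the assembler pseudo-ops for
   those templates, and get_bundle_name (below) maps a bundle index to its
   name.  */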
4868 /* Describe a packet of instructions. Packets consist of two bundles that
4869 are visible to the hardware in one scheduling window. */
4871 struct ia64_packet
4873 const struct bundle *t1, *t2;
4874 /* Precomputed value of the first split issue in this packet if a cycle
4875 starts at its beginning. */
4876 int first_split;
4877 /* For convenience, the insn types are replicated here so we don't have
4878 to go through T1 and T2 all the time. */
4879 enum attr_type t[6];
4882 /* An array containing all possible packets. */
4883 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
4884 static struct ia64_packet packets[NR_PACKETS];
4886 /* Map attr_type to a string with the name. */
4888 static const char *type_names[] =
4890 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
4893 /* Nonzero if we should insert stop bits into the schedule. */
4894 int ia64_final_schedule = 0;
4896 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
4897 static rtx ia64_single_set PARAMS ((rtx));
4898 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
4899 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
4900 static void maybe_rotate PARAMS ((FILE *));
4901 static void finish_last_head PARAMS ((FILE *, int));
4902 static void rotate_one_bundle PARAMS ((FILE *));
4903 static void rotate_two_bundles PARAMS ((FILE *));
4904 static void nop_cycles_until PARAMS ((int, FILE *));
4905 static void cycle_end_fill_slots PARAMS ((FILE *));
4906 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
4907 static int get_split PARAMS ((const struct ia64_packet *, int));
4908 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
4909 const struct ia64_packet *, int));
4910 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
4911 rtx *, enum attr_type *, int));
4912 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
4913 static void dump_current_packet PARAMS ((FILE *));
4914 static void schedule_stop PARAMS ((FILE *));
4915 static rtx gen_nop_type PARAMS ((enum attr_type));
4916 static void ia64_emit_nops PARAMS ((void));
4918 /* Map a bundle number to its pseudo-op. */
4920 const char *
4921 get_bundle_name (b)
4922 int b;
4924 return bundle[b].name;
4927 /* Compute the slot which will cause a split issue in packet P if the
4928 current cycle begins at slot BEGIN. */
4930 static int
4931 itanium_split_issue (p, begin)
4932 const struct ia64_packet *p;
4933 int begin;
4935 int type_count[TYPE_S];
4936 int i;
4937 int split = 6;
4939 if (begin < 3)
4941 /* Always split before and after MMF. */
4942 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
4943 return 3;
4944 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
4945 return 3;
4946 /* Always split after MBB and BBB. */
4947 if (p->t[1] == TYPE_B)
4948 return 3;
4949 /* Split after first bundle in MIB BBB combination. */
4950 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
4951 return 3;
4954 memset (type_count, 0, sizeof type_count);
4955 for (i = begin; i < split; i++)
4957 enum attr_type t0 = p->t[i];
4958 /* An MLX bundle reserves the same units as an MFI bundle. */
4959 enum attr_type t = (t0 == TYPE_L ? TYPE_F
4960 : t0 == TYPE_X ? TYPE_I
4961 : t0);
4962 int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
4963 if (type_count[t] == max)
4964 return i;
4965 type_count[t]++;
4967 return split;
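/* Worked example: for an .mii + .mii packet with BEGIN == 0, both the M and
   the I unit counts reach their limit of two by slot 3, so the I insn in
   slot 4 causes a split issue and the function returns 4.  */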
4970 /* Return the maximum number of instructions a cpu can issue. */
4973 ia64_issue_rate ()
4975 return 6;
4978 /* Helper function - like single_set, but look inside COND_EXEC. */
4980 static rtx
4981 ia64_single_set (insn)
4982 rtx insn;
4984 rtx x = PATTERN (insn);
4985 if (GET_CODE (x) == COND_EXEC)
4986 x = COND_EXEC_CODE (x);
4987 if (GET_CODE (x) == SET)
4988 return x;
4989 return single_set_2 (insn, x);
4992 /* Adjust the cost of a scheduling dependency. Return the new cost of
4993 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4996 ia64_adjust_cost (insn, link, dep_insn, cost)
4997 rtx insn, link, dep_insn;
4998 int cost;
5000 enum attr_type dep_type;
5001 enum attr_itanium_class dep_class;
5002 enum attr_itanium_class insn_class;
5003 rtx dep_set, set, src, addr;
5005 if (GET_CODE (PATTERN (insn)) == CLOBBER
5006 || GET_CODE (PATTERN (insn)) == USE
5007 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5008 || GET_CODE (PATTERN (dep_insn)) == USE
5009 /* @@@ Not accurate for indirect calls. */
5010 || GET_CODE (insn) == CALL_INSN
5011 || ia64_safe_type (insn) == TYPE_S)
5012 return 0;
5014 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5015 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5016 return 0;
5018 dep_type = ia64_safe_type (dep_insn);
5019 dep_class = ia64_safe_itanium_class (dep_insn);
5020 insn_class = ia64_safe_itanium_class (insn);
5022 /* Compares that feed a conditional branch can execute in the same
5023 cycle. */
5024 dep_set = ia64_single_set (dep_insn);
5025 set = ia64_single_set (insn);
5027 if (dep_type != TYPE_F
5028 && dep_set
5029 && GET_CODE (SET_DEST (dep_set)) == REG
5030 && PR_REG (REGNO (SET_DEST (dep_set)))
5031 && GET_CODE (insn) == JUMP_INSN)
5032 return 0;
5034 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5036 /* ??? Can't find any information in the documentation about whether
5037 a sequence
5038 st [rx] = ra
5039 ld rb = [ry]
5040 splits issue. Assume it doesn't. */
5041 return 0;
5044 src = set ? SET_SRC (set) : 0;
5045 addr = 0;
5046 if (set && GET_CODE (SET_DEST (set)) == MEM)
5047 addr = XEXP (SET_DEST (set), 0);
5048 else if (set && GET_CODE (src) == MEM)
5049 addr = XEXP (src, 0);
5050 else if (set && GET_CODE (src) == ZERO_EXTEND
5051 && GET_CODE (XEXP (src, 0)) == MEM)
5052 addr = XEXP (XEXP (src, 0), 0);
5053 else if (set && GET_CODE (src) == UNSPEC
5054 && XVECLEN (XEXP (src, 0), 0) > 0
5055 && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
5056 addr = XEXP (XVECEXP (src, 0, 0), 0);
5057 if (addr && GET_CODE (addr) == POST_MODIFY)
5058 addr = XEXP (addr, 0);
5060 set = ia64_single_set (dep_insn);
5062 if ((dep_class == ITANIUM_CLASS_IALU
5063 || dep_class == ITANIUM_CLASS_ILOG
5064 || dep_class == ITANIUM_CLASS_LD)
5065 && (insn_class == ITANIUM_CLASS_LD
5066 || insn_class == ITANIUM_CLASS_ST))
5068 if (! addr || ! set)
5069 abort ();
5070 /* This isn't completely correct - an IALU that feeds an address has
5071 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5072 otherwise. Unfortunately there's no good way to describe this. */
5073 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5074 return cost + 1;
5076 if ((dep_class == ITANIUM_CLASS_IALU
5077 || dep_class == ITANIUM_CLASS_ILOG
5078 || dep_class == ITANIUM_CLASS_LD)
5079 && (insn_class == ITANIUM_CLASS_MMMUL
5080 || insn_class == ITANIUM_CLASS_MMSHF
5081 || insn_class == ITANIUM_CLASS_MMSHFI))
5082 return 3;
5083 if (dep_class == ITANIUM_CLASS_FMAC
5084 && (insn_class == ITANIUM_CLASS_FMISC
5085 || insn_class == ITANIUM_CLASS_FCVTFX
5086 || insn_class == ITANIUM_CLASS_XMPY))
5087 return 7;
5088 if ((dep_class == ITANIUM_CLASS_FMAC
5089 || dep_class == ITANIUM_CLASS_FMISC
5090 || dep_class == ITANIUM_CLASS_FCVTFX
5091 || dep_class == ITANIUM_CLASS_XMPY)
5092 && insn_class == ITANIUM_CLASS_STF)
5093 return 8;
5094 if ((dep_class == ITANIUM_CLASS_MMMUL
5095 || dep_class == ITANIUM_CLASS_MMSHF
5096 || dep_class == ITANIUM_CLASS_MMSHFI)
5097 && (insn_class == ITANIUM_CLASS_LD
5098 || insn_class == ITANIUM_CLASS_ST
5099 || insn_class == ITANIUM_CLASS_IALU
5100 || insn_class == ITANIUM_CLASS_ILOG
5101 || insn_class == ITANIUM_CLASS_ISHF))
5102 return 4;
5104 return cost;
5107 /* Describe the current state of the Itanium pipeline. */
5108 static struct
5110 /* The first slot that is used in the current cycle. */
5111 int first_slot;
5112 /* The next slot to fill. */
5113 int cur;
5114 /* The packet we have selected for the current issue window. */
5115 const struct ia64_packet *packet;
5116 /* The position of the split issue that occurs due to issue width
5117 limitations (6 if there's no split issue). */
5118 int split;
5119 /* Record data about the insns scheduled so far in the same issue
5120 window. The elements up to but not including FIRST_SLOT belong
5121 to the previous cycle, the ones starting with FIRST_SLOT belong
5122 to the current cycle. */
5123 enum attr_type types[6];
5124 rtx insns[6];
5125 int stopbit[6];
5126 /* Nonzero if we decided to schedule a stop bit. */
5127 int last_was_stop;
5128 } sched_data;
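/* The six entries describe the two bundles of the current issue window:
   slots 0-2 come from packet->t1 and slots 3-5 from packet->t2.  The
   rotate_one_bundle and rotate_two_bundles functions below retire bundles
   from this window once they are final.  */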
5130 /* Temporary arrays; they have enough elements to hold all insns that
5131 can be ready at the same time while scheduling the current block.
5132 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5133 static rtx *sched_ready;
5134 static enum attr_type *sched_types;
5136 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5137 of packet P. */
5139 static int
5140 insn_matches_slot (p, itype, slot, insn)
5141 const struct ia64_packet *p;
5142 enum attr_type itype;
5143 int slot;
5144 rtx insn;
5146 enum attr_itanium_requires_unit0 u0;
5147 enum attr_type stype = p->t[slot];
5149 if (insn)
5151 u0 = ia64_safe_itanium_requires_unit0 (insn);
5152 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5154 int i;
5155 for (i = sched_data.first_slot; i < slot; i++)
5156 if (p->t[i] == stype)
5157 return 0;
5159 if (GET_CODE (insn) == CALL_INSN)
5161 /* Reject calls in multiway branch packets. We want to limit
5162 the number of multiway branches we generate (since the branch
5163 predictor is limited), and this seems to work fairly well.
5164 (If we didn't do this, we'd have to add another test here to
5165 force calls into the third slot of the bundle.) */
5166 if (slot < 3)
5168 if (p->t[1] == TYPE_B)
5169 return 0;
5171 else
5173 if (p->t[4] == TYPE_B)
5174 return 0;
5179 if (itype == stype)
5180 return 1;
5181 if (itype == TYPE_A)
5182 return stype == TYPE_M || stype == TYPE_I;
5183 return 0;
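/* That is, an A-type insn is accepted by either an M or an I slot, while
   every other insn type must match the slot's type exactly.  */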
5186 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5187 assembly output a bit prettier. */
5189 static void
5190 ia64_emit_insn_before (insn, before)
5191 rtx insn, before;
5193 rtx prev = PREV_INSN (before);
5194 if (prev && GET_CODE (prev) == INSN
5195 && GET_CODE (PATTERN (prev)) == UNSPEC
5196 && XINT (PATTERN (prev), 1) == 23)
5197 before = prev;
5198 emit_insn_before (insn, before);
5201 #if 0
5202 /* Generate a nop insn of the given type. Note we never generate L type
5203 nops. */
5205 static rtx
5206 gen_nop_type (t)
5207 enum attr_type t;
5209 switch (t)
5211 case TYPE_M:
5212 return gen_nop_m ();
5213 case TYPE_I:
5214 return gen_nop_i ();
5215 case TYPE_B:
5216 return gen_nop_b ();
5217 case TYPE_F:
5218 return gen_nop_f ();
5219 case TYPE_X:
5220 return gen_nop_x ();
5221 default:
5222 abort ();
5225 #endif
5227 /* When rotating a bundle out of the issue window, insert a bundle selector
5228 insn in front of it. DUMP is the scheduling dump file or NULL. START
5229 is either 0 or 3, depending on whether we want to emit a bundle selector
5230 for the first bundle or the second bundle in the current issue window.
5232 The selector insns are emitted this late because the selected packet can
5233 be changed until parts of it get rotated out. */
5235 static void
5236 finish_last_head (dump, start)
5237 FILE *dump;
5238 int start;
5240 const struct ia64_packet *p = sched_data.packet;
5241 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5242 int bundle_type = b - bundle;
5243 rtx insn;
5244 int i;
5246 if (! ia64_final_schedule)
5247 return;
5249 for (i = start; sched_data.insns[i] == 0; i++)
5250 if (i == start + 3)
5251 abort ();
5252 insn = sched_data.insns[i];
5254 if (dump)
5255 fprintf (dump, "// Emitting template before %d: %s\n",
5256 INSN_UID (insn), b->name);
5258 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5261 /* We can't schedule more insns this cycle. Fix up the scheduling state
5262 and advance FIRST_SLOT and CUR.
5263 We have to distribute the insns that are currently found between
5264 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5265 far, they are stored successively in the fields starting at FIRST_SLOT;
5266 now they must be moved to the correct slots.
5267 DUMP is the current scheduling dump file, or NULL. */
5269 static void
5270 cycle_end_fill_slots (dump)
5271 FILE *dump;
5273 const struct ia64_packet *packet = sched_data.packet;
5274 int slot, i;
5275 enum attr_type tmp_types[6];
5276 rtx tmp_insns[6];
5278 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5279 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5281 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5283 enum attr_type t = tmp_types[i];
5284 if (t != ia64_safe_type (tmp_insns[i]))
5285 abort ();
5286 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5288 if (slot > sched_data.split)
5289 abort ();
5290 if (dump)
5291 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5292 type_names[t]);
5293 sched_data.types[slot] = packet->t[slot];
5294 sched_data.insns[slot] = 0;
5295 sched_data.stopbit[slot] = 0;
5296 slot++;
5298 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5299 actual slot type later. */
5300 sched_data.types[slot] = packet->t[slot];
5301 sched_data.insns[slot] = tmp_insns[i];
5302 sched_data.stopbit[slot] = 0;
5303 slot++;
5306 /* This isn't right - there's no need to pad out until the forced split;
5307 the CPU will automatically split if an insn isn't ready. */
5308 #if 0
5309 while (slot < sched_data.split)
5311 sched_data.types[slot] = packet->t[slot];
5312 sched_data.insns[slot] = 0;
5313 sched_data.stopbit[slot] = 0;
5314 slot++;
5316 #endif
5318 sched_data.first_slot = sched_data.cur = slot;
5321 /* Bundle rotations, as described in the Itanium optimization manual.
5322 We can rotate either one or both bundles out of the issue window.
5323 DUMP is the current scheduling dump file, or NULL. */
5325 static void
5326 rotate_one_bundle (dump)
5327 FILE *dump;
5329 if (dump)
5330 fprintf (dump, "// Rotating one bundle.\n");
5332 finish_last_head (dump, 0);
5333 if (sched_data.cur > 3)
5335 sched_data.cur -= 3;
5336 sched_data.first_slot -= 3;
5337 memmove (sched_data.types,
5338 sched_data.types + 3,
5339 sched_data.cur * sizeof *sched_data.types);
5340 memmove (sched_data.stopbit,
5341 sched_data.stopbit + 3,
5342 sched_data.cur * sizeof *sched_data.stopbit);
5343 memmove (sched_data.insns,
5344 sched_data.insns + 3,
5345 sched_data.cur * sizeof *sched_data.insns);
5347 else
5349 sched_data.cur = 0;
5350 sched_data.first_slot = 0;
5354 static void
5355 rotate_two_bundles (dump)
5356 FILE *dump;
5358 if (dump)
5359 fprintf (dump, "// Rotating two bundles.\n");
5361 if (sched_data.cur == 0)
5362 return;
5364 finish_last_head (dump, 0);
5365 if (sched_data.cur > 3)
5366 finish_last_head (dump, 3);
5367 sched_data.cur = 0;
5368 sched_data.first_slot = 0;
5371 /* We're beginning a new block. Initialize data structures as necessary. */
5373 void
5374 ia64_sched_init (dump, sched_verbose, max_ready)
5375 FILE *dump ATTRIBUTE_UNUSED;
5376 int sched_verbose ATTRIBUTE_UNUSED;
5377 int max_ready;
5379 static int initialized = 0;
5381 if (! initialized)
5383 int b1, b2, i;
5385 initialized = 1;
5387 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5389 const struct bundle *t1 = bundle + b1;
5390 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5392 const struct bundle *t2 = bundle + b2;
5394 packets[i].t1 = t1;
5395 packets[i].t2 = t2;
5398 for (i = 0; i < NR_PACKETS; i++)
5400 int j;
5401 for (j = 0; j < 3; j++)
5402 packets[i].t[j] = packets[i].t1->t[j];
5403 for (j = 0; j < 3; j++)
5404 packets[i].t[j + 3] = packets[i].t2->t[j];
5405 packets[i].first_split = itanium_split_issue (packets + i, 0);
5410 init_insn_group_barriers ();
5412 memset (&sched_data, 0, sizeof sched_data);
5413 sched_types = (enum attr_type *) xmalloc (max_ready
5414 * sizeof (enum attr_type));
5415 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5418 /* See if the packet P can match the insns we have already scheduled. Return
5419 nonzero if so. In *PSLOT, we store the first slot that is available for
5420 more instructions if we choose this packet.
5421 SPLIT holds the last slot we can use; there's a split issue after it, so
5422 scheduling beyond it would cause us to use more than one cycle. */
5424 static int
5425 packet_matches_p (p, split, pslot)
5426 const struct ia64_packet *p;
5427 int split;
5428 int *pslot;
5430 int filled = sched_data.cur;
5431 int first = sched_data.first_slot;
5432 int i, slot;
5434 /* First, check if the first of the two bundles must be a specific one (due
5435 to stop bits). */
5436 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5437 return 0;
5438 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5439 return 0;
5441 for (i = 0; i < first; i++)
5442 if (! insn_matches_slot (p, sched_data.types[i], i,
5443 sched_data.insns[i]))
5444 return 0;
5445 for (i = slot = first; i < filled; i++)
5447 while (slot < split)
5449 if (insn_matches_slot (p, sched_data.types[i], slot,
5450 sched_data.insns[i]))
5451 break;
5452 slot++;
5454 if (slot == split)
5455 return 0;
5456 slot++;
5459 if (pslot)
5460 *pslot = slot;
5461 return 1;
5464 /* A frontend for itanium_split_issue. For a packet P and a slot
5465 number FIRST that describes the start of the current clock cycle,
5466 return the slot number of the first split issue. This function
5467 uses the cached number found in P if possible. */
5469 static int
5470 get_split (p, first)
5471 const struct ia64_packet *p;
5472 int first;
5474 if (first == 0)
5475 return p->first_split;
5476 return itanium_split_issue (p, first);
5479 /* Given N_READY insns in the array READY, whose types are found in the
5480 corresponding array TYPES, return the insn that is best suited to be
5481 scheduled in slot SLOT of packet P. */
5483 static int
5484 find_best_insn (ready, types, n_ready, p, slot)
5485 rtx *ready;
5486 enum attr_type *types;
5487 int n_ready;
5488 const struct ia64_packet *p;
5489 int slot;
5491 int best = -1;
5492 int best_pri = 0;
5493 while (n_ready-- > 0)
5495 rtx insn = ready[n_ready];
5496 if (! insn)
5497 continue;
5498 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5499 break;
5500 /* If we have equally good insns, one of which has a stricter
5501 slot requirement, prefer the one with the stricter requirement. */
5502 if (best >= 0 && types[n_ready] == TYPE_A)
5503 continue;
5504 if (insn_matches_slot (p, types[n_ready], slot, insn))
5506 best = n_ready;
5507 best_pri = INSN_PRIORITY (ready[best]);
5509 /* If there's no way we could get a stricter requirement, stop
5510 looking now. */
5511 if (types[n_ready] != TYPE_A
5512 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5513 break;
5514 break;
5517 return best;
5520 /* Select the best packet to use given the current scheduler state and the
5521 current ready list.
5522 READY is an array holding N_READY ready insns; TYPES is a corresponding
5523 array that holds their types. Store the best packet in *PPACKET and the
5524 number of insns that can be scheduled in the current cycle in *PBEST. */
5526 static void
5527 find_best_packet (pbest, ppacket, ready, types, n_ready)
5528 int *pbest;
5529 const struct ia64_packet **ppacket;
5530 rtx *ready;
5531 enum attr_type *types;
5532 int n_ready;
5534 int first = sched_data.first_slot;
5535 int best = 0;
5536 int lowest_end = 6;
5537 const struct ia64_packet *best_packet = NULL;
5538 int i;
5540 for (i = 0; i < NR_PACKETS; i++)
5542 const struct ia64_packet *p = packets + i;
5543 int slot;
5544 int split = get_split (p, first);
5545 int win = 0;
5546 int first_slot, last_slot;
5547 int b_nops = 0;
5549 if (! packet_matches_p (p, split, &first_slot))
5550 continue;
5552 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5554 win = 0;
5555 last_slot = 6;
5556 for (slot = first_slot; slot < split; slot++)
5558 int insn_nr;
5560 /* Disallow a degenerate case where the first bundle doesn't
5561 contain anything but NOPs! */
5562 if (first_slot == 0 && win == 0 && slot == 3)
5564 win = -1;
5565 break;
5568 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5569 if (insn_nr >= 0)
5571 sched_ready[insn_nr] = 0;
5572 last_slot = slot;
5573 win++;
5575 else if (p->t[slot] == TYPE_B)
5576 b_nops++;
5578 /* We must disallow MBB/BBB packets if any of their B slots would be
5579 filled with nops. */
5580 if (last_slot < 3)
5582 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5583 win = -1;
5585 else
5587 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5588 win = -1;
5591 if (win > best
5592 || (win == best && last_slot < lowest_end))
5594 best = win;
5595 lowest_end = last_slot;
5596 best_packet = p;
5599 *pbest = best;
5600 *ppacket = best_packet;
5603 /* Reorder the ready list so that the insns that can be issued in this cycle
5604 are found in the correct order at the end of the list.
5605 DUMP is the scheduling dump file, or NULL. READY points to the start,
5606 E_READY to the end of the ready list. MAY_FAIL determines what should be
5607 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5608 otherwise we return 0.
5609 Return 1 if any insns can be scheduled in this cycle. */
5611 static int
5612 itanium_reorder (dump, ready, e_ready, may_fail)
5613 FILE *dump;
5614 rtx *ready;
5615 rtx *e_ready;
5616 int may_fail;
5618 const struct ia64_packet *best_packet;
5619 int n_ready = e_ready - ready;
5620 int first = sched_data.first_slot;
5621 int i, best, best_split, filled;
5623 for (i = 0; i < n_ready; i++)
5624 sched_types[i] = ia64_safe_type (ready[i]);
5626 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5628 if (best == 0)
5630 if (may_fail)
5631 return 0;
5632 abort ();
5635 if (dump)
5637 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5638 best_packet->t1->name,
5639 best_packet->t2 ? best_packet->t2->name : NULL, best);
5642 best_split = itanium_split_issue (best_packet, first);
5643 packet_matches_p (best_packet, best_split, &filled);
5645 for (i = filled; i < best_split; i++)
5647 int insn_nr;
5649 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5650 if (insn_nr >= 0)
5652 rtx insn = ready[insn_nr];
5653 memmove (ready + insn_nr, ready + insn_nr + 1,
5654 (n_ready - insn_nr - 1) * sizeof (rtx));
5655 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5656 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5657 ready[--n_ready] = insn;
5661 sched_data.packet = best_packet;
5662 sched_data.split = best_split;
5663 return 1;
5666 /* Dump information about the current scheduling state to file DUMP. */
5668 static void
5669 dump_current_packet (dump)
5670 FILE *dump;
5672 int i;
5673 fprintf (dump, "// %d slots filled:", sched_data.cur);
5674 for (i = 0; i < sched_data.first_slot; i++)
5676 rtx insn = sched_data.insns[i];
5677 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5678 if (insn)
5679 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5680 if (sched_data.stopbit[i])
5681 fprintf (dump, " ;;");
5683 fprintf (dump, " :::");
5684 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5686 rtx insn = sched_data.insns[i];
5687 enum attr_type t = ia64_safe_type (insn);
5688 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5690 fprintf (dump, "\n");
5693 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5694 NULL. */
5696 static void
5697 schedule_stop (dump)
5698 FILE *dump;
5700 const struct ia64_packet *best = sched_data.packet;
5701 int i;
5702 int best_stop = 6;
5704 if (dump)
5705 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5707 if (sched_data.cur == 0)
5709 if (dump)
5710 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5712 rotate_two_bundles (NULL);
5713 return;
5716 for (i = -1; i < NR_PACKETS; i++)
5718 /* This is a slight hack to give the current packet the first chance.
5719 This is done to avoid e.g. switching from MIB to MBB bundles. */
5720 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
5721 int split = get_split (p, sched_data.first_slot);
5722 const struct bundle *compare;
5723 int next, stoppos;
5725 if (! packet_matches_p (p, split, &next))
5726 continue;
5728 compare = next > 3 ? p->t2 : p->t1;
5730 stoppos = 3;
5731 if (compare->possible_stop)
5732 stoppos = compare->possible_stop;
5733 if (next > 3)
5734 stoppos += 3;
5736 if (stoppos < next || stoppos >= best_stop)
5738 if (compare->possible_stop == 0)
5739 continue;
5740 stoppos = (next > 3 ? 6 : 3);
5742 if (stoppos < next || stoppos >= best_stop)
5743 continue;
5745 if (dump)
5746 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
5747 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
5748 stoppos);
5750 best_stop = stoppos;
5751 best = p;
5754 sched_data.packet = best;
5755 cycle_end_fill_slots (dump);
5756 while (sched_data.cur < best_stop)
5758 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
5759 sched_data.insns[sched_data.cur] = 0;
5760 sched_data.stopbit[sched_data.cur] = 0;
5761 sched_data.cur++;
5763 sched_data.stopbit[sched_data.cur - 1] = 1;
5764 sched_data.first_slot = best_stop;
5766 if (dump)
5767 dump_current_packet (dump);
5770 /* If necessary, perform one or two rotations on the scheduling state.
5771 This should only be called if we are starting a new cycle. */
5773 static void
5774 maybe_rotate (dump)
5775 FILE *dump;
5777 if (sched_data.cur == 6)
5778 rotate_two_bundles (dump);
5779 else if (sched_data.cur >= 3)
5780 rotate_one_bundle (dump);
5781 sched_data.first_slot = sched_data.cur;
5784 /* The clock cycle when ia64_sched_reorder was last called. */
5785 static int prev_cycle;
5787 /* The first insn scheduled in the previous cycle. This is the saved
5788 value of sched_data.first_slot. */
5789 static int prev_first;
5791 /* The last insn that has been scheduled. At the start of a new cycle
5792 we know that we can emit new insns after it; the main scheduling code
5793 has already emitted a cycle_display insn after it and is using that
5794 as its current last insn. */
5795 static rtx last_issued;
5797 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
5798 pad out the delay between MM (shifts, etc.) and integer operations. */
5800 static void
5801 nop_cycles_until (clock_var, dump)
5802 int clock_var;
5803 FILE *dump;
5805 int prev_clock = prev_cycle;
5806 int cycles_left = clock_var - prev_clock;
5808 /* Finish the previous cycle; pad it out with NOPs. */
5809 if (sched_data.cur == 3)
5811 rtx t = gen_insn_group_barrier (GEN_INT (3));
5812 last_issued = emit_insn_after (t, last_issued);
5813 maybe_rotate (dump);
5815 else if (sched_data.cur > 0)
5817 int need_stop = 0;
5818 int split = itanium_split_issue (sched_data.packet, prev_first);
5820 if (sched_data.cur < 3 && split > 3)
5822 split = 3;
5823 need_stop = 1;
5826 if (split > sched_data.cur)
5828 int i;
5829 for (i = sched_data.cur; i < split; i++)
5831 rtx t;
5833 t = gen_nop_type (sched_data.packet->t[i]);
5834 last_issued = emit_insn_after (t, last_issued);
5835 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
5836 sched_data.insns[i] = last_issued;
5837 sched_data.stopbit[i] = 0;
5839 sched_data.cur = split;
5842 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
5843 && cycles_left > 1)
5845 int i;
5846 for (i = sched_data.cur; i < 6; i++)
5848 rtx t;
5850 t = gen_nop_type (sched_data.packet->t[i]);
5851 last_issued = emit_insn_after (t, last_issued);
5852 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
5853 sched_data.insns[i] = last_issued;
5854 sched_data.stopbit[i] = 0;
5856 sched_data.cur = 6;
5857 cycles_left--;
5858 need_stop = 1;
5861 if (need_stop || sched_data.cur == 6)
5863 rtx t = gen_insn_group_barrier (GEN_INT (3));
5864 last_issued = emit_insn_after (t, last_issued);
5866 maybe_rotate (dump);
5869 cycles_left--;
5870 while (cycles_left > 0)
5872 rtx t = gen_bundle_selector (GEN_INT (0));
5873 last_issued = emit_insn_after (t, last_issued);
5874 t = gen_nop_type (TYPE_M);
5875 last_issued = emit_insn_after (t, last_issued);
5876 t = gen_nop_type (TYPE_I);
5877 last_issued = emit_insn_after (t, last_issued);
5878 if (cycles_left > 1)
5880 t = gen_insn_group_barrier (GEN_INT (2));
5881 last_issued = emit_insn_after (t, last_issued);
5882 cycles_left--;
5884 t = gen_nop_type (TYPE_I);
5885 last_issued = emit_insn_after (t, last_issued);
5886 t = gen_insn_group_barrier (GEN_INT (3));
5887 last_issued = emit_insn_after (t, last_issued);
5888 cycles_left--;
5892 /* We are about to begin issuing insns for this clock cycle.
5893 Override the default sort algorithm to better slot instructions. */
5896 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready,
5897 reorder_type, clock_var)
5898 FILE *dump ATTRIBUTE_UNUSED;
5899 int sched_verbose ATTRIBUTE_UNUSED;
5900 rtx *ready;
5901 int *pn_ready;
5902 int reorder_type, clock_var;
5904 int n_ready = *pn_ready;
5905 rtx *e_ready = ready + n_ready;
5906 rtx *insnp;
5907 rtx highest;
5909 if (sched_verbose)
5911 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
5912 dump_current_packet (dump);
5915 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
5917 for (insnp = ready; insnp < e_ready; insnp++)
5919 rtx insn = *insnp;
5920 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
5921 if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
5922 || t == ITANIUM_CLASS_ILOG
5923 || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
5925 rtx link;
5926 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
5927 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
5928 && REG_NOTE_KIND (link) != REG_DEP_ANTI)
5930 rtx other = XEXP (link, 0);
5931 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
5932 if (t0 == ITANIUM_CLASS_MMSHF
5933 || t0 == ITANIUM_CLASS_MMMUL)
5935 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
5936 goto out;
5942 out:
5944 prev_first = sched_data.first_slot;
5945 prev_cycle = clock_var;
5947 if (reorder_type == 0)
5948 maybe_rotate (sched_verbose ? dump : NULL);
5950 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5951 highest = ready[n_ready - 1];
5952 for (insnp = ready; insnp < e_ready; insnp++)
5953 if (insnp < e_ready)
5955 rtx insn = *insnp;
5956 enum attr_type t = ia64_safe_type (insn);
5957 if (t == TYPE_UNKNOWN)
5959 highest = ready[n_ready - 1];
5960 ready[n_ready - 1] = insn;
5961 *insnp = highest;
5962 if (ia64_final_schedule && group_barrier_needed_p (insn))
5964 schedule_stop (sched_verbose ? dump : NULL);
5965 sched_data.last_was_stop = 1;
5966 maybe_rotate (sched_verbose ? dump : NULL);
5968 else if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5969 || asm_noperands (PATTERN (insn)) >= 0)
5971 /* It must be an asm of some kind. */
5972 cycle_end_fill_slots (sched_verbose ? dump : NULL);
5974 return 1;
5978 if (ia64_final_schedule)
5980 int nr_need_stop = 0;
5982 for (insnp = ready; insnp < e_ready; insnp++)
5983 if (safe_group_barrier_needed_p (*insnp))
5984 nr_need_stop++;
5986 /* Schedule a stop bit if
5987 - all insns require a stop bit, or
5988 - we are starting a new cycle and _any_ insns require a stop bit.
5989 The reason for the latter is that if our schedule is accurate, then
5990 the additional stop won't decrease performance here (since there is
5991 a split issue at this point anyway), but it gives us more
5992 freedom when scheduling the currently ready insns. */
5993 if ((reorder_type == 0 && nr_need_stop)
5994 || (reorder_type == 1 && n_ready == nr_need_stop))
5996 schedule_stop (sched_verbose ? dump : NULL);
5997 sched_data.last_was_stop = 1;
5998 maybe_rotate (sched_verbose ? dump : NULL);
5999 if (reorder_type == 1)
6000 return 0;
6002 else
6004 int deleted = 0;
6005 insnp = e_ready;
6006 /* Move down everything that needs a stop bit, preserving relative
6007 order. */
6008 while (insnp-- > ready + deleted)
6009 while (insnp >= ready + deleted)
6011 rtx insn = *insnp;
6012 if (! safe_group_barrier_needed_p (insn))
6013 break;
6014 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6015 *ready = insn;
6016 deleted++;
6018 n_ready -= deleted;
6019 ready += deleted;
6020 if (deleted != nr_need_stop)
6021 abort ();
6025 return itanium_reorder (sched_verbose ? dump : NULL,
6026 ready, e_ready, reorder_type == 1);
6029 /* Like ia64_sched_reorder, but called after issuing each insn.
6030 Override the default sort algorithm to better slot instructions. */
6033 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6034 FILE *dump ATTRIBUTE_UNUSED;
6035 int sched_verbose ATTRIBUTE_UNUSED;
6036 rtx *ready;
6037 int *pn_ready;
6038 int clock_var;
6040 if (sched_data.last_was_stop)
6041 return 0;
6043 /* Detect one special case and try to optimize it.
6044 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6045 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6046 if (sched_data.first_slot == 1
6047 && sched_data.stopbit[0]
6048 && ((sched_data.cur == 4
6049 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6050 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6051 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6052 || (sched_data.cur == 3
6053 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6054 && (sched_data.types[2] != TYPE_M && sched_data.types[2] != TYPE_I
6055 && sched_data.types[2] != TYPE_A))))
6058 int i, best;
6059 rtx stop = PREV_INSN (sched_data.insns[1]);
6060 rtx pat;
6062 sched_data.stopbit[0] = 0;
6063 sched_data.stopbit[2] = 1;
6064 if (GET_CODE (stop) != INSN)
6065 abort ();
6067 pat = PATTERN (stop);
6068 /* Ignore cycle displays. */
6069 if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
6070 stop = PREV_INSN (stop);
6071 pat = PATTERN (stop);
6072 if (GET_CODE (pat) != UNSPEC_VOLATILE
6073 || XINT (pat, 1) != 2
6074 || INTVAL (XVECEXP (pat, 0, 0)) != 1)
6075 abort ();
6076 XVECEXP (pat, 0, 0) = GEN_INT (3);
6078 sched_data.types[5] = sched_data.types[3];
6079 sched_data.types[4] = sched_data.types[2];
6080 sched_data.types[3] = sched_data.types[1];
6081 sched_data.insns[5] = sched_data.insns[3];
6082 sched_data.insns[4] = sched_data.insns[2];
6083 sched_data.insns[3] = sched_data.insns[1];
6084 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6085 sched_data.cur += 2;
6086 sched_data.first_slot = 3;
6087 for (i = 0; i < NR_PACKETS; i++)
6089 const struct ia64_packet *p = packets + i;
6090 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6092 sched_data.packet = p;
6093 break;
6096 rotate_one_bundle (sched_verbose ? dump : NULL);
6098 best = 6;
6099 for (i = 0; i < NR_PACKETS; i++)
6101 const struct ia64_packet *p = packets + i;
6102 int split = get_split (p, sched_data.first_slot);
6103 int next;
6105 /* Disallow multiway branches here. */
6106 if (p->t[1] == TYPE_B)
6107 continue;
6109 if (packet_matches_p (p, split, &next) && next < best)
6111 best = next;
6112 sched_data.packet = p;
6113 sched_data.split = split;
6116 if (best == 6)
6117 abort ();
6120 if (*pn_ready > 0)
6122 int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1,
6123 clock_var);
6124 if (more)
6125 return more;
6126 /* Did we schedule a stop? If so, finish this cycle. */
6127 if (sched_data.cur == sched_data.first_slot)
6128 return 0;
6131 if (sched_verbose)
6132 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6134 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6135 if (sched_verbose)
6136 dump_current_packet (dump);
6137 return 0;
6140 /* We are about to issue INSN. Return the number of insns left on the
6141 ready queue that can be issued this cycle. */
6144 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6145 FILE *dump;
6146 int sched_verbose;
6147 rtx insn;
6148 int can_issue_more ATTRIBUTE_UNUSED;
6150 enum attr_type t = ia64_safe_type (insn);
6152 last_issued = insn;
6154 if (sched_data.last_was_stop)
6156 int t = sched_data.first_slot;
6157 if (t == 0)
6158 t = 3;
6159 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6160 init_insn_group_barriers ();
6161 sched_data.last_was_stop = 0;
6164 if (t == TYPE_UNKNOWN)
6166 if (sched_verbose)
6167 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6168 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6169 || asm_noperands (PATTERN (insn)) >= 0)
6171 /* This must be some kind of asm. Clear the scheduling state. */
6172 rotate_two_bundles (sched_verbose ? dump : NULL);
6173 if (ia64_final_schedule)
6174 group_barrier_needed_p (insn);
6176 return 1;
6179 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6180 important state info. Don't delete this test. */
6181 if (ia64_final_schedule
6182 && group_barrier_needed_p (insn))
6183 abort ();
6185 sched_data.stopbit[sched_data.cur] = 0;
6186 sched_data.insns[sched_data.cur] = insn;
6187 sched_data.types[sched_data.cur] = t;
6189 sched_data.cur++;
6190 if (sched_verbose)
6191 fprintf (dump, "// Scheduling insn %d of type %s\n",
6192 INSN_UID (insn), type_names[t]);
6194 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6196 schedule_stop (sched_verbose ? dump : NULL);
6197 sched_data.last_was_stop = 1;
6200 return 1;
6203 /* Free data allocated by ia64_sched_init. */
6205 void
6206 ia64_sched_finish (dump, sched_verbose)
6207 FILE *dump;
6208 int sched_verbose;
6210 if (sched_verbose)
6211 fprintf (dump, "// Finishing schedule.\n");
6212 rotate_two_bundles (NULL);
6213 free (sched_types);
6214 free (sched_ready);
6217 /* Emit pseudo-ops for the assembler to describe predicate relations.
6218 At present this assumes that we only consider predicate pairs to
6219 be mutex, and that the assembler can deduce proper values from
6220 straight-line code. */
6222 static void
6223 emit_predicate_relation_info ()
6225 int i;
6227 for (i = n_basic_blocks - 1; i >= 0; --i)
6229 basic_block bb = BASIC_BLOCK (i);
6230 int r;
6231 rtx head = bb->head;
6233 /* We only need such notes at code labels. */
6234 if (GET_CODE (head) != CODE_LABEL)
6235 continue;
6236 if (GET_CODE (NEXT_INSN (head)) == NOTE
6237 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6238 head = NEXT_INSN (head);
6240 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6241 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6243 rtx p = gen_rtx_REG (BImode, r);
6244 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6245 if (head == bb->end)
6246 bb->end = n;
6247 head = n;
6251 /* Look for conditional calls that do not return, and protect predicate
6252 relations around them. Otherwise the assembler will assume the call
6253 returns, and complain about uses of call-clobbered predicates after
6254 the call. */
6255 for (i = n_basic_blocks - 1; i >= 0; --i)
6257 basic_block bb = BASIC_BLOCK (i);
6258 rtx insn = bb->head;
6260 while (1)
6262 if (GET_CODE (insn) == CALL_INSN
6263 && GET_CODE (PATTERN (insn)) == COND_EXEC
6264 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6266 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6267 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6268 if (bb->head == insn)
6269 bb->head = b;
6270 if (bb->end == insn)
6271 bb->end = a;
6274 if (insn == bb->end)
6275 break;
6276 insn = NEXT_INSN (insn);
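/* Illustrative sketch only (hypothetical names, not compiled here since this
   file is built by the host compiler): the kind of source that exercises the
   second loop above.  If the call below is if-converted into a predicated
   (COND_EXEC) call carrying a REG_NORETURN note, the safe_across_calls
   markers emitted around it keep the assembler from assuming the call
   returns and then complaining about uses of call-clobbered predicates
   after it.  */
#if 0
extern void fatal_error (const char *) __attribute__ ((noreturn));

static void
example_check (int ok)
{
  if (! ok)
    fatal_error ("check failed");
}
#endif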
6281 /* Generate a NOP instruction of type T. We will never generate L type
6282 nops. */
6284 static rtx
6285 gen_nop_type (t)
6286 enum attr_type t;
6288 switch (t)
6290 case TYPE_M:
6291 return gen_nop_m ();
6292 case TYPE_I:
6293 return gen_nop_i ();
6294 case TYPE_B:
6295 return gen_nop_b ();
6296 case TYPE_F:
6297 return gen_nop_f ();
6298 case TYPE_X:
6299 return gen_nop_x ();
6300 default:
6301 abort ();
6305 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6306 here than while scheduling. */
6308 static void
6309 ia64_emit_nops ()
6311 rtx insn;
6312 const struct bundle *b = 0;
6313 int bundle_pos = 0;
6315 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6317 rtx pat;
6318 enum attr_type t;
6319 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6320 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6321 continue;
6322 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6323 || GET_CODE (insn) == CODE_LABEL)
6325 if (b)
6326 while (bundle_pos < 3)
6328 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6329 bundle_pos++;
6331 if (GET_CODE (insn) != CODE_LABEL)
6332 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6333 else
6334 b = 0;
6335 bundle_pos = 0;
6336 continue;
6338 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6340 int t = INTVAL (XVECEXP (pat, 0, 0));
6341 if (b)
6342 while (bundle_pos < t)
6344 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6345 bundle_pos++;
6347 continue;
6350 if (bundle_pos == 3)
6351 b = 0;
6353 if (b && INSN_P (insn))
6355 t = ia64_safe_type (insn);
6356 if (asm_noperands (PATTERN (insn)) >= 0
6357 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6359 while (bundle_pos < 3)
6361 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6362 bundle_pos++;
6364 continue;
6367 if (t == TYPE_UNKNOWN)
6368 continue;
6369 while (bundle_pos < 3)
6371 if (t == b->t[bundle_pos]
6372 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6373 || b->t[bundle_pos] == TYPE_I)))
6374 break;
6376 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6377 bundle_pos++;
6379 if (bundle_pos < 3)
6380 bundle_pos++;
6385 /* Perform machine dependent operations on the rtl chain INSNS. */
6387 void
6388 ia64_reorg (insns)
6389 rtx insns;
6391 /* If optimizing, we'll have split before scheduling. */
6392 if (optimize == 0)
6393 split_all_insns (0);
6395 /* Make sure the CFG and global_live_at_start are correct
6396 for emit_predicate_relation_info. */
6397 find_basic_blocks (insns, max_reg_num (), NULL);
6398 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6400 if (ia64_flag_schedule_insns2)
6402 ia64_final_schedule = 1;
6403 schedule_ebbs (rtl_dump_file);
6404 ia64_final_schedule = 0;
6406 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6407 place as they were during scheduling. */
6408 emit_insn_group_barriers (rtl_dump_file, insns);
6409 ia64_emit_nops ();
6411 else
6412 emit_all_insn_group_barriers (rtl_dump_file, insns);
6414 fixup_errata ();
6415 emit_predicate_relation_info ();
6418 /* Return true if REGNO is used by the epilogue. */
6421 ia64_epilogue_uses (regno)
6422 int regno;
6424 /* When a function makes a call through a function descriptor, we
6425 will write a (potentially) new value to "gp". After returning
6426 from such a call, we need to make sure the function restores the
6427 original gp-value, even if the function itself does not use the
6428 gp anymore. */
6429 if (regno == R_GR (1)
6430 && TARGET_CONST_GP
6431 && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
6432 return 1;
6434 /* For functions defined with the syscall_linkage attribute, all input
6435 registers are marked as live at all function exits. This prevents the
6436 register allocator from using the input registers, which in turn makes it
6437 possible to restart a system call after an interrupt without having to
6438 save/restore the input registers. This also prevents kernel data from
6439 leaking to application code. */
6441 if (IN_REGNO_P (regno)
6442 && lookup_attribute ("syscall_linkage",
6443 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6444 return 1;
6446 /* Conditional return patterns can't represent the use of `b0' as
6447 the return address, so we force the value live this way. */
6448 if (regno == R_BR (0))
6449 return 1;
6451 if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
6452 return 1;
6453 if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
6454 return 1;
6455 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6456 && regno == AR_UNAT_REGNUM)
6457 return 1;
6459 return 0;
6462 /* Return true if IDENTIFIER is a valid attribute for TYPE. */
6465 ia64_valid_type_attribute (type, attributes, identifier, args)
6466 tree type;
6467 tree attributes ATTRIBUTE_UNUSED;
6468 tree identifier;
6469 tree args;
6471 /* We only support an attribute for function calls. */
6473 if (TREE_CODE (type) != FUNCTION_TYPE
6474 && TREE_CODE (type) != METHOD_TYPE)
6475 return 0;
6477 /* The "syscall_linkage" attribute says the callee is a system call entry
6478 point. This affects ia64_epilogue_uses. */
6480 if (is_attribute_p ("syscall_linkage", identifier))
6481 return args == NULL_TREE;
6483 return 0;
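/* Illustrative sketch only (hypothetical function): attaching the attribute
   accepted above to a function type.  When a function with this type
   attribute is compiled, ia64_epilogue_uses keeps all of its input
   registers live at every exit, as described there.  */
#if 0
long example_syscall_stub (long nr, long arg)
     __attribute__ ((syscall_linkage));

long
example_syscall_stub (long nr, long arg)
{
  return nr + arg;   /* placeholder body */
}
#endif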
6486 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6488 We add @ to the name if this goes in small data/bss. We can only put
6489 a variable in small data/bss if it is defined in this module or a module
6490 that we are statically linked with. We can't check the second condition,
6491 but TREE_STATIC gives us the first one. */
6493 /* ??? If we had IPA, we could check the second condition. We could support
6494 programmer added section attributes if the variable is not defined in this
6495 module. */
6497 /* ??? See the v850 port for a cleaner way to do this. */
6499 /* ??? We could also support own long data here. Generating movl/add/ld8
6500 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6501 code faster because there is one less load. This also includes incomplete
6502 types which can't go in sdata/sbss. */
6504 /* ??? See select_section. We must put short own readonly variables in
6505 sdata/sbss instead of the more natural rodata, because we can't perform
6506 the DECL_READONLY_SECTION test here. */
6508 extern struct obstack * saveable_obstack;
6510 void
6511 ia64_encode_section_info (decl)
6512 tree decl;
6514 const char *symbol_str;
6516 if (TREE_CODE (decl) == FUNCTION_DECL)
6518 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6519 return;
6522 /* Careful not to prod global register variables. */
6523 if (TREE_CODE (decl) != VAR_DECL
6524 || GET_CODE (DECL_RTL (decl)) != MEM
6525 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6526 return;
6528 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6530 /* We assume that -fpic is used only to create a shared library (dso).
6531 With -fpic, no global data can ever be sdata.
6532 Without -fpic, global common uninitialized data can never be sdata, since
6533 it can unify with a real definition in a dso. */
6534 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6535 to access them. The linker may then be able to do linker relaxation to
6536 optimize references to them. Currently sdata implies use of gprel. */
6537 /* We need the DECL_EXTERNAL check for C++. static class data members get
6538 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6539 statically allocated, but the space is allocated somewhere else. Such
6540 decls can not be own data. */
6541 if (! TARGET_NO_SDATA
6542 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6543 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6544 && ! (TREE_PUBLIC (decl)
6545 && (flag_pic
6546 || (DECL_COMMON (decl)
6547 && (DECL_INITIAL (decl) == 0
6548 || DECL_INITIAL (decl) == error_mark_node))))
6549 /* Either the variable must be declared without a section attribute,
6550 or the section must be sdata or sbss. */
6551 && (DECL_SECTION_NAME (decl) == 0
6552 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6553 ".sdata")
6554 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6555 ".sbss")))
6557 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6559 /* If the variable has already been defined in the output file, then it
6560 is too late to put it in sdata if it wasn't put there in the first
6561 place. The test is here rather than above, because if it is already
6562 in sdata, then it can stay there. */
6564 if (TREE_ASM_WRITTEN (decl))
6567 /* If this is an incomplete type with size 0, then we can't put it in
6568 sdata because it might be too big when completed. */
6569 else if (size > 0
6570 && size <= (HOST_WIDE_INT) ia64_section_threshold
6571 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
6573 size_t len = strlen (symbol_str);
6574 char *newstr = alloca (len + 1);
6575 const char *string;
6577 *newstr = SDATA_NAME_FLAG_CHAR;
6578 memcpy (newstr + 1, symbol_str, len + 1);
6580 string = ggc_alloc_string (newstr, len + 1);
6581 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
6584 /* This decl is marked as being in small data/bss but it shouldn't
6585 be; one likely explanation for this is that the decl has been
6586 moved into a different section from the one it was in when
6587 ENCODE_SECTION_INFO was first called. Remove the '@'. */
6588 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6590 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6591 = ggc_strdup (symbol_str + 1);
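/* Illustrative sketch only (hypothetical names): the sort of definitions
   the test above sorts into and out of small data.  */
#if 0
static int counter = 1;        /* static, small, no section attribute: its
                                  symbol gets the '@' prefix and may then be
                                  addressed gp-relative.  */
int big_table[8192];           /* far larger than ia64_section_threshold,
                                  so it is left unmarked.  */
int pinned __attribute__ ((section (".mydata")));
                               /* names a section other than .sdata/.sbss,
                                  so it is also left unmarked.  */
#endif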
6595 /* Output assembly directives for prologue regions. */
6597 /* The current basic block number. */
6599 static int block_num;
6601 /* True if we need a copy_state command at the start of the next block. */
6603 static int need_copy_state;
6605 /* The function emits unwind directives for the start of an epilogue. */
6607 static void
6608 process_epilogue ()
6610 /* If this isn't the last block of the function, then we need to label the
6611 current state, and copy it back in at the start of the next block. */
6613 if (block_num != n_basic_blocks - 1)
6615 fprintf (asm_out_file, "\t.label_state 1\n");
6616 need_copy_state = 1;
6619 fprintf (asm_out_file, "\t.restore sp\n");
6622 /* This function processes a SET pattern looking for specific patterns
6623 which result in emitting an assembly directive required for unwinding. */
6625 static int
6626 process_set (asm_out_file, pat)
6627 FILE *asm_out_file;
6628 rtx pat;
6630 rtx src = SET_SRC (pat);
6631 rtx dest = SET_DEST (pat);
6632 int src_regno, dest_regno;
6634 /* Look for the ALLOC insn. */
6635 if (GET_CODE (src) == UNSPEC_VOLATILE
6636 && XINT (src, 1) == 0
6637 && GET_CODE (dest) == REG)
6639 dest_regno = REGNO (dest);
6641 /* If this isn't the final destination for ar.pfs, the alloc
6642 shouldn't have been marked frame related. */
6643 if (dest_regno != current_frame_info.reg_save_ar_pfs)
6644 abort ();
6646 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
6647 ia64_dbx_register_number (dest_regno));
6648 return 1;
6651 /* Look for SP = .... */
6652 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
6654 if (GET_CODE (src) == PLUS)
6656 rtx op0 = XEXP (src, 0);
6657 rtx op1 = XEXP (src, 1);
6658 if (op0 == dest && GET_CODE (op1) == CONST_INT)
6660 if (INTVAL (op1) < 0)
6662 fputs ("\t.fframe ", asm_out_file);
6663 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
6664 -INTVAL (op1));
6665 fputc ('\n', asm_out_file);
6667 else
6668 process_epilogue ();
6670 else
6671 abort ();
6673 else if (GET_CODE (src) == REG
6674 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
6675 process_epilogue ();
6676 else
6677 abort ();
6679 return 1;
6682 /* Register move we need to look at. */
6683 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
6685 src_regno = REGNO (src);
6686 dest_regno = REGNO (dest);
6688 switch (src_regno)
6690 case BR_REG (0):
6691 /* Saving return address pointer. */
6692 if (dest_regno != current_frame_info.reg_save_b0)
6693 abort ();
6694 fprintf (asm_out_file, "\t.save rp, r%d\n",
6695 ia64_dbx_register_number (dest_regno));
6696 return 1;
6698 case PR_REG (0):
6699 if (dest_regno != current_frame_info.reg_save_pr)
6700 abort ();
6701 fprintf (asm_out_file, "\t.save pr, r%d\n",
6702 ia64_dbx_register_number (dest_regno));
6703 return 1;
6705 case AR_UNAT_REGNUM:
6706 if (dest_regno != current_frame_info.reg_save_ar_unat)
6707 abort ();
6708 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
6709 ia64_dbx_register_number (dest_regno));
6710 return 1;
6712 case AR_LC_REGNUM:
6713 if (dest_regno != current_frame_info.reg_save_ar_lc)
6714 abort ();
6715 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
6716 ia64_dbx_register_number (dest_regno));
6717 return 1;
6719 case STACK_POINTER_REGNUM:
6720 if (dest_regno != HARD_FRAME_POINTER_REGNUM
6721 || ! frame_pointer_needed)
6722 abort ();
6723 fprintf (asm_out_file, "\t.vframe r%d\n",
6724 ia64_dbx_register_number (dest_regno));
6725 return 1;
6727 default:
6728 /* Everything else should indicate being stored to memory. */
6729 abort ();
6733 /* Memory store we need to look at. */
6734 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
6736 long off;
6737 rtx base;
6738 const char *saveop;
6740 if (GET_CODE (XEXP (dest, 0)) == REG)
6742 base = XEXP (dest, 0);
6743 off = 0;
6745 else if (GET_CODE (XEXP (dest, 0)) == PLUS
6746 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
6748 base = XEXP (XEXP (dest, 0), 0);
6749 off = INTVAL (XEXP (XEXP (dest, 0), 1));
6751 else
6752 abort ();
6754 if (base == hard_frame_pointer_rtx)
6756 saveop = ".savepsp";
6757 off = - off;
6759 else if (base == stack_pointer_rtx)
6760 saveop = ".savesp";
6761 else
6762 abort ();
6764 src_regno = REGNO (src);
6765 switch (src_regno)
6767 case BR_REG (0):
6768 if (current_frame_info.reg_save_b0 != 0)
6769 abort ();
6770 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
6771 return 1;
6773 case PR_REG (0):
6774 if (current_frame_info.reg_save_pr != 0)
6775 abort ();
6776 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
6777 return 1;
6779 case AR_LC_REGNUM:
6780 if (current_frame_info.reg_save_ar_lc != 0)
6781 abort ();
6782 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
6783 return 1;
6785 case AR_PFS_REGNUM:
6786 if (current_frame_info.reg_save_ar_pfs != 0)
6787 abort ();
6788 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
6789 return 1;
6791 case AR_UNAT_REGNUM:
6792 if (current_frame_info.reg_save_ar_unat != 0)
6793 abort ();
6794 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
6795 return 1;
6797 case GR_REG (4):
6798 case GR_REG (5):
6799 case GR_REG (6):
6800 case GR_REG (7):
6801 fprintf (asm_out_file, "\t.save.g 0x%x\n",
6802 1 << (src_regno - GR_REG (4)));
6803 return 1;
6805 case BR_REG (1):
6806 case BR_REG (2):
6807 case BR_REG (3):
6808 case BR_REG (4):
6809 case BR_REG (5):
6810 fprintf (asm_out_file, "\t.save.b 0x%x\n",
6811 1 << (src_regno - BR_REG (1)));
6812 return 1;
6814 case FR_REG (2):
6815 case FR_REG (3):
6816 case FR_REG (4):
6817 case FR_REG (5):
6818 fprintf (asm_out_file, "\t.save.f 0x%x\n",
6819 1 << (src_regno - FR_REG (2)));
6820 return 1;
6822 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
6823 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
6824 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
6825 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
6826 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
6827 1 << (src_regno - FR_REG (12)));
6828 return 1;
6830 default:
6831 return 0;
6835 return 0;
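/* Worked example with made-up operands: for a frame-related prologue that
   saves ar.pfs in a general register, adjusts sp downward by 48 bytes and
   copies b0 into a general register, the SET handlers above emit unwind
   directives of the form

        .save ar.pfs, r<n>
        .fframe 48
        .save rp, r<m>

   while the epilogue's restore of sp goes through process_epilogue, which
   emits ".restore sp" (preceded by ".label_state 1" when the epilogue is
   not in the function's last block).  */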
6839 /* This function looks at a single insn and emits any directives
6840 required to unwind this insn. */
6841 void
6842 process_for_unwind_directive (asm_out_file, insn)
6843 FILE *asm_out_file;
6844 rtx insn;
6846 if (flag_unwind_tables
6847 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6849 rtx pat;
6851 if (GET_CODE (insn) == NOTE
6852 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
6854 block_num = NOTE_BASIC_BLOCK (insn)->index;
6856 /* Restore unwind state from immediately before the epilogue. */
6857 if (need_copy_state)
6859 fprintf (asm_out_file, "\t.body\n");
6860 fprintf (asm_out_file, "\t.copy_state 1\n");
6861 need_copy_state = 0;
6865 if (! RTX_FRAME_RELATED_P (insn))
6866 return;
6868 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
6869 if (pat)
6870 pat = XEXP (pat, 0);
6871 else
6872 pat = PATTERN (insn);
6874 switch (GET_CODE (pat))
6876 case SET:
6877 process_set (asm_out_file, pat);
6878 break;
6880 case PARALLEL:
6882 int par_index;
6883 int limit = XVECLEN (pat, 0);
6884 for (par_index = 0; par_index < limit; par_index++)
6886 rtx x = XVECEXP (pat, 0, par_index);
6887 if (GET_CODE (x) == SET)
6888 process_set (asm_out_file, x);
6890 break;
6893 default:
6894 abort ();
6900 void
6901 ia64_init_builtins ()
6903 tree psi_type_node = build_pointer_type (integer_type_node);
6904 tree pdi_type_node = build_pointer_type (long_integer_type_node);
6905 tree endlink = void_list_node;
6907 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
6908 tree si_ftype_psi_si_si
6909 = build_function_type (integer_type_node,
6910 tree_cons (NULL_TREE, psi_type_node,
6911 tree_cons (NULL_TREE, integer_type_node,
6912 tree_cons (NULL_TREE,
6913 integer_type_node,
6914 endlink))));
6916 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
6917 tree di_ftype_pdi_di_di
6918 = build_function_type (long_integer_type_node,
6919 tree_cons (NULL_TREE, pdi_type_node,
6920 tree_cons (NULL_TREE,
6921 long_integer_type_node,
6922 tree_cons (NULL_TREE,
6923 long_integer_type_node,
6924 endlink))));
6925 /* __sync_synchronize */
6926 tree void_ftype_void
6927 = build_function_type (void_type_node, endlink);
6929 /* __sync_lock_test_and_set_si */
6930 tree si_ftype_psi_si
6931 = build_function_type (integer_type_node,
6932 tree_cons (NULL_TREE, psi_type_node,
6933 tree_cons (NULL_TREE, integer_type_node, endlink)));
6935 /* __sync_lock_test_and_set_di */
6936 tree di_ftype_pdi_di
6937 = build_function_type (long_integer_type_node,
6938 tree_cons (NULL_TREE, pdi_type_node,
6939 tree_cons (NULL_TREE, long_integer_type_node,
6940 endlink)));
6942 /* __sync_lock_release_si */
6943 tree void_ftype_psi
6944 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
6945 endlink));
6947 /* __sync_lock_release_di */
6948 tree void_ftype_pdi
6949 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
6950 endlink));
6952 #define def_builtin(name, type, code) \
6953 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
6955 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
6956 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
6957 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
6958 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
6959 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
6960 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
6961 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
6962 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
6964 def_builtin ("__sync_synchronize", void_ftype_void,
6965 IA64_BUILTIN_SYNCHRONIZE);
6967 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
6968 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
6969 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
6970 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
6971 def_builtin ("__sync_lock_release_si", void_ftype_psi,
6972 IA64_BUILTIN_LOCK_RELEASE_SI);
6973 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
6974 IA64_BUILTIN_LOCK_RELEASE_DI);
6976 def_builtin ("__builtin_ia64_bsp",
6977 build_function_type (ptr_type_node, endlink),
6978 IA64_BUILTIN_BSP);
6980 def_builtin ("__builtin_ia64_flushrs",
6981 build_function_type (void_type_node, endlink),
6982 IA64_BUILTIN_FLUSHRS);
6984 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
6985 IA64_BUILTIN_FETCH_AND_ADD_SI);
6986 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
6987 IA64_BUILTIN_FETCH_AND_SUB_SI);
6988 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
6989 IA64_BUILTIN_FETCH_AND_OR_SI);
6990 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
6991 IA64_BUILTIN_FETCH_AND_AND_SI);
6992 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
6993 IA64_BUILTIN_FETCH_AND_XOR_SI);
6994 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
6995 IA64_BUILTIN_FETCH_AND_NAND_SI);
6997 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
6998 IA64_BUILTIN_ADD_AND_FETCH_SI);
6999 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7000 IA64_BUILTIN_SUB_AND_FETCH_SI);
7001 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7002 IA64_BUILTIN_OR_AND_FETCH_SI);
7003 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7004 IA64_BUILTIN_AND_AND_FETCH_SI);
7005 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7006 IA64_BUILTIN_XOR_AND_FETCH_SI);
7007 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7008 IA64_BUILTIN_NAND_AND_FETCH_SI);
7010 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7011 IA64_BUILTIN_FETCH_AND_ADD_DI);
7012 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7013 IA64_BUILTIN_FETCH_AND_SUB_DI);
7014 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7015 IA64_BUILTIN_FETCH_AND_OR_DI);
7016 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7017 IA64_BUILTIN_FETCH_AND_AND_DI);
7018 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7019 IA64_BUILTIN_FETCH_AND_XOR_DI);
7020 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7021 IA64_BUILTIN_FETCH_AND_NAND_DI);
7023 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7024 IA64_BUILTIN_ADD_AND_FETCH_DI);
7025 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7026 IA64_BUILTIN_SUB_AND_FETCH_DI);
7027 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7028 IA64_BUILTIN_OR_AND_FETCH_DI);
7029 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7030 IA64_BUILTIN_AND_AND_FETCH_DI);
7031 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7032 IA64_BUILTIN_XOR_AND_FETCH_DI);
7033 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7034 IA64_BUILTIN_NAND_AND_FETCH_DI);
7036 #undef def_builtin
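/* Illustrative sketch only (hypothetical user code): how the non-atomic
   builtins registered above might be used from code compiled for ia64 with
   this compiler.  */
#if 0
static void *
example_backing_store (void)
{
  __builtin_ia64_flushrs ();         /* flush dirty stacked registers to
                                        the backing store */
  __sync_synchronize ();             /* full memory fence (an mf insn) */
  return __builtin_ia64_bsp ();      /* current backing store pointer */
}
#endif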
7039 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7042 tmp = [ptr];
7043 do {
7044 ret = tmp;
7045 ar.ccv = tmp;
7046 tmp <op>= value;
7047 cmpxchgsz.acq tmp = [ptr], tmp
7048 } while (tmp != ret)
7051 static rtx
7052 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7053 optab binoptab;
7054 enum machine_mode mode;
7055 tree arglist;
7056 rtx target;
7058 rtx ret, label, tmp, ccv, insn, mem, value;
7059 tree arg0, arg1;
7061 arg0 = TREE_VALUE (arglist);
7062 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7063 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7064 value = expand_expr (arg1, NULL_RTX, mode, 0);
7066 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7067 MEM_VOLATILE_P (mem) = 1;
7069 if (target && register_operand (target, mode))
7070 ret = target;
7071 else
7072 ret = gen_reg_rtx (mode);
7074 emit_insn (gen_mf ());
7076 /* Special case for fetchadd instructions. */
7077 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7079 if (mode == SImode)
7080 insn = gen_fetchadd_acq_si (ret, mem, value);
7081 else
7082 insn = gen_fetchadd_acq_di (ret, mem, value);
7083 emit_insn (insn);
7084 return ret;
7087 tmp = gen_reg_rtx (mode);
7088 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7089 emit_move_insn (tmp, mem);
7091 label = gen_label_rtx ();
7092 emit_label (label);
7093 emit_move_insn (ret, tmp);
7094 emit_move_insn (ccv, tmp);
7096 /* Perform the specific operation. NAND is special-cased: the caller
7097 signals it by passing one_cmpl_optab as BINOPTAB. */
7098 if (binoptab == one_cmpl_optab)
7100 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7101 binoptab = and_optab;
7103 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7105 if (mode == SImode)
7106 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7107 else
7108 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7109 emit_insn (insn);
7111 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);
7113 return ret;
7116 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7119 tmp = [ptr];
7120 do {
7121 old = tmp;
7122 ar.ccv = tmp;
7123 ret = tmp + value;
7124 cmpxchgsz.acq tmp = [ptr], ret
7125 } while (tmp != old)
7128 static rtx
7129 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7130 optab binoptab;
7131 enum machine_mode mode;
7132 tree arglist;
7133 rtx target;
7135 rtx old, label, tmp, ret, ccv, insn, mem, value;
7136 tree arg0, arg1;
7138 arg0 = TREE_VALUE (arglist);
7139 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7140 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7141 value = expand_expr (arg1, NULL_RTX, mode, 0);
7143 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7144 MEM_VOLATILE_P (mem) = 1;
7146 if (target && ! register_operand (target, mode))
7147 target = NULL_RTX;
7149 emit_insn (gen_mf ());
7150 tmp = gen_reg_rtx (mode);
7151 old = gen_reg_rtx (mode);
7152 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7154 emit_move_insn (tmp, mem);
7156 label = gen_label_rtx ();
7157 emit_label (label);
7158 emit_move_insn (old, tmp);
7159 emit_move_insn (ccv, tmp);
7161 /* Perform the specific operation. NAND is special-cased: the caller
7162 signals it by passing one_cmpl_optab as BINOPTAB. */
7163 if (binoptab == one_cmpl_optab)
7165 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7166 binoptab = and_optab;
7168 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7170 if (mode == SImode)
7171 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7172 else
7173 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7174 emit_insn (insn);
7176 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);
7178 return ret;
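/* Illustrative sketch only (hypothetical user code): the difference between
   the two expanders above as seen by the caller -- the fetch_and_op forms
   return the value that was in memory before the update, the op_and_fetch
   forms return the updated value.  */
#if 0
static int counter;

static void
example_counters (void)
{
  int before = __sync_fetch_and_add_si (&counter, 1);  /* old value */
  int after = __sync_add_and_fetch_si (&counter, 1);   /* new value */
  (void) before;
  (void) after;
}
#endif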
7181 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7183 ar.ccv = oldval
7185 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7186 return ret
7188 For bool_ it's the same except return ret == oldval.
7191 static rtx
7192 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7193 enum machine_mode mode;
7194 int boolp;
7195 tree arglist;
7196 rtx target;
7198 tree arg0, arg1, arg2;
7199 rtx mem, old, new, ccv, tmp, insn;
7201 arg0 = TREE_VALUE (arglist);
7202 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7203 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7204 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7205 old = expand_expr (arg1, NULL_RTX, mode, 0);
7206 new = expand_expr (arg2, NULL_RTX, mode, 0);
7208 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7209 MEM_VOLATILE_P (mem) = 1;
7211 if (! register_operand (old, mode))
7212 old = copy_to_mode_reg (mode, old);
7213 if (! register_operand (new, mode))
7214 new = copy_to_mode_reg (mode, new);
7216 if (! boolp && target && register_operand (target, mode))
7217 tmp = target;
7218 else
7219 tmp = gen_reg_rtx (mode);
7221 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7222 emit_move_insn (ccv, old);
7223 emit_insn (gen_mf ());
7224 if (mode == SImode)
7225 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7226 else
7227 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7228 emit_insn (insn);
7230 if (boolp)
7232 if (! target)
7233 target = gen_reg_rtx (mode);
7234 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7236 else
7237 return tmp;
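/* Illustrative sketch only (hypothetical user code): a retry loop built on
   the bool_ form, plus one use of the val_ form, matching the semantics
   described in the comment above.  */
#if 0
static int shared;

static void
example_cas (void)
{
  int old;

  /* Atomically increment SHARED, retrying if another thread got there
     first.  The bool_ form returns nonzero iff the store happened.  */
  do
    old = shared;
  while (! __sync_bool_compare_and_swap_si (&shared, old, old + 1));

  /* The val_ form returns whatever value was in memory, whether or not
     the exchange took place.  */
  old = __sync_val_compare_and_swap_si (&shared, 0, 1);
  (void) old;
}
#endif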
7240 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7242 static rtx
7243 ia64_expand_lock_test_and_set (mode, arglist, target)
7244 enum machine_mode mode;
7245 tree arglist;
7246 rtx target;
7248 tree arg0, arg1;
7249 rtx mem, new, ret, insn;
7251 arg0 = TREE_VALUE (arglist);
7252 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7253 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7254 new = expand_expr (arg1, NULL_RTX, mode, 0);
7256 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7257 MEM_VOLATILE_P (mem) = 1;
7258 if (! register_operand (new, mode))
7259 new = copy_to_mode_reg (mode, new);
7261 if (target && register_operand (target, mode))
7262 ret = target;
7263 else
7264 ret = gen_reg_rtx (mode);
7266 if (mode == SImode)
7267 insn = gen_xchgsi (ret, mem, new);
7268 else
7269 insn = gen_xchgdi (ret, mem, new);
7270 emit_insn (insn);
7272 return ret;
7275 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7277 static rtx
7278 ia64_expand_lock_release (mode, arglist, target)
7279 enum machine_mode mode;
7280 tree arglist;
7281 rtx target ATTRIBUTE_UNUSED;
7283 tree arg0;
7284 rtx mem;
7286 arg0 = TREE_VALUE (arglist);
7287 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7289 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7290 MEM_VOLATILE_P (mem) = 1;
7292 emit_move_insn (mem, const0_rtx);
7294 return const0_rtx;
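/* Illustrative sketch only (hypothetical user code): a minimal spin lock
   built from the two expanders above -- an atomic exchange to take the
   lock, and the release store of zero to drop it.  */
#if 0
static int lock;

static void
example_acquire (void)
{
  while (__sync_lock_test_and_set_si (&lock, 1) != 0)
    continue;
}

static void
example_release (void)
{
  __sync_lock_release_si (&lock);
}
#endif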
7298 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7299 tree exp;
7300 rtx target;
7301 rtx subtarget ATTRIBUTE_UNUSED;
7302 enum machine_mode mode ATTRIBUTE_UNUSED;
7303 int ignore ATTRIBUTE_UNUSED;
7305 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7306 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7307 tree arglist = TREE_OPERAND (exp, 1);
7309 switch (fcode)
7311 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7312 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7313 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7314 case IA64_BUILTIN_LOCK_RELEASE_SI:
7315 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7316 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7317 case IA64_BUILTIN_FETCH_AND_OR_SI:
7318 case IA64_BUILTIN_FETCH_AND_AND_SI:
7319 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7320 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7321 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7322 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7323 case IA64_BUILTIN_OR_AND_FETCH_SI:
7324 case IA64_BUILTIN_AND_AND_FETCH_SI:
7325 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7326 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7327 mode = SImode;
7328 break;
7330 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7331 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7332 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7333 case IA64_BUILTIN_LOCK_RELEASE_DI:
7334 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7335 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7336 case IA64_BUILTIN_FETCH_AND_OR_DI:
7337 case IA64_BUILTIN_FETCH_AND_AND_DI:
7338 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7339 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7340 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7341 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7342 case IA64_BUILTIN_OR_AND_FETCH_DI:
7343 case IA64_BUILTIN_AND_AND_FETCH_DI:
7344 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7345 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7346 mode = DImode;
7347 break;
7349 default:
7350 break;
7353 switch (fcode)
7355 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7356 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7357 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7359 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7360 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7361 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
7363 case IA64_BUILTIN_SYNCHRONIZE:
7364 emit_insn (gen_mf ());
7365 return const0_rtx;
7367 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7368 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7369 return ia64_expand_lock_test_and_set (mode, arglist, target);
7371 case IA64_BUILTIN_LOCK_RELEASE_SI:
7372 case IA64_BUILTIN_LOCK_RELEASE_DI:
7373 return ia64_expand_lock_release (mode, arglist, target);
7375 case IA64_BUILTIN_BSP:
7376 if (! target || ! register_operand (target, DImode))
7377 target = gen_reg_rtx (DImode);
7378 emit_insn (gen_bsp_value (target));
7379 return target;
7381 case IA64_BUILTIN_FLUSHRS:
7382 emit_insn (gen_flushrs ());
7383 return const0_rtx;
7385 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7386 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7387 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7389 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7390 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7391 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7393 case IA64_BUILTIN_FETCH_AND_OR_SI:
7394 case IA64_BUILTIN_FETCH_AND_OR_DI:
7395 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
7397 case IA64_BUILTIN_FETCH_AND_AND_SI:
7398 case IA64_BUILTIN_FETCH_AND_AND_DI:
7399 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
7401 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7402 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7403 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
7405 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7406 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7407 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
7409 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7410 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7411 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
7413 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7414 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7415 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
7417 case IA64_BUILTIN_OR_AND_FETCH_SI:
7418 case IA64_BUILTIN_OR_AND_FETCH_DI:
7419 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
7421 case IA64_BUILTIN_AND_AND_FETCH_SI:
7422 case IA64_BUILTIN_AND_AND_FETCH_DI:
7423 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
7425 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7426 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7427 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
7429 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7430 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7431 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
7433 default:
7434 break;
7437 return NULL_RTX;