1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-attr.h"
35 #include "flags.h"
36 #include "recog.h"
37 #include "expr.h"
38 #include "obstack.h"
39 #include "except.h"
40 #include "function.h"
41 #include "ggc.h"
42 #include "basic-block.h"
43 #include "toplev.h"
44 #include "sched-int.h"
46 /* This is used for communication between ASM_OUTPUT_LABEL and
47 ASM_OUTPUT_LABELREF. */
48 int ia64_asm_output_label = 0;
50 /* Define the information needed to generate branch and scc insns. This is
51 stored from the compare operation. */
52 struct rtx_def * ia64_compare_op0;
53 struct rtx_def * ia64_compare_op1;
55 /* Register names for ia64_expand_prologue. */
56 static const char * const ia64_reg_numbers[96] =
57 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
58 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
59 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
60 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
61 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
62 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
63 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
64 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
65 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
66 "r104","r105","r106","r107","r108","r109","r110","r111",
67 "r112","r113","r114","r115","r116","r117","r118","r119",
68 "r120","r121","r122","r123","r124","r125","r126","r127"};
70 /* ??? These strings could be shared with REGISTER_NAMES. */
71 static const char * const ia64_input_reg_names[8] =
72 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
74 /* ??? These strings could be shared with REGISTER_NAMES. */
75 static const char * const ia64_local_reg_names[80] =
76 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
77 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
78 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
79 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
80 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
81 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
82 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
83 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
84 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
85 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
87 /* ??? These strings could be shared with REGISTER_NAMES. */
88 static const char * const ia64_output_reg_names[8] =
89 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
91 /* String used with the -mfixed-range= option. */
92 const char *ia64_fixed_range_string;
94 /* Determines whether we run our final scheduling pass or not. We always
95 avoid the normal second scheduling pass. */
96 static int ia64_flag_schedule_insns2;
98 /* Variables which are this size or smaller are put in the sdata/sbss
99 sections. */
101 unsigned int ia64_section_threshold;
103 static int find_gr_spill PARAMS ((int));
104 static int next_scratch_gr_reg PARAMS ((void));
105 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
106 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
107 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
108 static void finish_spill_pointers PARAMS ((void));
109 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
110 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
111 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
112 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
113 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
114 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
116 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
117 static void fix_range PARAMS ((const char *));
118 static void ia64_add_gc_roots PARAMS ((void));
119 static void ia64_init_machine_status PARAMS ((struct function *));
120 static void ia64_mark_machine_status PARAMS ((struct function *));
121 static void ia64_free_machine_status PARAMS ((struct function *));
122 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
123 static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
124 static void emit_predicate_relation_info PARAMS ((void));
125 static void process_epilogue PARAMS ((void));
126 static int process_set PARAMS ((FILE *, rtx));
128 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
129 tree, rtx));
130 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
131 tree, rtx));
132 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
133 tree, rtx));
134 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
135 tree, rtx));
136 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
138 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
141 call_operand (op, mode)
142 rtx op;
143 enum machine_mode mode;
145 if (mode != GET_MODE (op))
146 return 0;
148 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
149 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
152 /* Return 1 if OP refers to a symbol in the sdata section. */
155 sdata_symbolic_operand (op, mode)
156 rtx op;
157 enum machine_mode mode ATTRIBUTE_UNUSED;
159 switch (GET_CODE (op))
161 case CONST:
162 if (GET_CODE (XEXP (op, 0)) != PLUS
163 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
164 break;
165 op = XEXP (XEXP (op, 0), 0);
166 /* FALLTHRU */
168 case SYMBOL_REF:
169 if (CONSTANT_POOL_ADDRESS_P (op))
170 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
171 else
172 return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;
174 default:
175 break;
178 return 0;
181 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
184 got_symbolic_operand (op, mode)
185 rtx op;
186 enum machine_mode mode ATTRIBUTE_UNUSED;
188 switch (GET_CODE (op))
190 case CONST:
191 op = XEXP (op, 0);
192 if (GET_CODE (op) != PLUS)
193 return 0;
194 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
195 return 0;
196 op = XEXP (op, 1);
197 if (GET_CODE (op) != CONST_INT)
198 return 0;
200 return 1;
202 /* Ok if we're not using GOT entries at all. */
203 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
204 return 1;
206 /* "Ok" while emitting rtl, since otherwise we won't be provided
207 with the entire offset during emission, which makes it very
208 hard to split the offset into high and low parts. */
209 if (rtx_equal_function_value_matters)
210 return 1;
212 /* Force the low 14 bits of the constant to zero so that we do not
213 use up so many GOT entries. */
214 return (INTVAL (op) & 0x3fff) == 0;
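      /* For example, an offset of 0x4000 passes this test (0x4000 & 0x3fff == 0),
	 while an offset of 0x2001 fails it and so is not matched as a single
	 GOT operand; the addend then has to be added separately.  */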
216 case SYMBOL_REF:
217 case LABEL_REF:
218 return 1;
220 default:
221 break;
223 return 0;
226 /* Return 1 if OP refers to a symbol. */
229 symbolic_operand (op, mode)
230 rtx op;
231 enum machine_mode mode ATTRIBUTE_UNUSED;
233 switch (GET_CODE (op))
235 case CONST:
236 case SYMBOL_REF:
237 case LABEL_REF:
238 return 1;
240 default:
241 break;
243 return 0;
246 /* Return 1 if OP refers to a function. */
249 function_operand (op, mode)
250 rtx op;
251 enum machine_mode mode ATTRIBUTE_UNUSED;
253 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
254 return 1;
255 else
256 return 0;
259 /* Return 1 if OP is setjmp or a similar function. */
261 /* ??? This is an unsatisfying solution. Should rethink. */
264 setjmp_operand (op, mode)
265 rtx op;
266 enum machine_mode mode ATTRIBUTE_UNUSED;
268 const char *name;
269 int retval = 0;
271 if (GET_CODE (op) != SYMBOL_REF)
272 return 0;
274 name = XSTR (op, 0);
276 /* The following code is borrowed from special_function_p in calls.c. */
278 /* Disregard prefix _, __ or __x. */
279 if (name[0] == '_')
281 if (name[1] == '_' && name[2] == 'x')
282 name += 3;
283 else if (name[1] == '_')
284 name += 2;
285 else
286 name += 1;
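  /* For example, for "__sigsetjmp" the "__" prefix is stripped above,
     leaving "sigsetjmp", which then matches the name[1] == 'i' case below.  */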
289 if (name[0] == 's')
291 retval
292 = ((name[1] == 'e'
293 && (! strcmp (name, "setjmp")
294 || ! strcmp (name, "setjmp_syscall")))
295 || (name[1] == 'i'
296 && ! strcmp (name, "sigsetjmp"))
297 || (name[1] == 'a'
298 && ! strcmp (name, "savectx")));
300 else if ((name[0] == 'q' && name[1] == 's'
301 && ! strcmp (name, "qsetjmp"))
302 || (name[0] == 'v' && name[1] == 'f'
303 && ! strcmp (name, "vfork")))
304 retval = 1;
306 return retval;
309 /* Return 1 if OP is a general operand, but when pic exclude symbolic
310 operands. */
312 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
313 from PREDICATE_CODES. */
316 move_operand (op, mode)
317 rtx op;
318 enum machine_mode mode;
320 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
321 return 0;
323 return general_operand (op, mode);
326 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
329 gr_register_operand (op, mode)
330 rtx op;
331 enum machine_mode mode;
333 if (! register_operand (op, mode))
334 return 0;
335 if (GET_CODE (op) == SUBREG)
336 op = SUBREG_REG (op);
337 if (GET_CODE (op) == REG)
339 unsigned int regno = REGNO (op);
340 if (regno < FIRST_PSEUDO_REGISTER)
341 return GENERAL_REGNO_P (regno);
343 return 1;
346 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
349 fr_register_operand (op, mode)
350 rtx op;
351 enum machine_mode mode;
353 if (! register_operand (op, mode))
354 return 0;
355 if (GET_CODE (op) == SUBREG)
356 op = SUBREG_REG (op);
357 if (GET_CODE (op) == REG)
359 unsigned int regno = REGNO (op);
360 if (regno < FIRST_PSEUDO_REGISTER)
361 return FR_REGNO_P (regno);
363 return 1;
366 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
369 grfr_register_operand (op, mode)
370 rtx op;
371 enum machine_mode mode;
373 if (! register_operand (op, mode))
374 return 0;
375 if (GET_CODE (op) == SUBREG)
376 op = SUBREG_REG (op);
377 if (GET_CODE (op) == REG)
379 unsigned int regno = REGNO (op);
380 if (regno < FIRST_PSEUDO_REGISTER)
381 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
383 return 1;
386 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
389 gr_nonimmediate_operand (op, mode)
390 rtx op;
391 enum machine_mode mode;
393 if (! nonimmediate_operand (op, mode))
394 return 0;
395 if (GET_CODE (op) == SUBREG)
396 op = SUBREG_REG (op);
397 if (GET_CODE (op) == REG)
399 unsigned int regno = REGNO (op);
400 if (regno < FIRST_PSEUDO_REGISTER)
401 return GENERAL_REGNO_P (regno);
403 return 1;
406 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
409 fr_nonimmediate_operand (op, mode)
410 rtx op;
411 enum machine_mode mode;
413 if (! nonimmediate_operand (op, mode))
414 return 0;
415 if (GET_CODE (op) == SUBREG)
416 op = SUBREG_REG (op);
417 if (GET_CODE (op) == REG)
419 unsigned int regno = REGNO (op);
420 if (regno < FIRST_PSEUDO_REGISTER)
421 return FR_REGNO_P (regno);
423 return 1;
426 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
429 grfr_nonimmediate_operand (op, mode)
430 rtx op;
431 enum machine_mode mode;
433 if (! nonimmediate_operand (op, mode))
434 return 0;
435 if (GET_CODE (op) == SUBREG)
436 op = SUBREG_REG (op);
437 if (GET_CODE (op) == REG)
439 unsigned int regno = REGNO (op);
440 if (regno < FIRST_PSEUDO_REGISTER)
441 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
443 return 1;
446 /* Return 1 if OP is a GR register operand, or zero. */
449 gr_reg_or_0_operand (op, mode)
450 rtx op;
451 enum machine_mode mode;
453 return (op == const0_rtx || gr_register_operand (op, mode));
456 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
459 gr_reg_or_5bit_operand (op, mode)
460 rtx op;
461 enum machine_mode mode;
463 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
464 || GET_CODE (op) == CONSTANT_P_RTX
465 || gr_register_operand (op, mode));
468 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
471 gr_reg_or_6bit_operand (op, mode)
472 rtx op;
473 enum machine_mode mode;
475 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
476 || GET_CODE (op) == CONSTANT_P_RTX
477 || gr_register_operand (op, mode));
480 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
483 gr_reg_or_8bit_operand (op, mode)
484 rtx op;
485 enum machine_mode mode;
487 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
488 || GET_CODE (op) == CONSTANT_P_RTX
489 || gr_register_operand (op, mode));
492 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
495 grfr_reg_or_8bit_operand (op, mode)
496 rtx op;
497 enum machine_mode mode;
499 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
500 || GET_CODE (op) == CONSTANT_P_RTX
501 || grfr_register_operand (op, mode));
504 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
505 operand. */
508 gr_reg_or_8bit_adjusted_operand (op, mode)
509 rtx op;
510 enum machine_mode mode;
512 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
513 || GET_CODE (op) == CONSTANT_P_RTX
514 || gr_register_operand (op, mode));
517 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
518 immediate and an 8 bit adjusted immediate operand. This is necessary
519 because when we emit a compare, we don't know what the condition will be,
520 so we need the union of the immediates accepted by GT and LT. */
523 gr_reg_or_8bit_and_adjusted_operand (op, mode)
524 rtx op;
525 enum machine_mode mode;
527 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
528 && CONST_OK_FOR_L (INTVAL (op)))
529 || GET_CODE (op) == CONSTANT_P_RTX
530 || gr_register_operand (op, mode));
533 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
536 gr_reg_or_14bit_operand (op, mode)
537 rtx op;
538 enum machine_mode mode;
540 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
541 || GET_CODE (op) == CONSTANT_P_RTX
542 || gr_register_operand (op, mode));
545 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
548 gr_reg_or_22bit_operand (op, mode)
549 rtx op;
550 enum machine_mode mode;
552 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
553 || GET_CODE (op) == CONSTANT_P_RTX
554 || gr_register_operand (op, mode));
557 /* Return 1 if OP is a 6 bit immediate operand. */
560 shift_count_operand (op, mode)
561 rtx op;
562 enum machine_mode mode ATTRIBUTE_UNUSED;
564 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
565 || GET_CODE (op) == CONSTANT_P_RTX);
568 /* Return 1 if OP is a 5 bit immediate operand. */
571 shift_32bit_count_operand (op, mode)
572 rtx op;
573 enum machine_mode mode ATTRIBUTE_UNUSED;
575 return ((GET_CODE (op) == CONST_INT
576 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
577 || GET_CODE (op) == CONSTANT_P_RTX);
580 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
583 shladd_operand (op, mode)
584 rtx op;
585 enum machine_mode mode ATTRIBUTE_UNUSED;
587 return (GET_CODE (op) == CONST_INT
588 && (INTVAL (op) == 2 || INTVAL (op) == 4
589 || INTVAL (op) == 8 || INTVAL (op) == 16));
592 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
595 fetchadd_operand (op, mode)
596 rtx op;
597 enum machine_mode mode ATTRIBUTE_UNUSED;
599 return (GET_CODE (op) == CONST_INT
600 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
601 INTVAL (op) == -4 || INTVAL (op) == -1 ||
602 INTVAL (op) == 1 || INTVAL (op) == 4 ||
603 INTVAL (op) == 8 || INTVAL (op) == 16));
606 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
609 fr_reg_or_fp01_operand (op, mode)
610 rtx op;
611 enum machine_mode mode;
613 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
614 || fr_register_operand (op, mode));
617 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
618 POST_MODIFY with a REG as displacement. */
621 destination_operand (op, mode)
622 rtx op;
623 enum machine_mode mode;
625 if (! nonimmediate_operand (op, mode))
626 return 0;
627 if (GET_CODE (op) == MEM
628 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
629 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
630 return 0;
631 return 1;
634 /* Like memory_operand, but don't allow post-increments. */
637 not_postinc_memory_operand (op, mode)
638 rtx op;
639 enum machine_mode mode;
641 return (memory_operand (op, mode)
642 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
 645 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
646 signed immediate operand. */
649 normal_comparison_operator (op, mode)
650 register rtx op;
651 enum machine_mode mode;
653 enum rtx_code code = GET_CODE (op);
654 return ((mode == VOIDmode || GET_MODE (op) == mode)
655 && (code == EQ || code == NE
656 || code == GT || code == LE || code == GTU || code == LEU));
659 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
660 signed immediate operand. */
663 adjusted_comparison_operator (op, mode)
664 register rtx op;
665 enum machine_mode mode;
667 enum rtx_code code = GET_CODE (op);
668 return ((mode == VOIDmode || GET_MODE (op) == mode)
669 && (code == LT || code == GE || code == LTU || code == GEU));
672 /* Return 1 if this is a signed inequality operator. */
675 signed_inequality_operator (op, mode)
676 register rtx op;
677 enum machine_mode mode;
679 enum rtx_code code = GET_CODE (op);
680 return ((mode == VOIDmode || GET_MODE (op) == mode)
681 && (code == GE || code == GT
682 || code == LE || code == LT));
685 /* Return 1 if this operator is valid for predication. */
688 predicate_operator (op, mode)
689 register rtx op;
690 enum machine_mode mode;
692 enum rtx_code code = GET_CODE (op);
693 return ((GET_MODE (op) == mode || mode == VOIDmode)
694 && (code == EQ || code == NE));
697 /* Return 1 if this is the ar.lc register. */
700 ar_lc_reg_operand (op, mode)
701 register rtx op;
702 enum machine_mode mode;
704 return (GET_MODE (op) == DImode
705 && (mode == DImode || mode == VOIDmode)
706 && GET_CODE (op) == REG
707 && REGNO (op) == AR_LC_REGNUM);
710 /* Return 1 if this is the ar.ccv register. */
713 ar_ccv_reg_operand (op, mode)
714 register rtx op;
715 enum machine_mode mode;
717 return ((GET_MODE (op) == mode || mode == VOIDmode)
718 && GET_CODE (op) == REG
719 && REGNO (op) == AR_CCV_REGNUM);
722 /* Like general_operand, but don't allow (mem (addressof)). */
725 general_tfmode_operand (op, mode)
726 rtx op;
727 enum machine_mode mode;
729 if (! general_operand (op, mode))
730 return 0;
731 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
732 return 0;
733 return 1;
736 /* Similarly. */
739 destination_tfmode_operand (op, mode)
740 rtx op;
741 enum machine_mode mode;
743 if (! destination_operand (op, mode))
744 return 0;
745 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
746 return 0;
747 return 1;
750 /* Similarly. */
753 tfreg_or_fp01_operand (op, mode)
754 rtx op;
755 enum machine_mode mode;
757 if (GET_CODE (op) == SUBREG)
758 return 0;
759 return fr_reg_or_fp01_operand (op, mode);
762 /* Return 1 if the operands of a move are ok. */
765 ia64_move_ok (dst, src)
766 rtx dst, src;
768 /* If we're under init_recog_no_volatile, we'll not be able to use
769 memory_operand. So check the code directly and don't worry about
770 the validity of the underlying address, which should have been
771 checked elsewhere anyway. */
772 if (GET_CODE (dst) != MEM)
773 return 1;
774 if (GET_CODE (src) == MEM)
775 return 0;
776 if (register_operand (src, VOIDmode))
777 return 1;
 779 /* Otherwise, this must be a constant: 0 for integer modes, or 0.0 or 1.0
 for floating-point modes. */
780 if (INTEGRAL_MODE_P (GET_MODE (dst)))
781 return src == const0_rtx;
782 else
783 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
 786 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
787 Return the length of the field, or <= 0 on failure. */
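/* For example, ROP = 0x7f8 with RSHIFT = 3 becomes 0xff after the shift;
   0xff + 1 is a power of two, so the field length is 8.  A shifted value
   such as 0xf0 is not a solid block of low-order ones, and exact_log2
   returns -1.  */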
790 ia64_depz_field_mask (rop, rshift)
791 rtx rop, rshift;
793 unsigned HOST_WIDE_INT op = INTVAL (rop);
794 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
796 /* Get rid of the zero bits we're shifting in. */
797 op >>= shift;
799 /* We must now have a solid block of 1's at bit 0. */
800 return exact_log2 (op + 1);
803 /* Expand a symbolic constant load. */
804 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
806 void
807 ia64_expand_load_address (dest, src, scratch)
808 rtx dest, src, scratch;
810 rtx temp;
812 /* The destination could be a MEM during initial rtl generation,
813 which isn't a valid destination for the PIC load address patterns. */
814 if (! register_operand (dest, DImode))
815 temp = gen_reg_rtx (DImode);
816 else
817 temp = dest;
819 if (TARGET_AUTO_PIC)
820 emit_insn (gen_load_gprel64 (temp, src));
821 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
822 emit_insn (gen_load_fptr (temp, src));
823 else if (sdata_symbolic_operand (src, DImode))
824 emit_insn (gen_load_gprel (temp, src));
825 else if (GET_CODE (src) == CONST
826 && GET_CODE (XEXP (src, 0)) == PLUS
827 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
828 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
830 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
831 rtx sym = XEXP (XEXP (src, 0), 0);
832 HOST_WIDE_INT ofs, hi, lo;
834 /* Split the offset into a sign extended 14-bit low part
835 and a complementary high part. */
836 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
837 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
838 hi = ofs - lo;
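      /* For example, ofs = 0x12345 gives lo = 0x345 - 0x2000 = -0x1cbb and
	 hi = 0x14000; hi has its low 14 bits clear, and hi + lo reconstructs
	 the original offset.  */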
840 if (! scratch)
841 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
843 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
844 scratch));
845 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
847 else
849 rtx insn;
850 if (! scratch)
851 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
853 insn = emit_insn (gen_load_symptr (temp, src, scratch));
854 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
857 if (temp != dest)
858 emit_move_insn (dest, temp);
862 ia64_gp_save_reg (setjmp_p)
863 int setjmp_p;
865 rtx save = cfun->machine->ia64_gp_save;
867 if (save != NULL)
869 /* We can't save GP in a pseudo if we are calling setjmp, because
870 pseudos won't be restored by longjmp. For now, we save it in r4. */
871 /* ??? It would be more efficient to save this directly into a stack
872 slot. Unfortunately, the stack slot address gets cse'd across
873 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
874 place. */
876 /* ??? Get the barf bag, Virginia. We've got to replace this thing
877 in place, since this rtx is used in exception handling receivers.
878 Moreover, we must get this rtx out of regno_reg_rtx or reload
879 will do the wrong thing. */
880 unsigned int old_regno = REGNO (save);
881 if (setjmp_p && old_regno != GR_REG (4))
883 REGNO (save) = GR_REG (4);
884 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
887 else
889 if (setjmp_p)
890 save = gen_rtx_REG (DImode, GR_REG (4));
891 else if (! optimize)
892 save = gen_rtx_REG (DImode, LOC_REG (0));
893 else
894 save = gen_reg_rtx (DImode);
895 cfun->machine->ia64_gp_save = save;
898 return save;
901 /* Split a post-reload TImode reference into two DImode components. */
904 ia64_split_timode (out, in, scratch)
905 rtx out[2];
906 rtx in, scratch;
908 switch (GET_CODE (in))
910 case REG:
911 out[0] = gen_rtx_REG (DImode, REGNO (in));
912 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
913 return NULL_RTX;
915 case MEM:
917 rtx base = XEXP (in, 0);
919 switch (GET_CODE (base))
921 case REG:
922 out[0] = change_address (in, DImode, NULL_RTX);
923 break;
924 case POST_MODIFY:
925 base = XEXP (base, 0);
926 out[0] = change_address (in, DImode, NULL_RTX);
927 break;
929 /* Since we're changing the mode, we need to change to POST_MODIFY
930 as well to preserve the size of the increment. Either that or
931 do the update in two steps, but we've already got this scratch
932 register handy so let's use it. */
933 case POST_INC:
934 base = XEXP (base, 0);
935 out[0] = change_address (in, DImode,
936 gen_rtx_POST_MODIFY (Pmode, base,plus_constant (base, 16)));
937 break;
938 case POST_DEC:
939 base = XEXP (base, 0);
940 out[0] = change_address (in, DImode,
941 gen_rtx_POST_MODIFY (Pmode, base,plus_constant (base, -16)));
942 break;
943 default:
944 abort ();
947 if (scratch == NULL_RTX)
948 abort ();
949 out[1] = change_address (in, DImode, scratch);
950 return gen_adddi3 (scratch, base, GEN_INT (8));
953 case CONST_INT:
954 case CONST_DOUBLE:
955 split_double (in, &out[0], &out[1]);
956 return NULL_RTX;
958 default:
959 abort ();
963 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
964 through memory plus an extra GR scratch register. Except that you can
965 either get the first from SECONDARY_MEMORY_NEEDED or the second from
966 SECONDARY_RELOAD_CLASS, but not both.
968 We got into problems in the first place by allowing a construct like
969 (subreg:TF (reg:TI)), which we got from a union containing a long double.
 970 This solution attempts to prevent this situation from occurring. When
971 we see something like the above, we spill the inner register to memory. */
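/* For example, (subreg:TF (reg:TI 100) 0) is rewritten as a TFmode MEM
   addressing the inner TImode register through an ADDRESSOF, which is
   later resolved to a stack slot.  */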
974 spill_tfmode_operand (in, force)
975 rtx in;
976 int force;
978 if (GET_CODE (in) == SUBREG
979 && GET_MODE (SUBREG_REG (in)) == TImode
980 && GET_CODE (SUBREG_REG (in)) == REG)
982 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
983 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
985 else if (force && GET_CODE (in) == REG)
987 rtx mem = gen_mem_addressof (in, NULL_TREE);
988 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
990 else if (GET_CODE (in) == MEM
991 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
993 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
995 else
996 return in;
999 /* Emit comparison instruction if necessary, returning the expression
1000 that holds the compare result in the proper mode. */
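/* For example, expanding a signed greater-than compare emits
   (set (reg:BI cmp) (gt:BI op0 op1)) and returns (ne:MODE cmp 0)
   for the caller to use.  */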
1003 ia64_expand_compare (code, mode)
1004 enum rtx_code code;
1005 enum machine_mode mode;
1007 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1008 rtx cmp;
1010 /* If we have a BImode input, then we already have a compare result, and
1011 do not need to emit another comparison. */
1012 if (GET_MODE (op0) == BImode)
1014 if ((code == NE || code == EQ) && op1 == const0_rtx)
1015 cmp = op0;
1016 else
1017 abort ();
1019 else
1021 cmp = gen_reg_rtx (BImode);
1022 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1023 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1024 code = NE;
1027 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1030 /* Emit the appropriate sequence for a call. */
1032 void
1033 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1034 rtx retval;
1035 rtx addr;
1036 rtx nextarg;
1037 int sibcall_p;
1039 rtx insn, b0, gp_save, narg_rtx;
1040 int narg;
1042 addr = XEXP (addr, 0);
1043 b0 = gen_rtx_REG (DImode, R_BR (0));
1045 if (! nextarg)
1046 narg = 0;
1047 else if (IN_REGNO_P (REGNO (nextarg)))
1048 narg = REGNO (nextarg) - IN_REG (0);
1049 else
1050 narg = REGNO (nextarg) - OUT_REG (0);
1051 narg_rtx = GEN_INT (narg);
1053 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1055 if (sibcall_p)
1056 insn = gen_sibcall_nopic (addr, narg_rtx, b0);
1057 else if (! retval)
1058 insn = gen_call_nopic (addr, narg_rtx, b0);
1059 else
1060 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1061 emit_call_insn (insn);
1062 return;
1065 if (sibcall_p)
1066 gp_save = NULL_RTX;
1067 else
1068 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1070 /* If this is an indirect call, then we have the address of a descriptor. */
1071 if (! symbolic_operand (addr, VOIDmode))
1073 rtx dest;
1075 if (! sibcall_p)
1076 emit_move_insn (gp_save, pic_offset_table_rtx);
1078 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1079 emit_move_insn (pic_offset_table_rtx,
1080 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1082 if (sibcall_p)
1083 insn = gen_sibcall_pic (dest, narg_rtx, b0);
1084 else if (! retval)
1085 insn = gen_call_pic (dest, narg_rtx, b0);
1086 else
1087 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1088 emit_call_insn (insn);
1090 if (! sibcall_p)
1091 emit_move_insn (pic_offset_table_rtx, gp_save);
1093 else if (TARGET_CONST_GP)
1095 if (sibcall_p)
1096 insn = gen_sibcall_nopic (addr, narg_rtx, b0);
1097 else if (! retval)
1098 insn = gen_call_nopic (addr, narg_rtx, b0);
1099 else
1100 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1101 emit_call_insn (insn);
1103 else
1105 if (sibcall_p)
1106 emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
1107 else
1109 emit_move_insn (gp_save, pic_offset_table_rtx);
1111 if (! retval)
1112 insn = gen_call_pic (addr, narg_rtx, b0);
1113 else
1114 insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
1115 emit_call_insn (insn);
1117 emit_move_insn (pic_offset_table_rtx, gp_save);
1122 /* Begin the assembly file. */
1124 void
1125 emit_safe_across_calls (f)
1126 FILE *f;
1128 unsigned int rs, re;
1129 int out_state;
1131 rs = 1;
1132 out_state = 0;
1133 while (1)
1135 while (rs < 64 && call_used_regs[PR_REG (rs)])
1136 rs++;
1137 if (rs >= 64)
1138 break;
1139 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1140 continue;
1141 if (out_state == 0)
1143 fputs ("\t.pred.safe_across_calls ", f);
1144 out_state = 1;
1146 else
1147 fputc (',', f);
1148 if (re == rs + 1)
1149 fprintf (f, "p%u", rs);
1150 else
1151 fprintf (f, "p%u-p%u", rs, re - 1);
1152 rs = re + 1;
1154 if (out_state)
1155 fputc ('\n', f);
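/* With the usual ia64 split of scratch vs. preserved predicate registers,
   the loop above typically emits something like
   ".pred.safe_across_calls p1-p5,p16-p63".  */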
1159 /* Structure to be filled in by ia64_compute_frame_size with register
1160 save masks and offsets for the current function. */
1162 struct ia64_frame_info
1164 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1165 the caller's scratch area. */
1166 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1167 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1168 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
1169 HARD_REG_SET mask; /* mask of saved registers. */
1170 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1171 registers or long-term scratches. */
1172 int n_spilled; /* number of spilled registers. */
1173 int reg_fp; /* register for fp. */
1174 int reg_save_b0; /* save register for b0. */
1175 int reg_save_pr; /* save register for prs. */
1176 int reg_save_ar_pfs; /* save register for ar.pfs. */
1177 int reg_save_ar_unat; /* save register for ar.unat. */
1178 int reg_save_ar_lc; /* save register for ar.lc. */
1179 int n_input_regs; /* number of input registers used. */
1180 int n_local_regs; /* number of local registers used. */
1181 int n_output_regs; /* number of output registers used. */
1182 int n_rotate_regs; /* number of rotating registers used. */
1184 char need_regstk; /* true if a .regstk directive needed. */
1185 char initialized; /* true if the data is finalized. */
1188 /* Current frame information calculated by ia64_compute_frame_size. */
1189 static struct ia64_frame_info current_frame_info;
1191 /* Helper function for ia64_compute_frame_size: find an appropriate general
1192 register to spill some special register to. SPECIAL_SPILL_MASK contains
1193 bits in GR0 to GR31 that have already been allocated by this routine.
1194 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1196 static int
1197 find_gr_spill (try_locals)
1198 int try_locals;
1200 int regno;
1202 /* If this is a leaf function, first try an otherwise unused
1203 call-clobbered register. */
1204 if (current_function_is_leaf)
1206 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1207 if (! regs_ever_live[regno]
1208 && call_used_regs[regno]
1209 && ! fixed_regs[regno]
1210 && ! global_regs[regno]
1211 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1213 current_frame_info.gr_used_mask |= 1 << regno;
1214 return regno;
1218 if (try_locals)
1220 regno = current_frame_info.n_local_regs;
1221 /* If there is a frame pointer, then we can't use loc79, because
1222 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1223 reg_name switching code in ia64_expand_prologue. */
1224 if (regno < (80 - frame_pointer_needed))
1226 current_frame_info.n_local_regs = regno + 1;
1227 return LOC_REG (0) + regno;
1231 /* Failed to find a general register to spill to. Must use stack. */
1232 return 0;
1235 /* In order to make for nice schedules, we try to allocate every temporary
1236 to a different register. We must of course stay away from call-saved,
1237 fixed, and global registers. We must also stay away from registers
1238 allocated in current_frame_info.gr_used_mask, since those include regs
1239 used all through the prologue.
1241 Any register allocated here must be used immediately. The idea is to
1242 aid scheduling, not to solve data flow problems. */
1244 static int last_scratch_gr_reg;
1246 static int
1247 next_scratch_gr_reg ()
1249 int i, regno;
1251 for (i = 0; i < 32; ++i)
1253 regno = (last_scratch_gr_reg + i + 1) & 31;
1254 if (call_used_regs[regno]
1255 && ! fixed_regs[regno]
1256 && ! global_regs[regno]
1257 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1259 last_scratch_gr_reg = regno;
1260 return regno;
1264 /* There must be _something_ available. */
1265 abort ();
1268 /* Helper function for ia64_compute_frame_size, called through
1269 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1271 static void
1272 mark_reg_gr_used_mask (reg, data)
1273 rtx reg;
1274 void *data ATTRIBUTE_UNUSED;
1276 unsigned int regno = REGNO (reg);
1277 if (regno < 32)
1278 current_frame_info.gr_used_mask |= 1 << regno;
1281 /* Compute the frame layout for the current function: register save masks,
1282 spill sizes, and the total frame size, stored in current_frame_info.
1283 SIZE is the number of bytes of space needed for local variables. */
1285 static void
1286 ia64_compute_frame_size (size)
1287 HOST_WIDE_INT size;
1289 HOST_WIDE_INT total_size;
1290 HOST_WIDE_INT spill_size = 0;
1291 HOST_WIDE_INT extra_spill_size = 0;
1292 HOST_WIDE_INT pretend_args_size;
1293 HARD_REG_SET mask;
1294 int n_spilled = 0;
1295 int spilled_gr_p = 0;
1296 int spilled_fr_p = 0;
1297 unsigned int regno;
1298 int i;
1300 if (current_frame_info.initialized)
1301 return;
1303 memset (&current_frame_info, 0, sizeof current_frame_info);
1304 CLEAR_HARD_REG_SET (mask);
1306 /* Don't allocate scratches to the return register. */
1307 diddle_return_value (mark_reg_gr_used_mask, NULL);
1309 /* Don't allocate scratches to the EH scratch registers. */
1310 if (cfun->machine->ia64_eh_epilogue_sp)
1311 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1312 if (cfun->machine->ia64_eh_epilogue_bsp)
1313 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1315 /* Find the size of the register stack frame. We have only 80 local
1316 registers, because we reserve 8 for the inputs and 8 for the
1317 outputs. */
1319 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1320 since we'll be adjusting that down later. */
1321 regno = LOC_REG (78) + ! frame_pointer_needed;
1322 for (; regno >= LOC_REG (0); regno--)
1323 if (regs_ever_live[regno])
1324 break;
1325 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1327 /* For functions marked with the syscall_linkage attribute, we must mark
1328 all eight input registers as in use, so that locals aren't visible to
1329 the caller. */
1331 if (cfun->machine->n_varargs > 0
1332 || lookup_attribute ("syscall_linkage",
1333 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1334 current_frame_info.n_input_regs = 8;
1335 else
1337 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1338 if (regs_ever_live[regno])
1339 break;
1340 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1343 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1344 if (regs_ever_live[regno])
1345 break;
1346 i = regno - OUT_REG (0) + 1;
1348 /* When -p profiling, we need one output register for the mcount argument.
1349 Likewise for -a profiling for the bb_init_func argument. For -ax
1350 profiling, we need two output registers for the two bb_init_trace_func
1351 arguments. */
1352 if (profile_flag || profile_block_flag == 1)
1353 i = MAX (i, 1);
1354 else if (profile_block_flag == 2)
1355 i = MAX (i, 2);
1356 current_frame_info.n_output_regs = i;
1358 /* ??? No rotating register support yet. */
1359 current_frame_info.n_rotate_regs = 0;
1361 /* Discover which registers need spilling, and how much room that
1362 will take. Begin with floating point and general registers,
1363 which will always wind up on the stack. */
1365 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1366 if (regs_ever_live[regno] && ! call_used_regs[regno])
1368 SET_HARD_REG_BIT (mask, regno);
1369 spill_size += 16;
1370 n_spilled += 1;
1371 spilled_fr_p = 1;
1374 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1375 if (regs_ever_live[regno] && ! call_used_regs[regno])
1377 SET_HARD_REG_BIT (mask, regno);
1378 spill_size += 8;
1379 n_spilled += 1;
1380 spilled_gr_p = 1;
1383 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1384 if (regs_ever_live[regno] && ! call_used_regs[regno])
1386 SET_HARD_REG_BIT (mask, regno);
1387 spill_size += 8;
1388 n_spilled += 1;
1391 /* Now come all special registers that might get saved in other
1392 general registers. */
1394 if (frame_pointer_needed)
1396 current_frame_info.reg_fp = find_gr_spill (1);
1397 /* If we did not get a register, then we take LOC79. This is guaranteed
1398 to be free, even if regs_ever_live is already set, because this is
1399 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1400 as we don't count loc79 above. */
1401 if (current_frame_info.reg_fp == 0)
1403 current_frame_info.reg_fp = LOC_REG (79);
1404 current_frame_info.n_local_regs++;
1408 if (! current_function_is_leaf)
1410 /* Emit a save of BR0 if we call other functions. Do this even
1411 if this function doesn't return, as EH depends on this to be
1412 able to unwind the stack. */
1413 SET_HARD_REG_BIT (mask, BR_REG (0));
1415 current_frame_info.reg_save_b0 = find_gr_spill (1);
1416 if (current_frame_info.reg_save_b0 == 0)
1418 spill_size += 8;
1419 n_spilled += 1;
1422 /* Similarly for ar.pfs. */
1423 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1424 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1425 if (current_frame_info.reg_save_ar_pfs == 0)
1427 extra_spill_size += 8;
1428 n_spilled += 1;
1431 else
1433 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1435 SET_HARD_REG_BIT (mask, BR_REG (0));
1436 spill_size += 8;
1437 n_spilled += 1;
1441 /* Unwind descriptor hackery: things are most efficient if we allocate
1442 consecutive GR save registers for RP, PFS, FP in that order. However,
1443 it is absolutely critical that FP get the only hard register that's
1444 guaranteed to be free, so we allocated it first. If all three did
1445 happen to be allocated hard regs, and are consecutive, rearrange them
1446 into the preferred order now. */
1447 if (current_frame_info.reg_fp != 0
1448 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1449 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1451 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1452 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1453 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
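      /* For example, if find_gr_spill handed out loc2 for FP, loc3 for B0, and
	 loc4 for AR.PFS, the three assignments above rotate them into
	 B0 = loc2, AR.PFS = loc3, FP = loc4, the preferred consecutive order.  */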
1456 /* See if we need to store the predicate register block. */
1457 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1458 if (regs_ever_live[regno] && ! call_used_regs[regno])
1459 break;
1460 if (regno <= PR_REG (63))
1462 SET_HARD_REG_BIT (mask, PR_REG (0));
1463 current_frame_info.reg_save_pr = find_gr_spill (1);
1464 if (current_frame_info.reg_save_pr == 0)
1466 extra_spill_size += 8;
1467 n_spilled += 1;
1470 /* ??? Mark them all as used so that register renaming and such
1471 are free to use them. */
1472 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1473 regs_ever_live[regno] = 1;
1476 /* If we're forced to use st8.spill, we're forced to save and restore
1477 ar.unat as well. */
1478 if (spilled_gr_p || cfun->machine->n_varargs)
1480 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1481 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1482 if (current_frame_info.reg_save_ar_unat == 0)
1484 extra_spill_size += 8;
1485 n_spilled += 1;
1489 if (regs_ever_live[AR_LC_REGNUM])
1491 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1492 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1493 if (current_frame_info.reg_save_ar_lc == 0)
1495 extra_spill_size += 8;
1496 n_spilled += 1;
1500 /* If we have an odd number of words of pretend arguments written to
1501 the stack, then the FR save area will be unaligned. We round the
1502 size of this area up to keep things 16 byte aligned. */
1503 if (spilled_fr_p)
1504 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1505 else
1506 pretend_args_size = current_function_pretend_args_size;
1508 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1509 + current_function_outgoing_args_size);
1510 total_size = IA64_STACK_ALIGN (total_size);
1512 /* We always use the 16-byte scratch area provided by the caller, but
1513 if we are a leaf function, there's no one to which we need to provide
1514 a scratch area. */
1515 if (current_function_is_leaf)
1516 total_size = MAX (0, total_size - 16);
1518 current_frame_info.total_size = total_size;
1519 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1520 current_frame_info.spill_size = spill_size;
1521 current_frame_info.extra_spill_size = extra_spill_size;
1522 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1523 current_frame_info.n_spilled = n_spilled;
1524 current_frame_info.initialized = reload_completed;
1527 /* Compute the initial difference between the specified pair of registers. */
1529 HOST_WIDE_INT
1530 ia64_initial_elimination_offset (from, to)
1531 int from, to;
1533 HOST_WIDE_INT offset;
1535 ia64_compute_frame_size (get_frame_size ());
1536 switch (from)
1538 case FRAME_POINTER_REGNUM:
1539 if (to == HARD_FRAME_POINTER_REGNUM)
1541 if (current_function_is_leaf)
1542 offset = -current_frame_info.total_size;
1543 else
1544 offset = -(current_frame_info.total_size
1545 - current_function_outgoing_args_size - 16);
1547 else if (to == STACK_POINTER_REGNUM)
1549 if (current_function_is_leaf)
1550 offset = 0;
1551 else
1552 offset = 16 + current_function_outgoing_args_size;
1554 else
1555 abort ();
1556 break;
1558 case ARG_POINTER_REGNUM:
1559 /* Arguments start above the 16 byte save area, unless stdarg
1560 in which case we store through the 16 byte save area. */
1561 if (to == HARD_FRAME_POINTER_REGNUM)
1562 offset = 16 - current_function_pretend_args_size;
1563 else if (to == STACK_POINTER_REGNUM)
1564 offset = (current_frame_info.total_size
1565 + 16 - current_function_pretend_args_size);
1566 else
1567 abort ();
1568 break;
1570 case RETURN_ADDRESS_POINTER_REGNUM:
1571 offset = 0;
1572 break;
1574 default:
1575 abort ();
1578 return offset;
1581 /* If there are more than a trivial number of register spills, we use
1582 two interleaved iterators so that we can get two memory references
1583 per insn group.
1585 In order to simplify things in the prologue and epilogue expanders,
1586 we use helper functions to fix up the memory references after the
1587 fact with the appropriate offsets to a POST_MODIFY memory mode.
1588 The following data structure tracks the state of the two iterators
1589 while insns are being emitted. */
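/* Illustrative sketch: with two iterator registers in play, successive
   8-byte spills alternate between them, and each store advances its own
   iterator by 16 via POST_MODIFY, so back-to-back spills go through
   independent address registers and can issue in the same insn group.  */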
1591 struct spill_fill_data
1593 rtx init_after; /* point at which to emit initializations */
1594 rtx init_reg[2]; /* initial base register */
1595 rtx iter_reg[2]; /* the iterator registers */
1596 rtx *prev_addr[2]; /* address of last memory use */
1597 HOST_WIDE_INT prev_off[2]; /* last offset */
1598 int n_iter; /* number of iterators in use */
1599 int next_iter; /* next iterator to use */
1600 unsigned int save_gr_used_mask;
1603 static struct spill_fill_data spill_fill_data;
1605 static void
1606 setup_spill_pointers (n_spills, init_reg, cfa_off)
1607 int n_spills;
1608 rtx init_reg;
1609 HOST_WIDE_INT cfa_off;
1611 int i;
1613 spill_fill_data.init_after = get_last_insn ();
1614 spill_fill_data.init_reg[0] = init_reg;
1615 spill_fill_data.init_reg[1] = init_reg;
1616 spill_fill_data.prev_addr[0] = NULL;
1617 spill_fill_data.prev_addr[1] = NULL;
1618 spill_fill_data.prev_off[0] = cfa_off;
1619 spill_fill_data.prev_off[1] = cfa_off;
1620 spill_fill_data.next_iter = 0;
1621 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1623 spill_fill_data.n_iter = 1 + (n_spills > 2);
1624 for (i = 0; i < spill_fill_data.n_iter; ++i)
1626 int regno = next_scratch_gr_reg ();
1627 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1628 current_frame_info.gr_used_mask |= 1 << regno;
1632 static void
1633 finish_spill_pointers ()
1635 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1638 static rtx
1639 spill_restore_mem (reg, cfa_off)
1640 rtx reg;
1641 HOST_WIDE_INT cfa_off;
1643 int iter = spill_fill_data.next_iter;
1644 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1645 rtx disp_rtx = GEN_INT (disp);
1646 rtx mem;
1648 if (spill_fill_data.prev_addr[iter])
1650 if (CONST_OK_FOR_N (disp))
1651 *spill_fill_data.prev_addr[iter]
1652 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1653 gen_rtx_PLUS (DImode,
1654 spill_fill_data.iter_reg[iter],
1655 disp_rtx));
1656 else
1658 /* ??? Could use register post_modify for loads. */
1659 if (! CONST_OK_FOR_I (disp))
1661 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1662 emit_move_insn (tmp, disp_rtx);
1663 disp_rtx = tmp;
1665 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1666 spill_fill_data.iter_reg[iter], disp_rtx));
1669 /* Micro-optimization: if we've created a frame pointer, it's at
1670 CFA 0, which may allow the real iterator to be initialized lower,
1671 slightly increasing parallelism. Also, if there are few saves
1672 it may eliminate the iterator entirely. */
1673 else if (disp == 0
1674 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1675 && frame_pointer_needed)
1677 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1678 MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
1679 return mem;
1681 else
1683 rtx seq;
1685 if (disp == 0)
1686 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1687 spill_fill_data.init_reg[iter]);
1688 else
1690 start_sequence ();
1692 if (! CONST_OK_FOR_I (disp))
1694 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1695 emit_move_insn (tmp, disp_rtx);
1696 disp_rtx = tmp;
1699 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1700 spill_fill_data.init_reg[iter],
1701 disp_rtx));
1703 seq = gen_sequence ();
1704 end_sequence ();
1707 /* Careful for being the first insn in a sequence. */
1708 if (spill_fill_data.init_after)
1709 spill_fill_data.init_after
1710 = emit_insn_after (seq, spill_fill_data.init_after);
1711 else
1713 rtx first = get_insns ();
1714 if (first)
1715 spill_fill_data.init_after
1716 = emit_insn_before (seq, first);
1717 else
1718 spill_fill_data.init_after = emit_insn (seq);
1722 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1724 /* ??? Not all of the spills are for varargs, but some of them are.
1725 The rest of the spills belong in an alias set of their own. But
1726 it doesn't actually hurt to include them here. */
1727 MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
1729 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1730 spill_fill_data.prev_off[iter] = cfa_off;
1732 if (++iter >= spill_fill_data.n_iter)
1733 iter = 0;
1734 spill_fill_data.next_iter = iter;
1736 return mem;
1739 static void
1740 do_spill (move_fn, reg, cfa_off, frame_reg)
1741 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1742 rtx reg, frame_reg;
1743 HOST_WIDE_INT cfa_off;
1745 rtx mem, insn;
1747 mem = spill_restore_mem (reg, cfa_off);
1748 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1750 if (frame_reg)
1752 rtx base;
1753 HOST_WIDE_INT off;
1755 RTX_FRAME_RELATED_P (insn) = 1;
1757 /* Don't even pretend that the unwind code can intuit its way
1758 through a pair of interleaved post_modify iterators. Just
1759 provide the correct answer. */
1761 if (frame_pointer_needed)
1763 base = hard_frame_pointer_rtx;
1764 off = - cfa_off;
1766 else
1768 base = stack_pointer_rtx;
1769 off = current_frame_info.total_size - cfa_off;
1772 REG_NOTES (insn)
1773 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1774 gen_rtx_SET (VOIDmode,
1775 gen_rtx_MEM (GET_MODE (reg),
1776 plus_constant (base, off)),
1777 frame_reg),
1778 REG_NOTES (insn));
1782 static void
1783 do_restore (move_fn, reg, cfa_off)
1784 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1785 rtx reg;
1786 HOST_WIDE_INT cfa_off;
1788 emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1789 GEN_INT (cfa_off)));
1792 /* Wrapper functions that discard the CONST_INT spill offset. These
1793 exist so that we can give gr_spill/gr_fill the offset they need and
1794 use a consistent function interface. */
1796 static rtx
1797 gen_movdi_x (dest, src, offset)
1798 rtx dest, src;
1799 rtx offset ATTRIBUTE_UNUSED;
1801 return gen_movdi (dest, src);
1804 static rtx
1805 gen_fr_spill_x (dest, src, offset)
1806 rtx dest, src;
1807 rtx offset ATTRIBUTE_UNUSED;
1809 return gen_fr_spill (dest, src);
1812 static rtx
1813 gen_fr_restore_x (dest, src, offset)
1814 rtx dest, src;
1815 rtx offset ATTRIBUTE_UNUSED;
1817 return gen_fr_restore (dest, src);
1820 /* Called after register allocation to add any instructions needed for the
1821 prologue. Using a prologue insn is favored compared to putting all of the
1822 instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
1823 to intermix instructions with the saves of the caller saved registers. In
1824 some cases, it might be necessary to emit a barrier instruction as the last
1825 insn to prevent such scheduling.
1827 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1828 so that the debug info generation code can handle them properly.
1830 The register save area is laid out like so:
1831 cfa+16
1832 [ varargs spill area ]
1833 [ fr register spill area ]
1834 [ br register spill area ]
1835 [ ar register spill area ]
1836 [ pr register spill area ]
1837 [ gr register spill area ] */
1839 /* ??? We get inefficient code when the frame size is larger than can fit in an
1840 adds instruction. */
1842 void
1843 ia64_expand_prologue ()
1845 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1846 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1847 rtx reg, alt_reg;
1849 ia64_compute_frame_size (get_frame_size ());
1850 last_scratch_gr_reg = 15;
1852 /* If there is no epilogue, then we don't need some prologue insns.
1853 We need to avoid emitting the dead prologue insns, because flow
1854 will complain about them. */
1855 if (optimize)
1857 edge e;
1859 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1860 if ((e->flags & EDGE_FAKE) == 0
1861 && (e->flags & EDGE_FALLTHRU) != 0)
1862 break;
1863 epilogue_p = (e != NULL);
1865 else
1866 epilogue_p = 1;
1868 /* Set the local, input, and output register names. We need to do this
1869 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1870 half. If we use in/loc/out register names, then we get assembler errors
1871 in crtn.S because there is no alloc insn or regstk directive in there. */
1872 if (! TARGET_REG_NAMES)
1874 int inputs = current_frame_info.n_input_regs;
1875 int locals = current_frame_info.n_local_regs;
1876 int outputs = current_frame_info.n_output_regs;
1878 for (i = 0; i < inputs; i++)
1879 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
1880 for (i = 0; i < locals; i++)
1881 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
1882 for (i = 0; i < outputs; i++)
1883 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
1886 /* Set the frame pointer register name. The regnum is logically loc79,
1887 but of course we'll not have allocated that many locals. Rather than
1888 worrying about renumbering the existing rtxs, we adjust the name. */
1889 /* ??? This code means that we can never use one local register when
1890 there is a frame pointer. loc79 gets wasted in this case, as it is
1891 renamed to a register that will never be used. See also the try_locals
1892 code in find_gr_spill. */
1893 if (current_frame_info.reg_fp)
1895 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
1896 reg_names[HARD_FRAME_POINTER_REGNUM]
1897 = reg_names[current_frame_info.reg_fp];
1898 reg_names[current_frame_info.reg_fp] = tmp;
1901 /* Fix up the return address placeholder. */
1902 /* ??? We can fail if __builtin_return_address is used, and we didn't
1903 allocate a register in which to save b0. I can't think of a way to
1904 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
1905 then be sure that I got the right one. Further, reload doesn't seem
1906 to care if an eliminable register isn't used, and "eliminates" it
1907 anyway. */
1908 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
1909 && current_frame_info.reg_save_b0 != 0)
1910 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
1912 /* We don't need an alloc instruction if we've used no outputs or locals. */
1913 if (current_frame_info.n_local_regs == 0
1914 && current_frame_info.n_output_regs == 0
1915 && current_frame_info.n_input_regs <= current_function_args_info.words)
1917 /* If there is no alloc, but there are input registers used, then we
1918 need a .regstk directive. */
1919 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
1920 ar_pfs_save_reg = NULL_RTX;
1922 else
1924 current_frame_info.need_regstk = 0;
1926 if (current_frame_info.reg_save_ar_pfs)
1927 regno = current_frame_info.reg_save_ar_pfs;
1928 else
1929 regno = next_scratch_gr_reg ();
1930 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
1932 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
1933 GEN_INT (current_frame_info.n_input_regs),
1934 GEN_INT (current_frame_info.n_local_regs),
1935 GEN_INT (current_frame_info.n_output_regs),
1936 GEN_INT (current_frame_info.n_rotate_regs)));
1937 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
1940 /* Set up frame pointer, stack pointer, and spill iterators. */
1942 n_varargs = cfun->machine->n_varargs;
1943 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
1944 stack_pointer_rtx, 0);
1946 if (frame_pointer_needed)
1948 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1949 RTX_FRAME_RELATED_P (insn) = 1;
1952 if (current_frame_info.total_size != 0)
1954 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
1955 rtx offset;
1957 if (CONST_OK_FOR_I (- current_frame_info.total_size))
1958 offset = frame_size_rtx;
1959 else
1961 regno = next_scratch_gr_reg ();
1962 offset = gen_rtx_REG (DImode, regno);
1963 emit_move_insn (offset, frame_size_rtx);
1966 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
1967 stack_pointer_rtx, offset));
1969 if (! frame_pointer_needed)
1971 RTX_FRAME_RELATED_P (insn) = 1;
1972 if (GET_CODE (offset) != CONST_INT)
1974 REG_NOTES (insn)
1975 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1976 gen_rtx_SET (VOIDmode,
1977 stack_pointer_rtx,
1978 gen_rtx_PLUS (DImode,
1979 stack_pointer_rtx,
1980 frame_size_rtx)),
1981 REG_NOTES (insn));
1985 /* ??? At this point we must generate a magic insn that appears to
1986 modify the stack pointer, the frame pointer, and all spill
1987 iterators. This would allow the most scheduling freedom. For
1988 now, just hard stop. */
1989 emit_insn (gen_blockage ());
1992 /* Must copy out ar.unat before doing any integer spills. */
1993 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
1995 if (current_frame_info.reg_save_ar_unat)
1996 ar_unat_save_reg
1997 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
1998 else
2000 alt_regno = next_scratch_gr_reg ();
2001 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2002 current_frame_info.gr_used_mask |= 1 << alt_regno;
2005 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2006 insn = emit_move_insn (ar_unat_save_reg, reg);
2007 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2009 /* Even if we're not going to generate an epilogue, we still
2010 need to save the register so that EH works. */
2011 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2012 emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
2014 else
2015 ar_unat_save_reg = NULL_RTX;
2017 /* Spill all varargs registers. Do this before spilling any GR registers,
2018 since we want the UNAT bits for the GR registers to override the UNAT
2019 bits from varargs, which we don't care about. */
2021 cfa_off = -16;
2022 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2024 reg = gen_rtx_REG (DImode, regno);
2025 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2028 /* Locate the bottom of the register save area. */
2029 cfa_off = (current_frame_info.spill_cfa_off
2030 + current_frame_info.spill_size
2031 + current_frame_info.extra_spill_size);
2033 /* Save the predicate register block either in a register or in memory. */
2034 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2036 reg = gen_rtx_REG (DImode, PR_REG (0));
2037 if (current_frame_info.reg_save_pr != 0)
2039 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2040 insn = emit_move_insn (alt_reg, reg);
2042 /* ??? Denote pr spill/fill by a DImode move that modifies all
2043 64 hard registers. */
2044 RTX_FRAME_RELATED_P (insn) = 1;
2045 REG_NOTES (insn)
2046 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2047 gen_rtx_SET (VOIDmode, alt_reg, reg),
2048 REG_NOTES (insn));
2050 /* Even if we're not going to generate an epilogue, we still
2051 need to save the register so that EH works. */
2052 if (! epilogue_p)
2053 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2055 else
2057 alt_regno = next_scratch_gr_reg ();
2058 alt_reg = gen_rtx_REG (DImode, alt_regno);
2059 insn = emit_move_insn (alt_reg, reg);
2060 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2061 cfa_off -= 8;
2065 /* Handle AR regs in numerical order. All of them get special handling. */
2066 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2067 && current_frame_info.reg_save_ar_unat == 0)
2069 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2070 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2071 cfa_off -= 8;
2074 /* The alloc insn already copied ar.pfs into a general register. The
2075 only thing we have to do now is copy that register to a stack slot
2076 if we'd not allocated a local register for the job. */
2077 if (current_frame_info.reg_save_ar_pfs == 0
2078 && ! current_function_is_leaf)
2080 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2081 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2082 cfa_off -= 8;
2085 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2087 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2088 if (current_frame_info.reg_save_ar_lc != 0)
2090 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2091 insn = emit_move_insn (alt_reg, reg);
2092 RTX_FRAME_RELATED_P (insn) = 1;
2094 /* Even if we're not going to generate an epilogue, we still
2095 need to save the register so that EH works. */
2096 if (! epilogue_p)
2097 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2099 else
2101 alt_regno = next_scratch_gr_reg ();
2102 alt_reg = gen_rtx_REG (DImode, alt_regno);
2103 emit_move_insn (alt_reg, reg);
2104 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2105 cfa_off -= 8;
2109 /* We should now be at the base of the gr/br/fr spill area. */
2110 if (cfa_off != (current_frame_info.spill_cfa_off
2111 + current_frame_info.spill_size))
2112 abort ();
2114 /* Spill all general registers. */
2115 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2116 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2118 reg = gen_rtx_REG (DImode, regno);
2119 do_spill (gen_gr_spill, reg, cfa_off, reg);
2120 cfa_off -= 8;
2123 /* Handle BR0 specially -- it may be getting stored permanently in
2124 some GR register. */
2125 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2127 reg = gen_rtx_REG (DImode, BR_REG (0));
2128 if (current_frame_info.reg_save_b0 != 0)
2130 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2131 insn = emit_move_insn (alt_reg, reg);
2132 RTX_FRAME_RELATED_P (insn) = 1;
2134 /* Even if we're not going to generate an epilogue, we still
2135 need to save the register so that EH works. */
2136 if (! epilogue_p)
2137 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2139 else
2141 alt_regno = next_scratch_gr_reg ();
2142 alt_reg = gen_rtx_REG (DImode, alt_regno);
2143 emit_move_insn (alt_reg, reg);
2144 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2145 cfa_off -= 8;
2149 /* Spill the rest of the BR registers. */
2150 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2151 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2153 alt_regno = next_scratch_gr_reg ();
2154 alt_reg = gen_rtx_REG (DImode, alt_regno);
2155 reg = gen_rtx_REG (DImode, regno);
2156 emit_move_insn (alt_reg, reg);
2157 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2158 cfa_off -= 8;
2161 /* Align the frame and spill all FR registers. */
2162 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2163 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2165 if (cfa_off & 15)
2166 abort ();
2167 reg = gen_rtx_REG (TFmode, regno);
2168 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2169 cfa_off -= 16;
2172 if (cfa_off != current_frame_info.spill_cfa_off)
2173 abort ();
2175 finish_spill_pointers ();
2178 /* Called after register allocation to add any instructions needed for the
2179 epilogue. Using an epilogue insn is favored compared to putting all of the
2180 instructions in the FUNCTION_EPILOGUE macro, since it allows the scheduler
2181 to intermix instructions with the restores of the caller saved registers. In
2182 some cases, it might be necessary to emit a barrier instruction as the last
2183 insn to prevent such scheduling. */
2185 void
2186 ia64_expand_epilogue (sibcall_p)
2187 int sibcall_p;
2189 rtx insn, reg, alt_reg, ar_unat_save_reg;
2190 int regno, alt_regno, cfa_off;
2192 ia64_compute_frame_size (get_frame_size ());
2194 /* If there is a frame pointer, then we use it instead of the stack
2195 pointer, so that the stack pointer does not need to be valid when
2196 the epilogue starts. See EXIT_IGNORE_STACK. */
2197 if (frame_pointer_needed)
2198 setup_spill_pointers (current_frame_info.n_spilled,
2199 hard_frame_pointer_rtx, 0);
2200 else
2201 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2202 current_frame_info.total_size);
2204 if (current_frame_info.total_size != 0)
2206 /* ??? At this point we must generate a magic insn that appears to
2207 modify the spill iterators and the frame pointer. This would
2208 allow the most scheduling freedom. For now, just hard stop. */
2209 emit_insn (gen_blockage ());
2212 /* Locate the bottom of the register save area. */
2213 cfa_off = (current_frame_info.spill_cfa_off
2214 + current_frame_info.spill_size
2215 + current_frame_info.extra_spill_size);
2217 /* Restore the predicate registers. */
2218 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2220 if (current_frame_info.reg_save_pr != 0)
2221 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2222 else
2224 alt_regno = next_scratch_gr_reg ();
2225 alt_reg = gen_rtx_REG (DImode, alt_regno);
2226 do_restore (gen_movdi_x, alt_reg, cfa_off);
2227 cfa_off -= 8;
2229 reg = gen_rtx_REG (DImode, PR_REG (0));
2230 emit_move_insn (reg, alt_reg);
2233 /* Restore the application registers. */
2235 /* Load the saved unat from the stack, but do not restore it until
2236 after the GRs have been restored. */
2237 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2239 if (current_frame_info.reg_save_ar_unat != 0)
2240 ar_unat_save_reg
2241 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2242 else
2244 alt_regno = next_scratch_gr_reg ();
2245 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2246 current_frame_info.gr_used_mask |= 1 << alt_regno;
2247 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2248 cfa_off -= 8;
2251 else
2252 ar_unat_save_reg = NULL_RTX;
2254 if (current_frame_info.reg_save_ar_pfs != 0)
2256 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2257 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2258 emit_move_insn (reg, alt_reg);
2260 else if (! current_function_is_leaf)
2262 alt_regno = next_scratch_gr_reg ();
2263 alt_reg = gen_rtx_REG (DImode, alt_regno);
2264 do_restore (gen_movdi_x, alt_reg, cfa_off);
2265 cfa_off -= 8;
2266 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2267 emit_move_insn (reg, alt_reg);
2270 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2272 if (current_frame_info.reg_save_ar_lc != 0)
2273 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2274 else
2276 alt_regno = next_scratch_gr_reg ();
2277 alt_reg = gen_rtx_REG (DImode, alt_regno);
2278 do_restore (gen_movdi_x, alt_reg, cfa_off);
2279 cfa_off -= 8;
2281 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2282 emit_move_insn (reg, alt_reg);
2285 /* We should now be at the base of the gr/br/fr spill area. */
2286 if (cfa_off != (current_frame_info.spill_cfa_off
2287 + current_frame_info.spill_size))
2288 abort ();
2290 /* Restore all general registers. */
2291 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2292 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2294 reg = gen_rtx_REG (DImode, regno);
2295 do_restore (gen_gr_restore, reg, cfa_off);
2296 cfa_off -= 8;
2299 /* Restore the branch registers. Handle B0 specially, as it may
2300 have gotten stored in some GR register. */
2301 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2303 if (current_frame_info.reg_save_b0 != 0)
2304 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2305 else
2307 alt_regno = next_scratch_gr_reg ();
2308 alt_reg = gen_rtx_REG (DImode, alt_regno);
2309 do_restore (gen_movdi_x, alt_reg, cfa_off);
2310 cfa_off -= 8;
2312 reg = gen_rtx_REG (DImode, BR_REG (0));
2313 emit_move_insn (reg, alt_reg);
2316 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2317 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2319 alt_regno = next_scratch_gr_reg ();
2320 alt_reg = gen_rtx_REG (DImode, alt_regno);
2321 do_restore (gen_movdi_x, alt_reg, cfa_off);
2322 cfa_off -= 8;
2323 reg = gen_rtx_REG (DImode, regno);
2324 emit_move_insn (reg, alt_reg);
2327 /* Restore floating point registers. */
2328 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2329 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2331 if (cfa_off & 15)
2332 abort ();
2333 reg = gen_rtx_REG (TFmode, regno);
2334 do_restore (gen_fr_restore_x, reg, cfa_off);
2335 cfa_off -= 16;
2338 /* Restore ar.unat for real. */
2339 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2341 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2342 emit_move_insn (reg, ar_unat_save_reg);
2345 if (cfa_off != current_frame_info.spill_cfa_off)
2346 abort ();
2348 finish_spill_pointers ();
2350 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2352 /* ??? At this point we must generate a magic insn that appears to
2353 modify the spill iterators, the stack pointer, and the frame
2354 pointer. This would allow the most scheduling freedom. For now,
2355 just hard stop. */
2356 emit_insn (gen_blockage ());
2359 if (cfun->machine->ia64_eh_epilogue_sp)
2360 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2361 else if (frame_pointer_needed)
2363 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2364 RTX_FRAME_RELATED_P (insn) = 1;
2366 else if (current_frame_info.total_size)
2368 rtx offset, frame_size_rtx;
2370 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2371 if (CONST_OK_FOR_I (current_frame_info.total_size))
2372 offset = frame_size_rtx;
2373 else
2375 regno = next_scratch_gr_reg ();
2376 offset = gen_rtx_REG (DImode, regno);
2377 emit_move_insn (offset, frame_size_rtx);
2380 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2381 offset));
2383 RTX_FRAME_RELATED_P (insn) = 1;
2384 if (GET_CODE (offset) != CONST_INT)
2386 REG_NOTES (insn)
2387 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2388 gen_rtx_SET (VOIDmode,
2389 stack_pointer_rtx,
2390 gen_rtx_PLUS (DImode,
2391 stack_pointer_rtx,
2392 frame_size_rtx)),
2393 REG_NOTES (insn));
2397 if (cfun->machine->ia64_eh_epilogue_bsp)
2398 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2400 if (! sibcall_p)
2401 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2404 /* Return 1 if br.ret can do all the work required to return from a
2405 function. */
2408 ia64_direct_return ()
2410 if (reload_completed && ! frame_pointer_needed)
2412 ia64_compute_frame_size (get_frame_size ());
2414 return (current_frame_info.total_size == 0
2415 && current_frame_info.n_spilled == 0
2416 && current_frame_info.reg_save_b0 == 0
2417 && current_frame_info.reg_save_pr == 0
2418 && current_frame_info.reg_save_ar_pfs == 0
2419 && current_frame_info.reg_save_ar_unat == 0
2420 && current_frame_info.reg_save_ar_lc == 0);
2422 return 0;
2426 ia64_hard_regno_rename_ok (from, to)
2427 int from;
2428 int to;
2430 /* Don't clobber any of the registers we reserved for the prologue. */
2431 if (to == current_frame_info.reg_fp
2432 || to == current_frame_info.reg_save_b0
2433 || to == current_frame_info.reg_save_pr
2434 || to == current_frame_info.reg_save_ar_pfs
2435 || to == current_frame_info.reg_save_ar_unat
2436 || to == current_frame_info.reg_save_ar_lc)
2437 return 0;
2439 if (from == current_frame_info.reg_fp
2440 || from == current_frame_info.reg_save_b0
2441 || from == current_frame_info.reg_save_pr
2442 || from == current_frame_info.reg_save_ar_pfs
2443 || from == current_frame_info.reg_save_ar_unat
2444 || from == current_frame_info.reg_save_ar_lc)
2445 return 0;
2447 /* Don't use output registers outside the register frame. */
2448 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2449 return 0;
2451 /* Retain even/oddness on predicate register pairs. */
2452 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2453 return (from & 1) == (to & 1);
2455 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2456 if (from == GR_REG (4) && current_function_calls_setjmp)
2457 return 0;
2459 return 1;
2462 /* Emit the function prologue. */
2464 void
2465 ia64_function_prologue (file, size)
2466 FILE *file;
2467 int size ATTRIBUTE_UNUSED;
2469 int mask, grsave, grsave_prev;
2471 if (current_frame_info.need_regstk)
2472 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2473 current_frame_info.n_input_regs,
2474 current_frame_info.n_local_regs,
2475 current_frame_info.n_output_regs,
2476 current_frame_info.n_rotate_regs);
2478 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2479 return;
2481 /* Emit the .prologue directive. */
2483 mask = 0;
2484 grsave = grsave_prev = 0;
2485 if (current_frame_info.reg_save_b0 != 0)
2487 mask |= 8;
2488 grsave = grsave_prev = current_frame_info.reg_save_b0;
2490 if (current_frame_info.reg_save_ar_pfs != 0
2491 && (grsave_prev == 0
2492 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2494 mask |= 4;
2495 if (grsave_prev == 0)
2496 grsave = current_frame_info.reg_save_ar_pfs;
2497 grsave_prev = current_frame_info.reg_save_ar_pfs;
2499 if (current_frame_info.reg_fp != 0
2500 && (grsave_prev == 0
2501 || current_frame_info.reg_fp == grsave_prev + 1))
2503 mask |= 2;
2504 if (grsave_prev == 0)
2505 grsave = HARD_FRAME_POINTER_REGNUM;
2506 grsave_prev = current_frame_info.reg_fp;
2508 if (current_frame_info.reg_save_pr != 0
2509 && (grsave_prev == 0
2510 || current_frame_info.reg_save_pr == grsave_prev + 1))
2512 mask |= 1;
2513 if (grsave_prev == 0)
2514 grsave = current_frame_info.reg_save_pr;
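/* MASK now has 8 set if b0, 4 if ar.pfs, 2 if the frame pointer and 1 if
   the predicates were saved in general registers, where each one counts
   only if its save register directly follows the previous one; GRSAVE is
   the first register of that consecutive run, which is what the .prologue
   directive below reports.  */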
2517 if (mask)
2518 fprintf (file, "\t.prologue %d, %d\n", mask,
2519 ia64_dbx_register_number (grsave));
2520 else
2521 fputs ("\t.prologue\n", file);
2523 /* Emit a .spill directive, if necessary, to relocate the base of
2524 the register spill area. */
2525 if (current_frame_info.spill_cfa_off != -16)
2526 fprintf (file, "\t.spill %ld\n",
2527 (long) (current_frame_info.spill_cfa_off
2528 + current_frame_info.spill_size));
2531 /* Emit the .body directive at the scheduled end of the prologue. */
2533 void
2534 ia64_output_end_prologue (file)
2535 FILE *file;
2537 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2538 return;
2540 fputs ("\t.body\n", file);
2543 /* Emit the function epilogue. */
2545 void
2546 ia64_function_epilogue (file, size)
2547 FILE *file ATTRIBUTE_UNUSED;
2548 int size ATTRIBUTE_UNUSED;
2550 int i;
2552 /* Reset from the function's potential modifications. */
2553 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2555 if (current_frame_info.reg_fp)
2557 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2558 reg_names[HARD_FRAME_POINTER_REGNUM]
2559 = reg_names[current_frame_info.reg_fp];
2560 reg_names[current_frame_info.reg_fp] = tmp;
2562 if (! TARGET_REG_NAMES)
2564 for (i = 0; i < current_frame_info.n_input_regs; i++)
2565 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2566 for (i = 0; i < current_frame_info.n_local_regs; i++)
2567 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2568 for (i = 0; i < current_frame_info.n_output_regs; i++)
2569 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2572 current_frame_info.initialized = 0;
2576 ia64_dbx_register_number (regno)
2577 int regno;
2579 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2580 from its home at loc79 to something inside the register frame. We
2581 must perform the same renumbering here for the debug info. */
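/* For example, with two input and three local registers, in0-in1 map to
   debug registers 32-33, loc0-loc2 to 34-36, and out0 onward to 37 and up.  */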
2582 if (current_frame_info.reg_fp)
2584 if (regno == HARD_FRAME_POINTER_REGNUM)
2585 regno = current_frame_info.reg_fp;
2586 else if (regno == current_frame_info.reg_fp)
2587 regno = HARD_FRAME_POINTER_REGNUM;
2590 if (IN_REGNO_P (regno))
2591 return 32 + regno - IN_REG (0);
2592 else if (LOC_REGNO_P (regno))
2593 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2594 else if (OUT_REGNO_P (regno))
2595 return (32 + current_frame_info.n_input_regs
2596 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2597 else
2598 return regno;
2601 void
2602 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2603 rtx addr, fnaddr, static_chain;
2605 rtx addr_reg, eight = GEN_INT (8);
2607 /* Load up our iterator. */
2608 addr_reg = gen_reg_rtx (Pmode);
2609 emit_move_insn (addr_reg, addr);
2611 /* The first two words are the fake descriptor:
2612 __ia64_trampoline, ADDR+16. */
2613 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2614 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2615 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2617 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2618 copy_to_reg (plus_constant (addr, 16)));
2619 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2621 /* The third word is the target descriptor. */
2622 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2623 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2625 /* The fourth word is the static chain. */
2626 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
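/* The trampoline thus consists of four 8-byte words: the address of
   __ia64_trampoline, ADDR+16, the target descriptor, and the static
   chain value.  */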
2629 /* Do any needed setup for a variadic function. CUM has not been updated
2630 for the last named argument which has type TYPE and mode MODE.
2632 We generate the actual spill instructions during prologue generation. */
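/* For example, if the named arguments occupy two of the eight argument
   slots, then n below is 6, *pretend_size becomes six words (48 bytes),
   and the prologue later spills the six remaining argument registers.  */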
2634 void
2635 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2636 CUMULATIVE_ARGS cum;
2637 int int_mode;
2638 tree type;
2639 int * pretend_size;
2640 int second_time ATTRIBUTE_UNUSED;
2642 /* If this is a stdarg function, then skip the current argument. */
2643 if (! current_function_varargs)
2644 ia64_function_arg_advance (&cum, int_mode, type, 1);
2646 if (cum.words < MAX_ARGUMENT_SLOTS)
2648 int n = MAX_ARGUMENT_SLOTS - cum.words;
2649 *pretend_size = n * UNITS_PER_WORD;
2650 cfun->machine->n_varargs = n;
2654 /* Check whether TYPE is a homogeneous floating point aggregate. If
2655 it is, return the mode of the floating point type that appears
2656 in all leaves. If it is not, return VOIDmode.
2658 An aggregate is a homogeneous floating point aggregate if all
2659 fields/elements in it have the same floating point type (e.g.,
2660 SFmode). 128-bit quad-precision floats are excluded. */
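/* For example, struct { float x, y, z; } is an HFA with element mode
   SFmode, while struct { float x; double y; } mixes element modes and
   is not an HFA.  */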
2662 static enum machine_mode
2663 hfa_element_mode (type, nested)
2664 tree type;
2665 int nested;
2667 enum machine_mode element_mode = VOIDmode;
2668 enum machine_mode mode;
2669 enum tree_code code = TREE_CODE (type);
2670 int know_element_mode = 0;
2671 tree t;
2673 switch (code)
2675 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2676 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2677 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2678 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2679 case FUNCTION_TYPE:
2680 return VOIDmode;
2682 /* Fortran complex types are supposed to be HFAs, so we need to handle
2683 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2684 types though. */
2685 case COMPLEX_TYPE:
2686 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2687 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2688 * BITS_PER_UNIT, MODE_FLOAT, 0);
2689 else
2690 return VOIDmode;
2692 case REAL_TYPE:
2693 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2694 mode if this is contained within an aggregate. */
2695 if (nested)
2696 return TYPE_MODE (type);
2697 else
2698 return VOIDmode;
2700 case ARRAY_TYPE:
2701 return TYPE_MODE (TREE_TYPE (type));
2703 case RECORD_TYPE:
2704 case UNION_TYPE:
2705 case QUAL_UNION_TYPE:
2706 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2708 if (TREE_CODE (t) != FIELD_DECL)
2709 continue;
2711 mode = hfa_element_mode (TREE_TYPE (t), 1);
2712 if (know_element_mode)
2714 if (mode != element_mode)
2715 return VOIDmode;
2717 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2718 return VOIDmode;
2719 else
2721 know_element_mode = 1;
2722 element_mode = mode;
2725 return element_mode;
2727 default:
2728 /* If we reach here, we probably have some front-end specific type
2729 that the backend doesn't know about. This can happen via the
2730 aggregate_value_p call in init_function_start. All we can do is
2731 ignore unknown tree types. */
2732 return VOIDmode;
2735 return VOIDmode;
2738 /* Return rtx for register where argument is passed, or zero if it is passed
2739 on the stack. */
2741 /* ??? 128-bit quad-precision floats are always passed in general
2742 registers. */
2745 ia64_function_arg (cum, mode, type, named, incoming)
2746 CUMULATIVE_ARGS *cum;
2747 enum machine_mode mode;
2748 tree type;
2749 int named;
2750 int incoming;
2752 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2753 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2754 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2755 / UNITS_PER_WORD);
2756 int offset = 0;
2757 enum machine_mode hfa_mode = VOIDmode;
2759 /* Integer and float arguments larger than 8 bytes start at the next even
2760 boundary. Aggregates larger than 8 bytes start at the next even boundary
2761 if the aggregate has 16 byte alignment. Net effect is that types with
2762 alignment greater than 8 start at the next even boundary. */
2763 /* ??? The ABI does not specify how to handle aggregates with alignment from
2764 9 to 15 bytes, or greater than 16. We handle them all as if they had
2765 16 byte alignment. Such aggregates can occur only if gcc extensions are
2766 used. */
2767 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2768 : (words > 1))
2769 && (cum->words & 1))
2770 offset = 1;
2772 /* If all argument slots are used, then it must go on the stack. */
2773 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2774 return 0;
2776 /* Check for and handle homogeneous FP aggregates. */
2777 if (type)
2778 hfa_mode = hfa_element_mode (type, 0);
2780 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2781 and unprototyped hfas are passed specially. */
2782 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2784 rtx loc[16];
2785 int i = 0;
2786 int fp_regs = cum->fp_regs;
2787 int int_regs = cum->words + offset;
2788 int hfa_size = GET_MODE_SIZE (hfa_mode);
2789 int byte_size;
2790 int args_byte_size;
2792 /* If prototyped, pass it in FR regs then GR regs.
2793 If not prototyped, pass it in both FR and GR regs.
2795 If this is an SFmode aggregate, then it is possible to run out of
2796 FR regs while GR regs are still left. In that case, we pass the
2797 remaining part in the GR regs. */
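/* For example, a named, prototyped HFA of three floats, with no FP
   argument registers used yet, is passed in the first three FP argument
   registers at offsets 0, 4 and 8, leaving nothing for the GRs.  */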
2799 /* Fill the FP regs. We do this always. We stop if we reach the end
2800 of the argument, the last FP register, or the last argument slot. */
2802 byte_size = ((mode == BLKmode)
2803 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2804 args_byte_size = int_regs * UNITS_PER_WORD;
2805 offset = 0;
2806 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2807 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2809 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2810 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2811 + fp_regs)),
2812 GEN_INT (offset));
2813 offset += hfa_size;
2814 args_byte_size += hfa_size;
2815 fp_regs++;
2818 /* If no prototype, then the whole thing must go in GR regs. */
2819 if (! cum->prototype)
2820 offset = 0;
2821 /* If this is an SFmode aggregate, then we might have some left over
2822 that needs to go in GR regs. */
2823 else if (byte_size != offset)
2824 int_regs += offset / UNITS_PER_WORD;
2826 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2828 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
2830 enum machine_mode gr_mode = DImode;
2832 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2833 then this goes in a GR reg left adjusted/little endian, right
2834 adjusted/big endian. */
2835 /* ??? Currently this is handled wrong, because 4-byte hunks are
2836 always right adjusted/little endian. */
2837 if (offset & 0x4)
2838 gr_mode = SImode;
2839 /* If we have an even 4 byte hunk because the aggregate is a
2840 multiple of 4 bytes in size, then this goes in a GR reg right
2841 adjusted/little endian. */
2842 else if (byte_size - offset == 4)
2843 gr_mode = SImode;
2845 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2846 gen_rtx_REG (gr_mode, (basereg
2847 + int_regs)),
2848 GEN_INT (offset));
2849 offset += GET_MODE_SIZE (gr_mode);
2850 int_regs++;
2853 /* If we ended up using just one location, just return that one loc. */
2854 if (i == 1)
2855 return XEXP (loc[0], 0);
2856 else
2857 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
2860 /* Integral types and aggregates go in general registers. If we have run out of
2861 FR registers, then FP values must also go in general registers. This can
2862 happen when we have an SFmode HFA. */
2863 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
2864 return gen_rtx_REG (mode, basereg + cum->words + offset);
2866 /* If there is a prototype, then FP values go in an FR register when
2867 named, and in a GR register when unnamed. */
2868 else if (cum->prototype)
2870 if (! named)
2871 return gen_rtx_REG (mode, basereg + cum->words + offset);
2872 else
2873 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
2875 /* If there is no prototype, then FP values go in both FR and GR
2876 registers. */
2877 else
2879 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
2880 gen_rtx_REG (mode, (FR_ARG_FIRST
2881 + cum->fp_regs)),
2882 const0_rtx);
2883 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2884 gen_rtx_REG (mode,
2885 (basereg + cum->words
2886 + offset)),
2887 const0_rtx);
2889 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
2893 /* Return number of words, at the beginning of the argument, that must be
2894 put in registers. 0 if the argument is entirely in registers or entirely
2895 in memory. */
2898 ia64_function_arg_partial_nregs (cum, mode, type, named)
2899 CUMULATIVE_ARGS *cum;
2900 enum machine_mode mode;
2901 tree type;
2902 int named ATTRIBUTE_UNUSED;
2904 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2905 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2906 / UNITS_PER_WORD);
2907 int offset = 0;
2909 /* Arguments with alignment larger than 8 bytes start at the next even
2910 boundary. */
2911 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2912 : (words > 1))
2913 && (cum->words & 1))
2914 offset = 1;
2916 /* If all argument slots are used, then it must go on the stack. */
2917 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2918 return 0;
2920 /* It doesn't matter whether the argument goes in FR or GR regs. If
2921 it fits within the 8 argument slots, then it goes entirely in
2922 registers. If it extends past the last argument slot, then the rest
2923 goes on the stack. */
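/* For example, a six-word aggregate with 8-byte alignment starting at
   slot 5 has its first three words passed in registers and the remaining
   three words passed on the stack.  */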
2925 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
2926 return 0;
2928 return MAX_ARGUMENT_SLOTS - cum->words - offset;
2931 /* Update CUM to point after this argument. This is patterned after
2932 ia64_function_arg. */
2934 void
2935 ia64_function_arg_advance (cum, mode, type, named)
2936 CUMULATIVE_ARGS *cum;
2937 enum machine_mode mode;
2938 tree type;
2939 int named;
2941 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2942 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2943 / UNITS_PER_WORD);
2944 int offset = 0;
2945 enum machine_mode hfa_mode = VOIDmode;
2947 /* If all arg slots are already full, then there is nothing to do. */
2948 if (cum->words >= MAX_ARGUMENT_SLOTS)
2949 return;
2951 /* Arguments with alignment larger than 8 bytes start at the next even
2952 boundary. */
2953 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2954 : (words > 1))
2955 && (cum->words & 1))
2956 offset = 1;
2958 cum->words += words + offset;
2960 /* Check for and handle homogeneous FP aggregates. */
2961 if (type)
2962 hfa_mode = hfa_element_mode (type, 0);
2964 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2965 and unprototyped hfas are passed specially. */
2966 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2968 int fp_regs = cum->fp_regs;
2969 /* This is the original value of cum->words + offset. */
2970 int int_regs = cum->words - words;
2971 int hfa_size = GET_MODE_SIZE (hfa_mode);
2972 int byte_size;
2973 int args_byte_size;
2975 /* If prototyped, pass it in FR regs then GR regs.
2976 If not prototyped, pass it in both FR and GR regs.
2978 If this is an SFmode aggregate, then it is possible to run out of
2979 FR regs while GR regs are still left. In that case, we pass the
2980 remaining part in the GR regs. */
2982 /* Fill the FP regs. We do this always. We stop if we reach the end
2983 of the argument, the last FP register, or the last argument slot. */
2985 byte_size = ((mode == BLKmode)
2986 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2987 args_byte_size = int_regs * UNITS_PER_WORD;
2988 offset = 0;
2989 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2990 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
2992 offset += hfa_size;
2993 args_byte_size += hfa_size;
2994 fp_regs++;
2997 cum->fp_regs = fp_regs;
3000 /* Integral types and aggregates go in general registers. If we have run out of
3001 FR registers, then FP values must also go in general registers. This can
3002 happen when we have an SFmode HFA. */
3003 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3004 return;
3006 /* If there is a prototype, then FP values go in an FR register when
3007 named, and in a GR register when unnamed. */
3008 else if (cum->prototype)
3010 if (! named)
3011 return;
3012 else
3013 /* ??? Complex types should not reach here. */
3014 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3016 /* If there is no prototype, then FP values go in both FR and GR
3017 registers. */
3018 else
3019 /* ??? Complex types should not reach here. */
3020 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3022 return;
3025 /* Implement va_start. */
3027 void
3028 ia64_va_start (stdarg_p, valist, nextarg)
3029 int stdarg_p;
3030 tree valist;
3031 rtx nextarg;
3033 int arg_words;
3034 int ofs;
3036 arg_words = current_function_args_info.words;
3038 if (stdarg_p)
3039 ofs = 0;
3040 else
3041 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3043 nextarg = plus_constant (nextarg, ofs);
3044 std_expand_builtin_va_start (1, valist, nextarg);
3047 /* Implement va_arg. */
3050 ia64_va_arg (valist, type)
3051 tree valist, type;
3053 tree t;
3055 /* Arguments with alignment larger than 8 bytes start at the next even
3056 boundary. */
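/* That is, emit valist = (valist + 15) & -16 (with UNITS_PER_WORD being 8)
   before the standard va_arg expansion below.  */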
3057 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3059 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3060 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3061 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3062 build_int_2 (-2 * UNITS_PER_WORD, -1));
3063 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3064 TREE_SIDE_EFFECTS (t) = 1;
3065 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3068 return std_expand_builtin_va_arg (valist, type);
3071 /* Return 1 if function return value returned in memory. Return 0 if it is
3072 in a register. */
3075 ia64_return_in_memory (valtype)
3076 tree valtype;
3078 enum machine_mode mode;
3079 enum machine_mode hfa_mode;
3080 int byte_size;
3082 mode = TYPE_MODE (valtype);
3083 byte_size = ((mode == BLKmode)
3084 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3086 /* HFAs with up to 8 elements are returned in the FP argument registers. */
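/* For example, a structure of eight doubles is returned in the FP
   argument registers, while an array of ten floats is returned in memory.  */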
3088 hfa_mode = hfa_element_mode (valtype, 0);
3089 if (hfa_mode != VOIDmode)
3091 int hfa_size = GET_MODE_SIZE (hfa_mode);
3093 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3094 return 1;
3095 else
3096 return 0;
3099 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3100 return 1;
3101 else
3102 return 0;
3105 /* Return rtx for register that holds the function return value. */
3108 ia64_function_value (valtype, func)
3109 tree valtype;
3110 tree func ATTRIBUTE_UNUSED;
3112 enum machine_mode mode;
3113 enum machine_mode hfa_mode;
3115 mode = TYPE_MODE (valtype);
3116 hfa_mode = hfa_element_mode (valtype, 0);
3118 if (hfa_mode != VOIDmode)
3120 rtx loc[8];
3121 int i;
3122 int hfa_size;
3123 int byte_size;
3124 int offset;
3126 hfa_size = GET_MODE_SIZE (hfa_mode);
3127 byte_size = ((mode == BLKmode)
3128 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3129 offset = 0;
3130 for (i = 0; offset < byte_size; i++)
3132 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3133 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3134 GEN_INT (offset));
3135 offset += hfa_size;
3138 if (i == 1)
3139 return XEXP (loc[0], 0);
3140 else
3141 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3143 else if (FLOAT_TYPE_P (valtype))
3144 return gen_rtx_REG (mode, FR_ARG_FIRST);
3145 else
3146 return gen_rtx_REG (mode, GR_RET_FIRST);
3149 /* Print a memory address as an operand to reference that memory location. */
3151 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3152 also call this from ia64_print_operand for memory addresses. */
3154 void
3155 ia64_print_operand_address (stream, address)
3156 FILE * stream ATTRIBUTE_UNUSED;
3157 rtx address ATTRIBUTE_UNUSED;
3161 /* Print an operand to an assembler instruction.
3162 C Swap and print a comparison operator.
3163 D Print an FP comparison operator.
3164 E Print 32 - constant, for SImode shifts as extract.
3165 e Print 64 - constant, for DImode rotates.
3166 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3167 a floating point register emitted normally.
3168 I Invert a predicate register by adding 1.
3169 J Select the proper predicate register for a condition.
3170 j Select the inverse predicate register for a condition.
3171 O Append .acq for volatile load.
3172 P Postincrement of a MEM.
3173 Q Append .rel for volatile store.
3174 S Shift amount for shladd instruction.
3175 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3176 for Intel assembler.
3177 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3178 for Intel assembler.
3179 r Print register name, or constant 0 as r0. HP compatibility for
3180 Linux kernel. */
3181 void
3182 ia64_print_operand (file, x, code)
3183 FILE * file;
3184 rtx x;
3185 int code;
3187 const char *str;
3189 switch (code)
3191 case 0:
3192 /* Handled below. */
3193 break;
3195 case 'C':
3197 enum rtx_code c = swap_condition (GET_CODE (x));
3198 fputs (GET_RTX_NAME (c), file);
3199 return;
3202 case 'D':
3203 switch (GET_CODE (x))
3205 case NE:
3206 str = "neq";
3207 break;
3208 case UNORDERED:
3209 str = "unord";
3210 break;
3211 case ORDERED:
3212 str = "ord";
3213 break;
3214 default:
3215 str = GET_RTX_NAME (GET_CODE (x));
3216 break;
3218 fputs (str, file);
3219 return;
3221 case 'E':
3222 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3223 return;
3225 case 'e':
3226 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3227 return;
3229 case 'F':
3230 if (x == CONST0_RTX (GET_MODE (x)))
3231 str = reg_names [FR_REG (0)];
3232 else if (x == CONST1_RTX (GET_MODE (x)))
3233 str = reg_names [FR_REG (1)];
3234 else if (GET_CODE (x) == REG)
3235 str = reg_names [REGNO (x)];
3236 else
3237 abort ();
3238 fputs (str, file);
3239 return;
3241 case 'I':
3242 fputs (reg_names [REGNO (x) + 1], file);
3243 return;
3245 case 'J':
3246 case 'j':
3248 unsigned int regno = REGNO (XEXP (x, 0));
3249 if (GET_CODE (x) == EQ)
3250 regno += 1;
3251 if (code == 'j')
3252 regno ^= 1;
3253 fputs (reg_names [regno], file);
3255 return;
3257 case 'O':
3258 if (MEM_VOLATILE_P (x))
3259 fputs(".acq", file);
3260 return;
3262 case 'P':
3264 HOST_WIDE_INT value;
3266 switch (GET_CODE (XEXP (x, 0)))
3268 default:
3269 return;
3271 case POST_MODIFY:
3272 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3273 if (GET_CODE (x) == CONST_INT)
3274 value = INTVAL (x);
3275 else if (GET_CODE (x) == REG)
3277 fprintf (file, ", %s", reg_names[REGNO (x)]);
3278 return;
3280 else
3281 abort ();
3282 break;
3284 case POST_INC:
3285 value = GET_MODE_SIZE (GET_MODE (x));
3286 break;
3288 case POST_DEC:
3289 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3290 break;
3293 putc (',', file);
3294 putc (' ', file);
3295 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3296 return;
3299 case 'Q':
3300 if (MEM_VOLATILE_P (x))
3301 fputs(".rel", file);
3302 return;
3304 case 'S':
3305 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3306 return;
3308 case 'T':
3309 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3311 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3312 return;
3314 break;
3316 case 'U':
3317 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3319 const char *prefix = "0x";
3320 if (INTVAL (x) & 0x80000000)
3322 fprintf (file, "0xffffffff");
3323 prefix = "";
3325 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3326 return;
3328 break;
3330 case 'r':
3331 /* If this operand is the constant zero, write it as register zero.
3332 Any register, zero, or CONST_INT value is OK here. */
3333 if (GET_CODE (x) == REG)
3334 fputs (reg_names[REGNO (x)], file);
3335 else if (x == CONST0_RTX (GET_MODE (x)))
3336 fputs ("r0", file);
3337 else if (GET_CODE (x) == CONST_INT)
3338 output_addr_const (file, x);
3339 else
3340 output_operand_lossage ("invalid %%r value");
3341 return;
3343 case '+':
3345 const char *which;
3347 /* For conditional branches, returns or calls, substitute
3348 sptk, dptk, dpnt, or spnt for %s. */
3349 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3350 if (x)
3352 int pred_val = INTVAL (XEXP (x, 0));
3354 /* Guess that the bottom 2% and top 2% are statically predicted. */
3355 if (pred_val < REG_BR_PROB_BASE / 50)
3356 which = ".spnt";
3357 else if (pred_val < REG_BR_PROB_BASE / 2)
3358 which = ".dpnt";
3359 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3360 which = ".dptk";
3361 else
3362 which = ".sptk";
3364 else if (GET_CODE (current_output_insn) == CALL_INSN)
3365 which = ".sptk";
3366 else
3367 which = ".dptk";
3369 fputs (which, file);
3370 return;
3373 case ',':
3374 x = current_insn_predicate;
3375 if (x)
3377 unsigned int regno = REGNO (XEXP (x, 0));
3378 if (GET_CODE (x) == EQ)
3379 regno += 1;
3380 fprintf (file, "(%s) ", reg_names [regno]);
3382 return;
3384 default:
3385 output_operand_lossage ("ia64_print_operand: unknown code");
3386 return;
3389 switch (GET_CODE (x))
3391 /* This happens for the spill/restore instructions. */
3392 case POST_INC:
3393 case POST_DEC:
3394 case POST_MODIFY:
3395 x = XEXP (x, 0);
3396 /* ... fall through ... */
3398 case REG:
3399 fputs (reg_names [REGNO (x)], file);
3400 break;
3402 case MEM:
3404 rtx addr = XEXP (x, 0);
3405 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3406 addr = XEXP (addr, 0);
3407 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3408 break;
3411 default:
3412 output_addr_const (file, x);
3413 break;
3416 return;
3419 /* Calculate the cost of moving data from a register in class FROM to
3420 one in class TO. */
3423 ia64_register_move_cost (from, to)
3424 enum reg_class from, to;
3426 int from_hard, to_hard;
3427 int from_gr, to_gr;
3428 int from_fr, to_fr;
3429 int from_pr, to_pr;
3431 from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
3432 to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
3433 from_gr = (from == GENERAL_REGS);
3434 to_gr = (to == GENERAL_REGS);
3435 from_fr = (from == FR_REGS);
3436 to_fr = (to == FR_REGS);
3437 from_pr = (from == PR_REGS);
3438 to_pr = (to == PR_REGS);
3440 if (from_hard && to_hard)
3441 return 8;
3442 else if ((from_hard && !to_gr) || (!from_gr && to_hard))
3443 return 6;
3445 /* Moving between PR registers takes two insns. */
3446 else if (from_pr && to_pr)
3447 return 3;
3448 /* Moving between PR and anything but GR is impossible. */
3449 else if ((from_pr && !to_gr) || (!from_gr && to_pr))
3450 return 6;
3452 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3453 secondary memory reloads for TFmode moves. Unfortunately, we don't
3454 have the mode here, so we can't check that. */
3455 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3456 to avoid spectacularly poor register class preferencing for TFmode. */
3457 else if (from_fr != to_fr)
3458 return 5;
3460 return 2;
3463 /* This function returns the register class required for a secondary
3464 register when copying between one of the registers in CLASS, and X,
3465 using MODE. A return value of NO_REGS means that no secondary register
3466 is required. */
3468 enum reg_class
3469 ia64_secondary_reload_class (class, mode, x)
3470 enum reg_class class;
3471 enum machine_mode mode ATTRIBUTE_UNUSED;
3472 rtx x;
3474 int regno = -1;
3476 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3477 regno = true_regnum (x);
3479 switch (class)
3481 case BR_REGS:
3482 /* ??? This is required because of a bad gcse/cse/global interaction.
3483 We end up with two pseudos with overlapping lifetimes both of which
3484 are equiv to the same constant, and both which need to be in BR_REGS.
3485 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3486 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3487 This seems to be a cse bug. cse_basic_block_end changes depending
3488 on the path length, which means the qty_first_reg check in
3489 make_regs_eqv can give different answers at different times. */
3490 /* ??? At some point I'll probably need a reload_indi pattern to handle
3491 this. */
3492 if (BR_REGNO_P (regno))
3493 return GR_REGS;
3495 /* This is needed if a pseudo used as a call_operand gets spilled to a
3496 stack slot. */
3497 if (GET_CODE (x) == MEM)
3498 return GR_REGS;
3499 break;
3501 case FR_REGS:
3502 /* This can happen when a paradoxical subreg is an operand to the
3503 muldi3 pattern. */
3504 /* ??? This shouldn't be necessary after instruction scheduling is
3505 enabled, because paradoxical subregs are not accepted by
3506 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3507 stop the paradoxical subreg stupidity in the *_operand functions
3508 in recog.c. */
3509 if (GET_CODE (x) == MEM
3510 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3511 || GET_MODE (x) == QImode))
3512 return GR_REGS;
3514 /* This can happen because of the ior/and/etc patterns that accept FP
3515 registers as operands. If the third operand is a constant, then it
3516 needs to be reloaded into a FP register. */
3517 if (GET_CODE (x) == CONST_INT)
3518 return GR_REGS;
3520 /* This can happen because of register elimination in a muldi3 insn.
3521 E.g. `26107 * (unsigned long)&u'. */
3522 if (GET_CODE (x) == PLUS)
3523 return GR_REGS;
3524 break;
3526 case PR_REGS:
3527 /* ??? This happens if we cse/gcse a BImode value across a call,
3528 and the function has a nonlocal goto. This is because global
3529 does not allocate call crossing pseudos to hard registers when
3530 current_function_has_nonlocal_goto is true. This is relatively
3531 common for C++ programs that use exceptions. To reproduce,
3532 return NO_REGS and compile libstdc++. */
3533 if (GET_CODE (x) == MEM)
3534 return GR_REGS;
3536 /* This can happen when we take a BImode subreg of a DImode value,
3537 and that DImode value winds up in some non-GR register. */
3538 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3539 return GR_REGS;
3540 break;
3542 case GR_REGS:
3543 /* Since we have no offsettable memory addresses, we need a temporary
3544 to hold the address of the second word. */
3545 if (mode == TImode)
3546 return GR_REGS;
3547 break;
3549 default:
3550 break;
3553 return NO_REGS;
3557 /* Emit text to declare externally defined variables and functions, because
3558 the Intel assembler does not support undefined externals. */
3560 void
3561 ia64_asm_output_external (file, decl, name)
3562 FILE *file;
3563 tree decl;
3564 const char *name;
3566 int save_referenced;
3568 /* GNU as does not need anything here. */
3569 if (TARGET_GNU_AS)
3570 return;
3572 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3573 the linker when we do this, so we need to be careful not to do this for
3574 builtin functions which have no library equivalent. Unfortunately, we
3575 can't tell here whether or not a function will actually be called by
3576 expand_expr, so we pull in library functions even if we may not need
3577 them later. */
3578 if (! strcmp (name, "__builtin_next_arg")
3579 || ! strcmp (name, "alloca")
3580 || ! strcmp (name, "__builtin_constant_p")
3581 || ! strcmp (name, "__builtin_args_info"))
3582 return;
3584 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3585 restore it. */
3586 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3587 if (TREE_CODE (decl) == FUNCTION_DECL)
3589 fprintf (file, "%s", TYPE_ASM_OP);
3590 assemble_name (file, name);
3591 putc (',', file);
3592 fprintf (file, TYPE_OPERAND_FMT, "function");
3593 putc ('\n', file);
3595 ASM_GLOBALIZE_LABEL (file, name);
3596 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3599 /* Parse the -mfixed-range= option string. */
3601 static void
3602 fix_range (const_str)
3603 const char *const_str;
3605 int i, first, last;
3606 char *str, *dash, *comma;
3608 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3609 REG2 are either register names or register numbers. The effect
3610 of this option is to mark the registers in the range from REG1 to
3611 REG2 as ``fixed'' so they won't be used by the compiler. This is
3612 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3614 i = strlen (const_str);
3615 str = (char *) alloca (i + 1);
3616 memcpy (str, const_str, i + 1);
3618 while (1)
3620 dash = strchr (str, '-');
3621 if (!dash)
3623 warning ("value of -mfixed-range must have form REG1-REG2");
3624 return;
3626 *dash = '\0';
3628 comma = strchr (dash + 1, ',');
3629 if (comma)
3630 *comma = '\0';
3632 first = decode_reg_name (str);
3633 if (first < 0)
3635 warning ("unknown register name: %s", str);
3636 return;
3639 last = decode_reg_name (dash + 1);
3640 if (last < 0)
3642 warning ("unknown register name: %s", dash + 1);
3643 return;
3646 *dash = '-';
3648 if (first > last)
3650 warning ("%s-%s is an empty range", str, dash + 1);
3651 return;
3654 for (i = first; i <= last; ++i)
3655 fixed_regs[i] = call_used_regs[i] = 1;
3657 if (!comma)
3658 break;
3660 *comma = ',';
3661 str = comma + 1;
3665 /* Called to register all of our global variables with the garbage
3666 collector. */
3668 static void
3669 ia64_add_gc_roots ()
3671 ggc_add_rtx_root (&ia64_compare_op0, 1);
3672 ggc_add_rtx_root (&ia64_compare_op1, 1);
3675 static void
3676 ia64_init_machine_status (p)
3677 struct function *p;
3679 p->machine =
3680 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3683 static void
3684 ia64_mark_machine_status (p)
3685 struct function *p;
3687 struct machine_function *machine = p->machine;
3689 if (machine)
3691 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3692 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3693 ggc_mark_rtx (machine->ia64_gp_save);
3697 static void
3698 ia64_free_machine_status (p)
3699 struct function *p;
3701 free (p->machine);
3702 p->machine = NULL;
3705 /* Handle TARGET_OPTIONS switches. */
3707 void
3708 ia64_override_options ()
3710 if (TARGET_AUTO_PIC)
3711 target_flags |= MASK_CONST_GP;
3713 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3715 warning ("cannot optimize division for both latency and throughput");
3716 target_flags &= ~MASK_INLINE_DIV_THR;
3719 if (ia64_fixed_range_string)
3720 fix_range (ia64_fixed_range_string);
3722 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3723 flag_schedule_insns_after_reload = 0;
3725 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3727 init_machine_status = ia64_init_machine_status;
3728 mark_machine_status = ia64_mark_machine_status;
3729 free_machine_status = ia64_free_machine_status;
3731 ia64_add_gc_roots ();
3734 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3735 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3736 static enum attr_type ia64_safe_type PARAMS((rtx));
3738 static enum attr_itanium_requires_unit0
3739 ia64_safe_itanium_requires_unit0 (insn)
3740 rtx insn;
3742 if (recog_memoized (insn) >= 0)
3743 return get_attr_itanium_requires_unit0 (insn);
3744 else
3745 return ITANIUM_REQUIRES_UNIT0_NO;
3748 static enum attr_itanium_class
3749 ia64_safe_itanium_class (insn)
3750 rtx insn;
3752 if (recog_memoized (insn) >= 0)
3753 return get_attr_itanium_class (insn);
3754 else
3755 return ITANIUM_CLASS_UNKNOWN;
3758 static enum attr_type
3759 ia64_safe_type (insn)
3760 rtx insn;
3762 if (recog_memoized (insn) >= 0)
3763 return get_attr_type (insn);
3764 else
3765 return TYPE_UNKNOWN;
3768 /* The following collection of routines emit instruction group stop bits as
3769 necessary to avoid dependencies. */
3771 /* Need to track some additional registers as far as serialization is
3772 concerned so we can properly handle br.call and br.ret. We could
3773 make these registers visible to gcc, but since these registers are
3774 never explicitly used in gcc generated code, it seems wasteful to
3775 do so (plus it would make the call and return patterns needlessly
3776 complex). */
3777 #define REG_GP (GR_REG (1))
3778 #define REG_RP (BR_REG (0))
3779 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3780 /* This is used for volatile asms which may require a stop bit immediately
3781 before and after them. */
3782 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
3783 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3784 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
3786 /* For each register, we keep track of how it has been written in the
3787 current instruction group.
3789 If a register is written unconditionally (no qualifying predicate),
3790 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3792 If a register is written if its qualifying predicate P is true, we
3793 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3794 may be written again by the complement of P (P^1) and when this happens,
3795 WRITE_COUNT gets set to 2.
3797 The result of this is that whenever an insn attempts to write a register
3798 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
3800 If a predicate register is written by a floating-point insn, we set
3801 WRITTEN_BY_FP to true.
3803 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3804 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
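/* For example, "(p6) mov r8 = 1" gives r8 a WRITE_COUNT of 1 with FIRST_PRED
   p6; a following "(p7) mov r8 = 2" in the same group is the complementary
   case and simply raises WRITE_COUNT to 2, while any other write of r8 in
   the group requires an insn group barrier first.  */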
3806 struct reg_write_state
3808 unsigned int write_count : 2;
3809 unsigned int first_pred : 16;
3810 unsigned int written_by_fp : 1;
3811 unsigned int written_by_and : 1;
3812 unsigned int written_by_or : 1;
3815 /* Cumulative info for the current instruction group. */
3816 struct reg_write_state rws_sum[NUM_REGS];
3817 /* Info for the current instruction. This gets copied to rws_sum after a
3818 stop bit is emitted. */
3819 struct reg_write_state rws_insn[NUM_REGS];
3821 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3822 RTL for one instruction. */
3823 struct reg_flags
3825 unsigned int is_write : 1; /* Is register being written? */
3826 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
3827 unsigned int is_branch : 1; /* Is register used as part of a branch? */
3828 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
3829 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
3830 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
3833 static void rws_update PARAMS ((struct reg_write_state *, int,
3834 struct reg_flags, int));
3835 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
3836 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
3837 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
3838 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
3839 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
3840 static void init_insn_group_barriers PARAMS ((void));
3841 static int group_barrier_needed_p PARAMS ((rtx));
3842 static int safe_group_barrier_needed_p PARAMS ((rtx));
3844 /* Update *RWS for REGNO, which is being written by the current instruction,
3845 with predicate PRED, and associated register flags in FLAGS. */
3847 static void
3848 rws_update (rws, regno, flags, pred)
3849 struct reg_write_state *rws;
3850 int regno;
3851 struct reg_flags flags;
3852 int pred;
3854 rws[regno].write_count += pred ? 1 : 2;
3855 rws[regno].written_by_fp |= flags.is_fp;
3856 /* ??? Not tracking and/or across differing predicates. */
3857 rws[regno].written_by_and = flags.is_and;
3858 rws[regno].written_by_or = flags.is_or;
3859 rws[regno].first_pred = pred;
3862 /* Handle an access to register REGNO of type FLAGS using predicate register
3863 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3864 a dependency with an earlier instruction in the same group. */
3866 static int
3867 rws_access_regno (regno, flags, pred)
3868 int regno;
3869 struct reg_flags flags;
3870 int pred;
3872 int need_barrier = 0;
3874 if (regno >= NUM_REGS)
3875 abort ();
3877 if (! PR_REGNO_P (regno))
3878 flags.is_and = flags.is_or = 0;
3880 if (flags.is_write)
3882 int write_count;
3884 /* One insn writes same reg multiple times? */
3885 if (rws_insn[regno].write_count > 0)
3886 abort ();
3888 /* Update info for current instruction. */
3889 rws_update (rws_insn, regno, flags, pred);
3890 write_count = rws_sum[regno].write_count;
3892 switch (write_count)
3894 case 0:
3895 /* The register has not been written yet. */
3896 rws_update (rws_sum, regno, flags, pred);
3897 break;
3899 case 1:
3900 /* The register has been written via a predicate. If this is
3901 not a complementary predicate, then we need a barrier. */
3902 /* ??? This assumes that P and P+1 are always complementary
3903 predicates for P even. */
3904 if (flags.is_and && rws_sum[regno].written_by_and)
3906 else if (flags.is_or && rws_sum[regno].written_by_or)
3908 else if ((rws_sum[regno].first_pred ^ 1) != pred)
3909 need_barrier = 1;
3910 rws_update (rws_sum, regno, flags, pred);
3911 break;
3913 case 2:
3914 /* The register has been unconditionally written already. We
3915 need a barrier. */
3916 if (flags.is_and && rws_sum[regno].written_by_and)
3918 else if (flags.is_or && rws_sum[regno].written_by_or)
3920 else
3921 need_barrier = 1;
3922 rws_sum[regno].written_by_and = flags.is_and;
3923 rws_sum[regno].written_by_or = flags.is_or;
3924 break;
3926 default:
3927 abort ();
3930 else
3932 if (flags.is_branch)
3934 /* Branches have several RAW exceptions that allow us to avoid
3935 barriers. */
3937 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
3938 /* RAW dependencies on branch regs are permissible as long
3939 as the writer is a non-branch instruction. Since we
3940 never generate code that uses a branch register written
3941 by a branch instruction, handling this case is
3942 easy. */
3943 return 0;
3945 if (REGNO_REG_CLASS (regno) == PR_REGS
3946 && ! rws_sum[regno].written_by_fp)
3947 /* The predicates of a branch are available within the
3948 same insn group as long as the predicate was written by
3949 something other than a floating-point instruction. */
3950 return 0;
3953 if (flags.is_and && rws_sum[regno].written_by_and)
3954 return 0;
3955 if (flags.is_or && rws_sum[regno].written_by_or)
3956 return 0;
3958 switch (rws_sum[regno].write_count)
3960 case 0:
3961 /* The register has not been written yet. */
3962 break;
3964 case 1:
3965 /* The register has been written via a predicate. If this is
3966 not a complementary predicate, then we need a barrier. */
3967 /* ??? This assumes that P and P+1 are always complementary
3968 predicates for P even. */
3969 if ((rws_sum[regno].first_pred ^ 1) != pred)
3970 need_barrier = 1;
3971 break;
3973 case 2:
3974 /* The register has been unconditionally written already. We
3975 need a barrier. */
3976 need_barrier = 1;
3977 break;
3979 default:
3980 abort ();
3984 return need_barrier;
3987 static int
3988 rws_access_reg (reg, flags, pred)
3989 rtx reg;
3990 struct reg_flags flags;
3991 int pred;
3993 int regno = REGNO (reg);
3994 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
3996 if (n == 1)
3997 return rws_access_regno (regno, flags, pred);
3998 else
4000 int need_barrier = 0;
4001 while (--n >= 0)
4002 need_barrier |= rws_access_regno (regno + n, flags, pred);
4003 return need_barrier;
4007 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4008 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
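/* For illustration (an added sketch of the expected RTL shape): a
   conditional move such as "(p6) mov r14 = r15" is seen here as

	(set (reg:DI r14)
	     (if_then_else (ne (reg:BI p6) (const_int 0))
			   (reg:DI r15)
			   (reg:DI r14)))

   Because one arm of the IF_THEN_ELSE is the destination itself, the write
   to r14 is really conditional; we record p6 in *PPRED (or its complement
   p7 when the comparison code is EQ) so that rws_access_regno treats the
   write as predicated.  */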
4010 static void
4011 update_set_flags (x, pflags, ppred, pcond)
4012 rtx x;
4013 struct reg_flags *pflags;
4014 int *ppred;
4015 rtx *pcond;
4017 rtx src = SET_SRC (x);
4019 *pcond = 0;
4021 switch (GET_CODE (src))
4023 case CALL:
4024 return;
4026 case IF_THEN_ELSE:
4027 if (SET_DEST (x) == pc_rtx)
4028 /* X is a conditional branch. */
4029 return;
4030 else
4032 int is_complemented = 0;
4034 /* X is a conditional move. */
4035 rtx cond = XEXP (src, 0);
4036 if (GET_CODE (cond) == EQ)
4037 is_complemented = 1;
4038 cond = XEXP (cond, 0);
4039 if (GET_CODE (cond) != REG
4040 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4041 abort ();
4042 *pcond = cond;
4043 if (XEXP (src, 1) == SET_DEST (x)
4044 || XEXP (src, 2) == SET_DEST (x))
4046 /* X is a conditional move that conditionally writes the
4047 destination. */
4049 /* We need another complement in this case. */
4050 if (XEXP (src, 1) == SET_DEST (x))
4051 is_complemented = ! is_complemented;
4053 *ppred = REGNO (cond);
4054 if (is_complemented)
4055 ++*ppred;
4058 /* ??? If this is a conditional write to the dest, then this
4059 instruction does not actually read one source. This probably
4060 doesn't matter, because that source is also the dest. */
4061 /* ??? Multiple writes to predicate registers are allowed
4062 if they are all AND type compares, or if they are all OR
4063 type compares. We do not generate such instructions
4064 currently. */
4066 /* ... fall through ... */
4068 default:
4069 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4070 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4071 /* Set pflags->is_fp to 1 so that we know we're dealing
4072 with a floating point comparison when processing the
4073 destination of the SET. */
4074 pflags->is_fp = 1;
4076 /* Discover if this is a parallel comparison. We only handle
4077 and.orcm and or.andcm at present, since we must retain a
4078 strict inverse on the predicate pair. */
4079 else if (GET_CODE (src) == AND)
4080 pflags->is_and = 1;
4081 else if (GET_CODE (src) == IOR)
4082 pflags->is_or = 1;
4084 break;
4088 /* Subroutine of rtx_needs_barrier; this function determines whether the
4089 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4090 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4091 for this insn. */
4093 static int
4094 set_src_needs_barrier (x, flags, pred, cond)
4095 rtx x;
4096 struct reg_flags flags;
4097 int pred;
4098 rtx cond;
4100 int need_barrier = 0;
4101 rtx dst;
4102 rtx src = SET_SRC (x);
4104 if (GET_CODE (src) == CALL)
4105 /* We don't need to worry about the result registers that
4106 get written by a subroutine call. */
4107 return rtx_needs_barrier (src, flags, pred);
4108 else if (SET_DEST (x) == pc_rtx)
4110 /* X is a conditional branch. */
4111 /* ??? This seems redundant, as the caller sets this bit for
4112 all JUMP_INSNs. */
4113 flags.is_branch = 1;
4114 return rtx_needs_barrier (src, flags, pred);
4117 need_barrier = rtx_needs_barrier (src, flags, pred);
4119 /* This instruction unconditionally uses a predicate register. */
4120 if (cond)
4121 need_barrier |= rws_access_reg (cond, flags, 0);
4123 dst = SET_DEST (x);
4124 if (GET_CODE (dst) == ZERO_EXTRACT)
4126 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4127 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4128 dst = XEXP (dst, 0);
4130 return need_barrier;
4133 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4134 Return 1 if this access creates a dependency with an earlier instruction
4135 in the same group. */
4137 static int
4138 rtx_needs_barrier (x, flags, pred)
4139 rtx x;
4140 struct reg_flags flags;
4141 int pred;
4143 int i, j;
4144 int is_complemented = 0;
4145 int need_barrier = 0;
4146 const char *format_ptr;
4147 struct reg_flags new_flags;
4148 rtx cond = 0;
4150 if (! x)
4151 return 0;
4153 new_flags = flags;
4155 switch (GET_CODE (x))
4157 case SET:
4158 update_set_flags (x, &new_flags, &pred, &cond);
4159 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4160 if (GET_CODE (SET_SRC (x)) != CALL)
4162 new_flags.is_write = 1;
4163 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4165 break;
4167 case CALL:
4168 new_flags.is_write = 0;
4169 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4171 /* Avoid multiple register writes, in case this is a pattern with
4172 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4173 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4175 new_flags.is_write = 1;
4176 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4177 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4178 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4180 break;
4182 case COND_EXEC:
4183 /* X is a predicated instruction. */
4185 cond = COND_EXEC_TEST (x);
4186 if (pred)
4187 abort ();
4188 need_barrier = rtx_needs_barrier (cond, flags, 0);
4190 if (GET_CODE (cond) == EQ)
4191 is_complemented = 1;
4192 cond = XEXP (cond, 0);
4193 if (GET_CODE (cond) != REG
4194 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4195 abort ();
4196 pred = REGNO (cond);
4197 if (is_complemented)
4198 ++pred;
4200 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4201 return need_barrier;
4203 case CLOBBER:
4204 case USE:
4205 /* Clobber & use are for earlier compiler-phases only. */
4206 break;
4208 case ASM_OPERANDS:
4209 case ASM_INPUT:
4210 /* We always emit stop bits for traditional asms. We emit stop bits
4211 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4212 if (GET_CODE (x) != ASM_OPERANDS
4213 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4215 /* Avoid writing the register multiple times if we have multiple
4216 asm outputs. This avoids an abort in rws_access_reg. */
4217 if (! rws_insn[REG_VOLATILE].write_count)
4219 new_flags.is_write = 1;
4220 rws_access_regno (REG_VOLATILE, new_flags, pred);
4222 return 1;
4225 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4226 We cannot just fall through here, since then we would be confused
4227 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
4228 a traditional asm, unlike its normal usage. */
4230 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4231 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4232 need_barrier = 1;
4233 break;
4235 case PARALLEL:
4236 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4238 rtx pat = XVECEXP (x, 0, i);
4239 if (GET_CODE (pat) == SET)
4241 update_set_flags (pat, &new_flags, &pred, &cond);
4242 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4244 else if (GET_CODE (pat) == USE
4245 || GET_CODE (pat) == CALL
4246 || GET_CODE (pat) == ASM_OPERANDS)
4247 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4248 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4249 abort ();
4251 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4253 rtx pat = XVECEXP (x, 0, i);
4254 if (GET_CODE (pat) == SET)
4256 if (GET_CODE (SET_SRC (pat)) != CALL)
4258 new_flags.is_write = 1;
4259 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4260 pred);
4263 else if (GET_CODE (pat) == CLOBBER)
4264 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4266 break;
4268 case SUBREG:
4269 x = SUBREG_REG (x);
4270 /* FALLTHRU */
4271 case REG:
4272 if (REGNO (x) == AR_UNAT_REGNUM)
4274 for (i = 0; i < 64; ++i)
4275 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4277 else
4278 need_barrier = rws_access_reg (x, flags, pred);
4279 break;
4281 case MEM:
4282 /* Find the regs used in memory address computation. */
4283 new_flags.is_write = 0;
4284 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4285 break;
4287 case CONST_INT: case CONST_DOUBLE:
4288 case SYMBOL_REF: case LABEL_REF: case CONST:
4289 break;
4291 /* Operators with side-effects. */
4292 case POST_INC: case POST_DEC:
4293 if (GET_CODE (XEXP (x, 0)) != REG)
4294 abort ();
4296 new_flags.is_write = 0;
4297 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4298 new_flags.is_write = 1;
4299 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4300 break;
4302 case POST_MODIFY:
4303 if (GET_CODE (XEXP (x, 0)) != REG)
4304 abort ();
4306 new_flags.is_write = 0;
4307 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4308 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4309 new_flags.is_write = 1;
4310 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4311 break;
4313 /* Handle common unary and binary ops for efficiency. */
4314 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4315 case MOD: case UDIV: case UMOD: case AND: case IOR:
4316 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4317 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4318 case NE: case EQ: case GE: case GT: case LE:
4319 case LT: case GEU: case GTU: case LEU: case LTU:
4320 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4321 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4322 break;
4324 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4325 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4326 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4327 case SQRT: case FFS:
4328 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4329 break;
4331 case UNSPEC:
4332 switch (XINT (x, 1))
4334 case 1: /* st8.spill */
4335 case 2: /* ld8.fill */
4337 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4338 HOST_WIDE_INT bit = (offset >> 3) & 63;
4340 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4341 new_flags.is_write = (XINT (x, 1) == 1);
4342 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4343 new_flags, pred);
4344 break;
4347 case 3: /* stf.spill */
4348 case 4: /* ldf.spill */
4349 case 8: /* popcnt */
4350 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4351 break;
4353 case 7: /* pred_rel_mutex */
4354 case 9: /* pic call */
4355 case 12: /* mf */
4356 case 19: /* fetchadd_acq */
4357 case 20: /* mov = ar.bsp */
4358 case 21: /* flushrs */
4359 case 22: /* bundle selector */
4360 case 23: /* cycle display */
4361 break;
4363 case 5: /* recip_approx */
4364 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4365 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4366 break;
4368 case 13: /* cmpxchg_acq */
4369 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4370 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4371 break;
4373 default:
4374 abort ();
4376 break;
4378 case UNSPEC_VOLATILE:
4379 switch (XINT (x, 1))
4381 case 0: /* alloc */
4382 /* Alloc must always be the first instruction. Currently, we
4383 only emit it at the function start, so we don't need to worry
4384 about emitting a stop bit before it. */
4385 need_barrier = rws_access_regno (AR_PFS_REGNUM, flags, pred);
4387 new_flags.is_write = 1;
4388 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4389 return need_barrier;
4391 case 1: /* blockage */
4392 case 2: /* insn group barrier */
4393 return 0;
4395 case 5: /* set_bsp */
4396 need_barrier = 1;
4397 break;
4399 case 7: /* pred.rel.mutex */
4400 case 8: /* safe_across_calls all */
4401 case 9: /* safe_across_calls normal */
4402 return 0;
4404 default:
4405 abort ();
4407 break;
4409 case RETURN:
4410 new_flags.is_write = 0;
4411 need_barrier = rws_access_regno (REG_RP, flags, pred);
4412 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4414 new_flags.is_write = 1;
4415 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4416 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4417 break;
4419 default:
4420 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4421 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4422 switch (format_ptr[i])
4424 case '0': /* unused field */
4425 case 'i': /* integer */
4426 case 'n': /* note */
4427 case 'w': /* wide integer */
4428 case 's': /* pointer to string */
4429 case 'S': /* optional pointer to string */
4430 break;
4432 case 'e':
4433 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4434 need_barrier = 1;
4435 break;
4437 case 'E':
4438 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4439 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4440 need_barrier = 1;
4441 break;
4443 default:
4444 abort ();
4446 break;
4448 return need_barrier;
4451 /* Clear out the state for group_barrier_needed_p at the start of a
4452 sequence of insns. */
4454 static void
4455 init_insn_group_barriers ()
4457 memset (rws_sum, 0, sizeof (rws_sum));
4460 /* Cumulative info for the current instruction group. */
4461 struct reg_write_state rws_sum[NUM_REGS];
4463 /* Given the current state, recorded by previous calls to this function,
4464 determine whether a group barrier (a stop bit) is necessary before INSN.
4465 Return nonzero if so. */
4467 static int
4468 group_barrier_needed_p (insn)
4469 rtx insn;
4471 rtx pat;
4472 int need_barrier = 0;
4473 struct reg_flags flags;
4475 memset (&flags, 0, sizeof (flags));
4476 switch (GET_CODE (insn))
4478 case NOTE:
4479 break;
4481 case BARRIER:
4482 /* A barrier doesn't imply an instruction group boundary. */
4483 break;
4485 case CODE_LABEL:
4486 memset (rws_insn, 0, sizeof (rws_insn));
4487 return 1;
4489 case CALL_INSN:
4490 flags.is_branch = 1;
4491 flags.is_sibcall = SIBLING_CALL_P (insn);
4492 memset (rws_insn, 0, sizeof (rws_insn));
4493 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4494 break;
4496 case JUMP_INSN:
4497 flags.is_branch = 1;
4498 /* FALLTHRU */
4500 case INSN:
4501 if (GET_CODE (PATTERN (insn)) == USE
4502 || GET_CODE (PATTERN (insn)) == CLOBBER)
4503 /* Don't care about USE and CLOBBER "insns"---those are used to
4504 indicate to the optimizer that it shouldn't get rid of
4505 certain operations. */
4506 break;
4508 pat = PATTERN (insn);
4510 /* Ug. Hack hacks hacked elsewhere. */
4511 switch (recog_memoized (insn))
4513 /* We play dependency tricks with the epilogue in order
4514 to get proper schedules. Undo this for dv analysis. */
4515 case CODE_FOR_epilogue_deallocate_stack:
4516 pat = XVECEXP (pat, 0, 0);
4517 break;
4519 /* The pattern we use for br.cloop confuses the code above.
4520 The second element of the vector is representative. */
4521 case CODE_FOR_doloop_end_internal:
4522 pat = XVECEXP (pat, 0, 1);
4523 break;
4525 /* Doesn't generate code. */
4526 case CODE_FOR_pred_rel_mutex:
4527 return 0;
4529 default:
4530 break;
4533 memset (rws_insn, 0, sizeof (rws_insn));
4534 need_barrier = rtx_needs_barrier (pat, flags, 0);
4536 /* Check to see if the previous instruction was a volatile
4537 asm. */
4538 if (! need_barrier)
4539 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4541 break;
4543 default:
4544 abort ();
4546 return need_barrier;
4549 /* Like group_barrier_needed_p, but do not clobber the current state. */
4551 static int
4552 safe_group_barrier_needed_p (insn)
4553 rtx insn;
4555 struct reg_write_state rws_saved[NUM_REGS];
4556 int t;
4557 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4558 t = group_barrier_needed_p (insn);
4559 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4560 return t;
4563 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4564 as necessary to eliminate dependencies. This function assumes that
4565 a final instruction scheduling pass has been run which has already
4566 inserted most of the necessary stop bits. This function only inserts
4567 new ones at basic block boundaries, since these are invisible to the
4568 scheduler. */
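/* Hypothetical example (added for exposition): if a value is defined at
   the end of one block and used at the start of a successor reached through
   a label, the second scheduling pass never saw the two insns together, so
   no stop bit separates them:

	mov r14 = r32
   L1:
	add r15 = r14, r33

   This pass detects the cross-block dependency and emits an
   insn_group_barrier (printed as ";;") just before the label.  */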
4570 static void
4571 emit_insn_group_barriers (dump, insns)
4572 FILE *dump;
4573 rtx insns;
4575 rtx insn;
4576 rtx last_label = 0;
4577 int insns_since_last_label = 0;
4579 init_insn_group_barriers ();
4581 for (insn = insns; insn; insn = NEXT_INSN (insn))
4583 if (GET_CODE (insn) == CODE_LABEL)
4585 if (insns_since_last_label)
4586 last_label = insn;
4587 insns_since_last_label = 0;
4589 else if (GET_CODE (insn) == NOTE
4590 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4592 if (insns_since_last_label)
4593 last_label = insn;
4594 insns_since_last_label = 0;
4596 else if (GET_CODE (insn) == INSN
4597 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4598 && XINT (PATTERN (insn), 1) == 2)
4600 init_insn_group_barriers ();
4601 last_label = 0;
4603 else if (INSN_P (insn))
4605 insns_since_last_label = 1;
4607 if (group_barrier_needed_p (insn))
4609 if (last_label)
4611 if (dump)
4612 fprintf (dump, "Emitting stop before label %d\n",
4613 INSN_UID (last_label));
4614 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4615 insn = last_label;
4617 init_insn_group_barriers ();
4618 last_label = 0;
4625 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4626 This function has to emit all necessary group barriers. */
4628 static void
4629 emit_all_insn_group_barriers (dump, insns)
4630 FILE *dump ATTRIBUTE_UNUSED;
4631 rtx insns;
4633 rtx insn;
4635 init_insn_group_barriers ();
4637 for (insn = insns; insn; insn = NEXT_INSN (insn))
4639 if (GET_CODE (insn) == INSN
4640 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4641 && XINT (PATTERN (insn), 1) == 2)
4642 init_insn_group_barriers ();
4643 else if (INSN_P (insn))
4645 if (group_barrier_needed_p (insn))
4647 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4648 init_insn_group_barriers ();
4649 group_barrier_needed_p (insn);
4655 static int errata_find_address_regs PARAMS ((rtx *, void *));
4656 static void errata_emit_nops PARAMS ((rtx));
4657 static void fixup_errata PARAMS ((void));
4659 /* This structure is used to track some details about the previous insn
4660 groups so we can determine if it may be necessary to insert NOPs to
4661 work around hardware errata. */
4662 static struct group
4664 HARD_REG_SET p_reg_set;
4665 HARD_REG_SET gr_reg_conditionally_set;
4666 } last_group[2];
4668 /* Index into the last_group array. */
4669 static int group_idx;
4671 /* Called through for_each_rtx; determines if a hard register that was
4672 conditionally set in the previous group is used as an address register.
4673 It ensures that for_each_rtx returns 1 in that case. */
4674 static int
4675 errata_find_address_regs (xp, data)
4676 rtx *xp;
4677 void *data ATTRIBUTE_UNUSED;
4679 rtx x = *xp;
4680 if (GET_CODE (x) != MEM)
4681 return 0;
4682 x = XEXP (x, 0);
4683 if (GET_CODE (x) == POST_MODIFY)
4684 x = XEXP (x, 0);
4685 if (GET_CODE (x) == REG)
4687 struct group *prev_group = last_group + (group_idx ^ 1);
4688 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4689 REGNO (x)))
4690 return 1;
4691 return -1;
4693 return 0;
4696 /* Called for each insn; this function keeps track of the state in
4697 last_group and emits additional NOPs if necessary to work around
4698 an Itanium A/B step erratum. */
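/* Hypothetical sequence (added for exposition) showing the pattern the
   workaround looks for:

	group 1:   fcmp.eq p6, p7 = f8, f9	(an F insn writes a predicate)
	group 2:   (p6) mov r14 = r32		(a GR is conditionally set)
	group 3:   ld8 r15 = [r14]		(that GR is used as an address)

   When this is detected, a stop bit, a nop and another stop bit are emitted
   in front of the load, so that an extra group separates the conditional
   set from the address use.  */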
4699 static void
4700 errata_emit_nops (insn)
4701 rtx insn;
4703 struct group *this_group = last_group + group_idx;
4704 struct group *prev_group = last_group + (group_idx ^ 1);
4705 rtx pat = PATTERN (insn);
4706 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4707 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4708 enum attr_type type;
4709 rtx set = real_pat;
4711 if (GET_CODE (real_pat) == USE
4712 || GET_CODE (real_pat) == CLOBBER
4713 || GET_CODE (real_pat) == ASM_INPUT
4714 || GET_CODE (real_pat) == ADDR_VEC
4715 || GET_CODE (real_pat) == ADDR_DIFF_VEC
4716 || asm_noperands (PATTERN (insn)) >= 0)
4717 return;
4719 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4720 parts of it. */
4722 if (GET_CODE (set) == PARALLEL)
4724 int i;
4725 set = XVECEXP (real_pat, 0, 0);
4726 for (i = 1; i < XVECLEN (real_pat, 0); i++)
4727 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
4728 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
4730 set = 0;
4731 break;
4735 if (set && GET_CODE (set) != SET)
4736 set = 0;
4738 type = get_attr_type (insn);
4740 if (type == TYPE_F
4741 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
4742 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
4744 if ((type == TYPE_M || type == TYPE_A) && cond && set
4745 && REG_P (SET_DEST (set))
4746 && GET_CODE (SET_SRC (set)) != PLUS
4747 && GET_CODE (SET_SRC (set)) != MINUS
4748 && (GET_CODE (SET_SRC (set)) != ASHIFT
4749 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
4750 && (GET_CODE (SET_SRC (set)) != MEM
4751 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
4752 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
4754 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
4755 || ! REG_P (XEXP (cond, 0)))
4756 abort ();
4758 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
4759 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
4761 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
4763 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4764 emit_insn_before (gen_nop (), insn);
4765 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4766 group_idx = 0;
4767 memset (last_group, 0, sizeof last_group);
4771 /* Emit extra nops if they are required to work around hardware errata. */
4773 static void
4774 fixup_errata ()
4776 rtx insn;
4778 if (! TARGET_B_STEP)
4779 return;
4781 group_idx = 0;
4782 memset (last_group, 0, sizeof last_group);
4784 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4786 if (!INSN_P (insn))
4787 continue;
4789 if (ia64_safe_type (insn) == TYPE_S)
4791 group_idx ^= 1;
4792 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
4794 else
4795 errata_emit_nops (insn);
4799 /* Instruction scheduling support. */
4800 /* Describe one bundle. */
4802 struct bundle
4804 /* Zero if there's no possibility of a stop in this bundle other than
4805 at the end, otherwise the position of the optional stop bit. */
4806 int possible_stop;
4807 /* The types of the three slots. */
4808 enum attr_type t[3];
4809 /* The pseudo op to be emitted into the assembler output. */
4810 const char *name;
4813 #define NR_BUNDLES 10
4815 /* A list of all available bundles. */
4817 static const struct bundle bundle[NR_BUNDLES] =
4819 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
4820 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
4821 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
4822 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
4823 #if NR_BUNDLES == 10
4824 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
4825 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
4826 #endif
4827 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
4828 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
4829 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
4830 /* .mfi needs to occur earlier than .mlx, so that we only generate .mlx if
4831 it matches an L type insn. Otherwise we'll try to generate L type
4832 nops. */
4833 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
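/* Added note: these correspond to the architectural bundle templates.
   POSSIBLE_STOP records the slot in front of which a mid-bundle stop
   variant of the template exists: .mii covers both MII and MI;I (stop
   before slot 2), and .mmi covers both MMI and M;MI (stop before slot 1).
   The remaining templates only allow a stop at the end of the bundle.  */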
4836 /* Describe a packet of instructions. Packets consist of two bundles that
4837 are visible to the hardware in one scheduling window. */
4839 struct ia64_packet
4841 const struct bundle *t1, *t2;
4842 /* Precomputed value of the first split issue in this packet if a cycle
4843 starts at its beginning. */
4844 int first_split;
4845 /* For convenience, the insn types are replicated here so we don't have
4846 to go through T1 and T2 all the time. */
4847 enum attr_type t[6];
4850 /* An array containing all possible packets. */
4851 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
4852 static struct ia64_packet packets[NR_PACKETS];
4854 /* Map attr_type to a string with the name. */
4856 static const char *type_names[] =
4858 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
4861 /* Nonzero if we should insert stop bits into the schedule. */
4862 int ia64_final_schedule = 0;
4864 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
4865 static rtx ia64_single_set PARAMS ((rtx));
4866 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
4867 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
4868 static void maybe_rotate PARAMS ((FILE *));
4869 static void finish_last_head PARAMS ((FILE *, int));
4870 static void rotate_one_bundle PARAMS ((FILE *));
4871 static void rotate_two_bundles PARAMS ((FILE *));
4872 static void cycle_end_fill_slots PARAMS ((FILE *));
4873 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
4874 static int get_split PARAMS ((const struct ia64_packet *, int));
4875 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
4876 const struct ia64_packet *, int));
4877 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
4878 rtx *, enum attr_type *, int));
4879 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
4880 static void dump_current_packet PARAMS ((FILE *));
4881 static void schedule_stop PARAMS ((FILE *));
4882 static rtx gen_nop_type PARAMS ((enum attr_type));
4883 static void ia64_emit_nops PARAMS ((void));
4885 /* Map a bundle number to its pseudo-op. */
4887 const char *
4888 get_bundle_name (b)
4889 int b;
4891 return bundle[b].name;
4894 /* Compute the slot which will cause a split issue in packet P if the
4895 current cycle begins at slot BEGIN. */
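/* Worked example using the per-cycle limits encoded below (two M, two I,
   three B and one F slot instruction per issue window): for the packet
   .mmi .mfi, T[] is { M, M, I, M, F, I }.  Starting at slot 0, the M in
   slot 3 would be the third M of the window and exceeds the two-M limit,
   so the split issue is at slot 3 and only the first bundle issues in the
   current cycle.  */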
4897 static int
4898 itanium_split_issue (p, begin)
4899 const struct ia64_packet *p;
4900 int begin;
4902 int type_count[TYPE_S];
4903 int i;
4904 int split = 6;
4906 if (begin < 3)
4908 /* Always split before and after MMF. */
4909 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
4910 return 3;
4911 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
4912 return 3;
4913 /* Always split after MBB and BBB. */
4914 if (p->t[1] == TYPE_B)
4915 return 3;
4916 /* Split after first bundle in MIB BBB combination. */
4917 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
4918 return 3;
4921 memset (type_count, 0, sizeof type_count);
4922 for (i = begin; i < split; i++)
4924 enum attr_type t0 = p->t[i];
4925 /* An MLX bundle reserves the same units as an MFI bundle. */
4926 enum attr_type t = (t0 == TYPE_L ? TYPE_F
4927 : t0 == TYPE_X ? TYPE_I
4928 : t0);
4929 int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
4930 if (type_count[t] == max)
4931 return i;
4932 type_count[t]++;
4934 return split;
4937 /* Return the maximum number of instructions a cpu can issue. */
4940 ia64_issue_rate ()
4942 return 6;
4945 /* Helper function - like single_set, but look inside COND_EXEC. */
4947 static rtx
4948 ia64_single_set (insn)
4949 rtx insn;
4951 rtx x = PATTERN (insn);
4952 if (GET_CODE (x) == COND_EXEC)
4953 x = COND_EXEC_CODE (x);
4954 if (GET_CODE (x) == SET)
4955 return x;
4956 return single_set_2 (insn, x);
4959 /* Adjust the cost of a scheduling dependency. Return the new cost of
4960 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
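/* Illustrative cases, restating the rules below: when an integer ALU or
   load result is used to form the address of a later load or store, the
   cost is increased by one cycle; when it feeds a multimedia multiply or
   shift the cost becomes 3.  Anti- and output dependencies, and anything
   feeding a stop-bit (TYPE_S) insn, are treated as free.  */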
4963 ia64_adjust_cost (insn, link, dep_insn, cost)
4964 rtx insn, link, dep_insn;
4965 int cost;
4967 enum attr_type dep_type;
4968 enum attr_itanium_class dep_class;
4969 enum attr_itanium_class insn_class;
4970 rtx dep_set, set, src, addr;
4972 if (GET_CODE (PATTERN (insn)) == CLOBBER
4973 || GET_CODE (PATTERN (insn)) == USE
4974 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
4975 || GET_CODE (PATTERN (dep_insn)) == USE
4976 /* @@@ Not accurate for indirect calls. */
4977 || GET_CODE (insn) == CALL_INSN
4978 || ia64_safe_type (insn) == TYPE_S)
4979 return 0;
4981 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
4982 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
4983 return 0;
4985 dep_type = ia64_safe_type (dep_insn);
4986 dep_class = ia64_safe_itanium_class (dep_insn);
4987 insn_class = ia64_safe_itanium_class (insn);
4989 /* Compares that feed a conditional branch can execute in the same
4990 cycle. */
4991 dep_set = ia64_single_set (dep_insn);
4992 set = ia64_single_set (insn);
4994 if (dep_type != TYPE_F
4995 && dep_set
4996 && GET_CODE (SET_DEST (dep_set)) == REG
4997 && PR_REG (REGNO (SET_DEST (dep_set)))
4998 && GET_CODE (insn) == JUMP_INSN)
4999 return 0;
5001 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5003 /* ??? Can't find any information in the documentation about whether
5004 a sequence
5005 st [rx] = ra
5006 ld rb = [ry]
5007 splits issue. Assume it doesn't. */
5008 return 0;
5011 src = set ? SET_SRC (set) : 0;
5012 addr = 0;
5013 if (set && GET_CODE (SET_DEST (set)) == MEM)
5014 addr = XEXP (SET_DEST (set), 0);
5015 else if (set && GET_CODE (src) == MEM)
5016 addr = XEXP (src, 0);
5017 else if (set && GET_CODE (src) == ZERO_EXTEND
5018 && GET_CODE (XEXP (src, 0)) == MEM)
5019 addr = XEXP (XEXP (src, 0), 0);
5020 else if (set && GET_CODE (src) == UNSPEC
5021 && XVECLEN (XEXP (src, 0), 0) > 0
5022 && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
5023 addr = XEXP (XVECEXP (src, 0, 0), 0);
5024 if (addr && GET_CODE (addr) == POST_MODIFY)
5025 addr = XEXP (addr, 0);
5027 set = ia64_single_set (dep_insn);
5029 if ((dep_class == ITANIUM_CLASS_IALU
5030 || dep_class == ITANIUM_CLASS_ILOG
5031 || dep_class == ITANIUM_CLASS_LD)
5032 && (insn_class == ITANIUM_CLASS_LD
5033 || insn_class == ITANIUM_CLASS_ST))
5035 if (! addr || ! set)
5036 abort ();
5037 /* This isn't completely correct - an IALU that feeds an address has
5038 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5039 otherwise. Unfortunately there's no good way to describe this. */
5040 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5041 return cost + 1;
5043 if ((dep_class == ITANIUM_CLASS_IALU
5044 || dep_class == ITANIUM_CLASS_ILOG
5045 || dep_class == ITANIUM_CLASS_LD)
5046 && (insn_class == ITANIUM_CLASS_MMMUL
5047 || insn_class == ITANIUM_CLASS_MMSHF
5048 || insn_class == ITANIUM_CLASS_MMSHFI))
5049 return 3;
5050 if (dep_class == ITANIUM_CLASS_FMAC
5051 && (insn_class == ITANIUM_CLASS_FMISC
5052 || insn_class == ITANIUM_CLASS_FCVTFX
5053 || insn_class == ITANIUM_CLASS_XMPY))
5054 return 7;
5055 if ((dep_class == ITANIUM_CLASS_FMAC
5056 || dep_class == ITANIUM_CLASS_FMISC
5057 || dep_class == ITANIUM_CLASS_FCVTFX
5058 || dep_class == ITANIUM_CLASS_XMPY)
5059 && insn_class == ITANIUM_CLASS_STF)
5060 return 8;
5061 if ((dep_class == ITANIUM_CLASS_MMMUL
5062 || dep_class == ITANIUM_CLASS_MMSHF
5063 || dep_class == ITANIUM_CLASS_MMSHFI)
5064 && (insn_class == ITANIUM_CLASS_LD
5065 || insn_class == ITANIUM_CLASS_ST
5066 || insn_class == ITANIUM_CLASS_IALU
5067 || insn_class == ITANIUM_CLASS_ILOG
5068 || insn_class == ITANIUM_CLASS_ISHF))
5069 return 4;
5071 return cost;
5074 /* Describe the current state of the Itanium pipeline. */
5075 static struct
5077 /* The first slot that is used in the current cycle. */
5078 int first_slot;
5079 /* The next slot to fill. */
5080 int cur;
5081 /* The packet we have selected for the current issue window. */
5082 const struct ia64_packet *packet;
5083 /* The position of the split issue that occurs due to issue width
5084 limitations (6 if there's no split issue). */
5085 int split;
5086 /* Record data about the insns scheduled so far in the same issue
5087 window. The elements up to but not including FIRST_SLOT belong
5088 to the previous cycle, the ones starting with FIRST_SLOT belong
5089 to the current cycle. */
5090 enum attr_type types[6];
5091 rtx insns[6];
5092 int stopbit[6];
5093 /* Nonzero if we decided to schedule a stop bit. */
5094 int last_was_stop;
5095 } sched_data;
5097 /* Temporary arrays; they have enough elements to hold all insns that
5098 can be ready at the same time while scheduling the current block.
5099 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5100 static rtx *sched_ready;
5101 static enum attr_type *sched_types;
5103 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5104 of packet P. */
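/* Restating the rules implemented below with an example: a TYPE_A insn
   (e.g. a simple add) may be placed in either an M slot or an I slot,
   while an insn whose attribute says it requires unit 0 is also rejected
   if an earlier slot of the current cycle in this packet already has the
   same slot type.  */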
5106 static int
5107 insn_matches_slot (p, itype, slot, insn)
5108 const struct ia64_packet *p;
5109 enum attr_type itype;
5110 int slot;
5111 rtx insn;
5113 enum attr_itanium_requires_unit0 u0;
5114 enum attr_type stype = p->t[slot];
5116 if (insn)
5118 u0 = ia64_safe_itanium_requires_unit0 (insn);
5119 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5121 int i;
5122 for (i = sched_data.first_slot; i < slot; i++)
5123 if (p->t[i] == stype)
5124 return 0;
5126 if (GET_CODE (insn) == CALL_INSN)
5128 /* Reject calls in multiway branch packets. We want to limit
5129 the number of multiway branches we generate (since the branch
5130 predictor is limited), and this seems to work fairly well.
5131 (If we didn't do this, we'd have to add another test here to
5132 force calls into the third slot of the bundle.) */
5133 if (slot < 3)
5135 if (p->t[1] == TYPE_B)
5136 return 0;
5138 else
5140 if (p->t[4] == TYPE_B)
5141 return 0;
5146 if (itype == stype)
5147 return 1;
5148 if (itype == TYPE_A)
5149 return stype == TYPE_M || stype == TYPE_I;
5150 return 0;
5153 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5154 assembly output a bit prettier. */
5156 static void
5157 ia64_emit_insn_before (insn, before)
5158 rtx insn, before;
5160 rtx prev = PREV_INSN (before);
5161 if (prev && GET_CODE (prev) == INSN
5162 && GET_CODE (PATTERN (prev)) == UNSPEC
5163 && XINT (PATTERN (prev), 1) == 23)
5164 before = prev;
5165 emit_insn_before (insn, before);
5168 #if 0
5169 /* Generate a nop insn of the given type. Note we never generate L type
5170 nops. */
5172 static rtx
5173 gen_nop_type (t)
5174 enum attr_type t;
5176 switch (t)
5178 case TYPE_M:
5179 return gen_nop_m ();
5180 case TYPE_I:
5181 return gen_nop_i ();
5182 case TYPE_B:
5183 return gen_nop_b ();
5184 case TYPE_F:
5185 return gen_nop_f ();
5186 case TYPE_X:
5187 return gen_nop_x ();
5188 default:
5189 abort ();
5192 #endif
5194 /* When rotating a bundle out of the issue window, insert a bundle selector
5195 insn in front of it. DUMP is the scheduling dump file or NULL. START
5196 is either 0 or 3, depending on whether we want to emit a bundle selector
5197 for the first bundle or the second bundle in the current issue window.
5199 The selector insns are emitted this late because the selected packet can
5200 be changed until parts of it get rotated out. */
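/* For example (illustrative): if the bundle being rotated out was selected
   as .mmi, the bundle_selector insn emitted here is printed by the output
   machinery as the ".mmi" pseudo-op directly in front of the bundle's
   first real insn.  */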
5202 static void
5203 finish_last_head (dump, start)
5204 FILE *dump;
5205 int start;
5207 const struct ia64_packet *p = sched_data.packet;
5208 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5209 int bundle_type = b - bundle;
5210 rtx insn;
5211 int i;
5213 if (! ia64_final_schedule)
5214 return;
5216 for (i = start; sched_data.insns[i] == 0; i++)
5217 if (i == start + 3)
5218 abort ();
5219 insn = sched_data.insns[i];
5221 if (dump)
5222 fprintf (dump, "// Emitting template before %d: %s\n",
5223 INSN_UID (insn), b->name);
5225 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5228 /* We can't schedule more insns this cycle. Fix up the scheduling state
5229 and advance FIRST_SLOT and CUR.
5230 We have to distribute the insns that are currently found between
5231 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5232 far, they are stored successively in the fields starting at FIRST_SLOT;
5233 now they must be moved to the correct slots.
5234 DUMP is the current scheduling dump file, or NULL. */
5236 static void
5237 cycle_end_fill_slots (dump)
5238 FILE *dump;
5240 const struct ia64_packet *packet = sched_data.packet;
5241 int slot, i;
5242 enum attr_type tmp_types[6];
5243 rtx tmp_insns[6];
5245 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5246 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5248 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5250 enum attr_type t = tmp_types[i];
5251 if (t != ia64_safe_type (tmp_insns[i]))
5252 abort ();
5253 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5255 if (slot > sched_data.split)
5256 abort ();
5257 if (dump)
5258 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5259 type_names[t]);
5260 sched_data.types[slot] = packet->t[slot];
5261 sched_data.insns[slot] = 0;
5262 sched_data.stopbit[slot] = 0;
5263 slot++;
5265 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5266 actual slot type later. */
5267 sched_data.types[slot] = packet->t[slot];
5268 sched_data.insns[slot] = tmp_insns[i];
5269 sched_data.stopbit[slot] = 0;
5270 slot++;
5273 /* This isn't right - there's no need to pad out until the forced split;
5274 the CPU will automatically split if an insn isn't ready. */
5275 #if 0
5276 while (slot < sched_data.split)
5278 sched_data.types[slot] = packet->t[slot];
5279 sched_data.insns[slot] = 0;
5280 sched_data.stopbit[slot] = 0;
5281 slot++;
5283 #endif
5285 sched_data.first_slot = sched_data.cur = slot;
5288 /* Bundle rotations, as described in the Itanium optimization manual.
5289 We can rotate either one or both bundles out of the issue window.
5290 DUMP is the current scheduling dump file, or NULL. */
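/* For instance (illustrative): with all six slots filled,
   rotate_two_bundles retires both bundles from the window; with three to
   five slots filled, rotate_one_bundle shifts the second bundle's slots
   down to positions 0..2 so that a fresh bundle can be chosen for the
   upper half of the window.  */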
5292 static void
5293 rotate_one_bundle (dump)
5294 FILE *dump;
5296 if (dump)
5297 fprintf (dump, "// Rotating one bundle.\n");
5299 finish_last_head (dump, 0);
5300 if (sched_data.cur > 3)
5302 sched_data.cur -= 3;
5303 sched_data.first_slot -= 3;
5304 memmove (sched_data.types,
5305 sched_data.types + 3,
5306 sched_data.cur * sizeof *sched_data.types);
5307 memmove (sched_data.stopbit,
5308 sched_data.stopbit + 3,
5309 sched_data.cur * sizeof *sched_data.stopbit);
5310 memmove (sched_data.insns,
5311 sched_data.insns + 3,
5312 sched_data.cur * sizeof *sched_data.insns);
5314 else
5316 sched_data.cur = 0;
5317 sched_data.first_slot = 0;
5321 static void
5322 rotate_two_bundles (dump)
5323 FILE *dump;
5325 if (dump)
5326 fprintf (dump, "// Rotating two bundles.\n");
5328 if (sched_data.cur == 0)
5329 return;
5331 finish_last_head (dump, 0);
5332 if (sched_data.cur > 3)
5333 finish_last_head (dump, 3);
5334 sched_data.cur = 0;
5335 sched_data.first_slot = 0;
5338 /* We're beginning a new block. Initialize data structures as necessary. */
5340 void
5341 ia64_sched_init (dump, sched_verbose, max_ready)
5342 FILE *dump ATTRIBUTE_UNUSED;
5343 int sched_verbose ATTRIBUTE_UNUSED;
5344 int max_ready;
5346 static int initialized = 0;
5348 if (! initialized)
5350 int b1, b2, i;
5352 initialized = 1;
5354 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5356 const struct bundle *t1 = bundle + b1;
5357 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5359 const struct bundle *t2 = bundle + b2;
5361 packets[i].t1 = t1;
5362 packets[i].t2 = t2;
5365 for (i = 0; i < NR_PACKETS; i++)
5367 int j;
5368 for (j = 0; j < 3; j++)
5369 packets[i].t[j] = packets[i].t1->t[j];
5370 for (j = 0; j < 3; j++)
5371 packets[i].t[j + 3] = packets[i].t2->t[j];
5372 packets[i].first_split = itanium_split_issue (packets + i, 0);
5377 init_insn_group_barriers ();
5379 memset (&sched_data, 0, sizeof sched_data);
5380 sched_types = (enum attr_type *) xmalloc (max_ready
5381 * sizeof (enum attr_type));
5382 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5385 /* See if the packet P can match the insns we have already scheduled. Return
5386 nonzero if so. In *PSLOT, we store the first slot that is available for
5387 more instructions if we choose this packet.
5388 SPLIT holds the last slot we can use; there's a split issue after it, so
5389 scheduling beyond it would cause us to use more than one cycle. */
5391 static int
5392 packet_matches_p (p, split, pslot)
5393 const struct ia64_packet *p;
5394 int split;
5395 int *pslot;
5397 int filled = sched_data.cur;
5398 int first = sched_data.first_slot;
5399 int i, slot;
5401 /* First, check if the first of the two bundles must be a specific one (due
5402 to stop bits). */
5403 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5404 return 0;
5405 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5406 return 0;
5408 for (i = 0; i < first; i++)
5409 if (! insn_matches_slot (p, sched_data.types[i], i,
5410 sched_data.insns[i]))
5411 return 0;
5412 for (i = slot = first; i < filled; i++)
5414 while (slot < split)
5416 if (insn_matches_slot (p, sched_data.types[i], slot,
5417 sched_data.insns[i]))
5418 break;
5419 slot++;
5421 if (slot == split)
5422 return 0;
5423 slot++;
5426 if (pslot)
5427 *pslot = slot;
5428 return 1;
5431 /* A frontend for itanium_split_issue. For a packet P and a slot
5432 number FIRST that describes the start of the current clock cycle,
5433 return the slot number of the first split issue. This function
5434 uses the cached number found in P if possible. */
5436 static int
5437 get_split (p, first)
5438 const struct ia64_packet *p;
5439 int first;
5441 if (first == 0)
5442 return p->first_split;
5443 return itanium_split_issue (p, first);
5446 /* Given N_READY insns in the array READY, whose types are found in the
5447 corresponding array TYPES, return the insn that is best suited to be
5448 scheduled in slot SLOT of packet P. */
5450 static int
5451 find_best_insn (ready, types, n_ready, p, slot)
5452 rtx *ready;
5453 enum attr_type *types;
5454 int n_ready;
5455 const struct ia64_packet *p;
5456 int slot;
5458 int best = -1;
5459 int best_pri = 0;
5460 while (n_ready-- > 0)
5462 rtx insn = ready[n_ready];
5463 if (! insn)
5464 continue;
5465 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5466 break;
5467 /* If we have equally good insns, one of which has a stricter
5468 slot requirement, prefer the one with the stricter requirement. */
5469 if (best >= 0 && types[n_ready] == TYPE_A)
5470 continue;
5471 if (insn_matches_slot (p, types[n_ready], slot, insn))
5473 best = n_ready;
5474 best_pri = INSN_PRIORITY (ready[best]);
5476 /* If there's no way we could get a stricter requirement, stop
5477 looking now. */
5478 if (types[n_ready] != TYPE_A
5479 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5480 break;
5481 break;
5484 return best;
5487 /* Select the best packet to use given the current scheduler state and the
5488 current ready list.
5489 READY is an array holding N_READY ready insns; TYPES is a corresponding
5490 array that holds their types. Store the best packet in *PPACKET and the
5491 number of insns that can be scheduled in the current cycle in *PBEST. */
5493 static void
5494 find_best_packet (pbest, ppacket, ready, types, n_ready)
5495 int *pbest;
5496 const struct ia64_packet **ppacket;
5497 rtx *ready;
5498 enum attr_type *types;
5499 int n_ready;
5501 int first = sched_data.first_slot;
5502 int best = 0;
5503 int lowest_end = 6;
5504 const struct ia64_packet *best_packet = NULL;
5505 int i;
5507 for (i = 0; i < NR_PACKETS; i++)
5509 const struct ia64_packet *p = packets + i;
5510 int slot;
5511 int split = get_split (p, first);
5512 int win = 0;
5513 int first_slot, last_slot;
5514 int b_nops = 0;
5516 if (! packet_matches_p (p, split, &first_slot))
5517 continue;
5519 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5521 win = 0;
5522 last_slot = 6;
5523 for (slot = first_slot; slot < split; slot++)
5525 int insn_nr;
5527 /* Disallow a degenerate case where the first bundle doesn't
5528 contain anything but NOPs! */
5529 if (first_slot == 0 && win == 0 && slot == 3)
5531 win = -1;
5532 break;
5535 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5536 if (insn_nr >= 0)
5538 sched_ready[insn_nr] = 0;
5539 last_slot = slot;
5540 win++;
5542 else if (p->t[slot] == TYPE_B)
5543 b_nops++;
5545 /* We must disallow MBB/BBB packets if any of their B slots would be
5546 filled with nops. */
5547 if (last_slot < 3)
5549 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5550 win = -1;
5552 else
5554 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5555 win = -1;
5558 if (win > best
5559 || (win == best && last_slot < lowest_end))
5561 best = win;
5562 lowest_end = last_slot;
5563 best_packet = p;
5566 *pbest = best;
5567 *ppacket = best_packet;
5570 /* Reorder the ready list so that the insns that can be issued in this cycle
5571 are found in the correct order at the end of the list.
5572 DUMP is the scheduling dump file, or NULL. READY points to the start,
5573 E_READY to the end of the ready list. MAY_FAIL determines what should be
5574 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5575 otherwise we return 0.
5576 Return 1 if any insns can be scheduled in this cycle. */
5578 static int
5579 itanium_reorder (dump, ready, e_ready, may_fail)
5580 FILE *dump;
5581 rtx *ready;
5582 rtx *e_ready;
5583 int may_fail;
5585 const struct ia64_packet *best_packet;
5586 int n_ready = e_ready - ready;
5587 int first = sched_data.first_slot;
5588 int i, best, best_split, filled;
5590 for (i = 0; i < n_ready; i++)
5591 sched_types[i] = ia64_safe_type (ready[i]);
5593 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5595 if (best == 0)
5597 if (may_fail)
5598 return 0;
5599 abort ();
5602 if (dump)
5604 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5605 best_packet->t1->name,
5606 best_packet->t2 ? best_packet->t2->name : NULL, best);
5609 best_split = itanium_split_issue (best_packet, first);
5610 packet_matches_p (best_packet, best_split, &filled);
5612 for (i = filled; i < best_split; i++)
5614 int insn_nr;
5616 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5617 if (insn_nr >= 0)
5619 rtx insn = ready[insn_nr];
5620 memmove (ready + insn_nr, ready + insn_nr + 1,
5621 (n_ready - insn_nr - 1) * sizeof (rtx));
5622 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5623 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5624 ready[--n_ready] = insn;
5628 sched_data.packet = best_packet;
5629 sched_data.split = best_split;
5630 return 1;
5633 /* Dump information about the current scheduling state to file DUMP. */
5635 static void
5636 dump_current_packet (dump)
5637 FILE *dump;
5639 int i;
5640 fprintf (dump, "// %d slots filled:", sched_data.cur);
5641 for (i = 0; i < sched_data.first_slot; i++)
5643 rtx insn = sched_data.insns[i];
5644 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5645 if (insn)
5646 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5647 if (sched_data.stopbit[i])
5648 fprintf (dump, " ;;");
5650 fprintf (dump, " :::");
5651 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5653 rtx insn = sched_data.insns[i];
5654 enum attr_type t = ia64_safe_type (insn);
5655 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5657 fprintf (dump, "\n");
5660 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5661 NULL. */
5663 static void
5664 schedule_stop (dump)
5665 FILE *dump;
5667 const struct ia64_packet *best = sched_data.packet;
5668 int i;
5669 int best_stop = 6;
5671 if (dump)
5672 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5674 if (sched_data.cur == 0)
5676 if (dump)
5677 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5679 rotate_two_bundles (NULL);
5680 return;
5683 for (i = -1; i < NR_PACKETS; i++)
5685 /* This is a slight hack to give the current packet the first chance.
5686 This is done to avoid e.g. switching from MIB to MBB bundles. */
5687 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
5688 int split = get_split (p, sched_data.first_slot);
5689 const struct bundle *compare;
5690 int next, stoppos;
5692 if (! packet_matches_p (p, split, &next))
5693 continue;
5695 compare = next > 3 ? p->t2 : p->t1;
5697 stoppos = 3;
5698 if (compare->possible_stop)
5699 stoppos = compare->possible_stop;
5700 if (next > 3)
5701 stoppos += 3;
5703 if (stoppos < next || stoppos >= best_stop)
5705 if (compare->possible_stop == 0)
5706 continue;
5707 stoppos = (next > 3 ? 6 : 3);
5709 if (stoppos < next || stoppos >= best_stop)
5710 continue;
5712 if (dump)
5713 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
5714 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
5715 stoppos);
5717 best_stop = stoppos;
5718 best = p;
5721 sched_data.packet = best;
5722 cycle_end_fill_slots (dump);
5723 while (sched_data.cur < best_stop)
5725 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
5726 sched_data.insns[sched_data.cur] = 0;
5727 sched_data.stopbit[sched_data.cur] = 0;
5728 sched_data.cur++;
5730 sched_data.stopbit[sched_data.cur - 1] = 1;
5731 sched_data.first_slot = best_stop;
5733 if (dump)
5734 dump_current_packet (dump);
5737 /* If necessary, perform one or two rotations on the scheduling state.
5738 This should only be called if we are starting a new cycle. */
5740 static void
5741 maybe_rotate (dump)
5742 FILE *dump;
5744 if (sched_data.cur == 6)
5745 rotate_two_bundles (dump);
5746 else if (sched_data.cur >= 3)
5747 rotate_one_bundle (dump);
5748 sched_data.first_slot = sched_data.cur;
5751 /* We are about to begin issuing insns for this clock cycle.
5752 Override the default sort algorithm to better slot instructions. */
5755 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, reorder_type)
5756 FILE *dump ATTRIBUTE_UNUSED;
5757 int sched_verbose ATTRIBUTE_UNUSED;
5758 rtx *ready;
5759 int *pn_ready;
5760 int reorder_type;
5762 int n_ready = *pn_ready;
5763 rtx *e_ready = ready + n_ready;
5764 rtx *insnp;
5765 rtx highest;
5767 if (sched_verbose)
5769 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
5770 dump_current_packet (dump);
5773 if (reorder_type == 0)
5774 maybe_rotate (sched_verbose ? dump : NULL);
5776 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5777 highest = ready[n_ready - 1];
5778 for (insnp = ready; insnp < e_ready; insnp++)
5779 if (insnp < e_ready)
5781 rtx insn = *insnp;
5782 enum attr_type t = ia64_safe_type (insn);
5783 if (t == TYPE_UNKNOWN)
5785 highest = ready[n_ready - 1];
5786 ready[n_ready - 1] = insn;
5787 *insnp = highest;
5788 if (ia64_final_schedule && group_barrier_needed_p (insn))
5790 schedule_stop (sched_verbose ? dump : NULL);
5791 sched_data.last_was_stop = 1;
5792 maybe_rotate (sched_verbose ? dump : NULL);
5794 else if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5795 || asm_noperands (PATTERN (insn)) >= 0)
5797 /* It must be an asm of some kind. */
5798 cycle_end_fill_slots (sched_verbose ? dump : NULL);
5800 return 1;
5804 if (ia64_final_schedule)
5806 int nr_need_stop = 0;
5808 for (insnp = ready; insnp < e_ready; insnp++)
5809 if (safe_group_barrier_needed_p (*insnp))
5810 nr_need_stop++;
5812 /* Schedule a stop bit if
5813 - all insns require a stop bit, or
5814 - we are starting a new cycle and _any_ insns require a stop bit.
5815 The reason for the latter is that if our schedule is accurate, then
5816 the additional stop won't decrease performance at this point (since
5817 there's a split issue at this point anyway), but it gives us more
5818 freedom when scheduling the currently ready insns. */
5819 if ((reorder_type == 0 && nr_need_stop)
5820 || (reorder_type == 1 && n_ready == nr_need_stop))
5822 schedule_stop (sched_verbose ? dump : NULL);
5823 sched_data.last_was_stop = 1;
5824 maybe_rotate (sched_verbose ? dump : NULL);
5825 if (reorder_type == 1)
5826 return 0;
5828 else
5830 int deleted = 0;
5831 insnp = e_ready;
5832 /* Move down everything that needs a stop bit, preserving relative
5833 order. */
5834 while (insnp-- > ready + deleted)
5835 while (insnp >= ready + deleted)
5837 rtx insn = *insnp;
5838 if (! safe_group_barrier_needed_p (insn))
5839 break;
5840 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
5841 *ready = insn;
5842 deleted++;
5844 n_ready -= deleted;
5845 ready += deleted;
5846 if (deleted != nr_need_stop)
5847 abort ();
5851 return itanium_reorder (sched_verbose ? dump : NULL,
5852 ready, e_ready, reorder_type == 1);
5855 /* Like ia64_sched_reorder, but called after issuing each insn.
5856 Override the default sort algorithm to better slot instructions. */
5859 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
5860 FILE *dump ATTRIBUTE_UNUSED;
5861 int sched_verbose ATTRIBUTE_UNUSED;
5862 rtx *ready;
5863 int *pn_ready;
5864 int clock_var ATTRIBUTE_UNUSED;
5866 if (sched_data.last_was_stop)
5867 return 0;
5869 /* Detect one special case and try to optimize it.
5870 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
5871 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
5872 if (sched_data.first_slot == 1
5873 && sched_data.stopbit[0]
5874 && ((sched_data.cur == 4
5875 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
5876 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
5877 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
5878 || (sched_data.cur == 3
5879 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
5880 && (sched_data.types[2] != TYPE_M && sched_data.types[2] != TYPE_I
5881 && sched_data.types[2] != TYPE_A))))
5884 int i, best;
5885 rtx stop = PREV_INSN (sched_data.insns[1]);
5886 rtx pat;
5888 sched_data.stopbit[0] = 0;
5889 sched_data.stopbit[2] = 1;
5890 if (GET_CODE (stop) != INSN)
5891 abort ();
5893 pat = PATTERN (stop);
5894 /* Ignore cycle displays. */
5895 if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
5896 stop = PREV_INSN (stop);
5897 pat = PATTERN (stop);
5898 if (GET_CODE (pat) != UNSPEC_VOLATILE
5899 || XINT (pat, 1) != 2
5900 || INTVAL (XVECEXP (pat, 0, 0)) != 1)
5901 abort ();
5902 XVECEXP (pat, 0, 0) = GEN_INT (3);
5904 sched_data.types[5] = sched_data.types[3];
5905 sched_data.types[4] = sched_data.types[2];
5906 sched_data.types[3] = sched_data.types[1];
5907 sched_data.insns[5] = sched_data.insns[3];
5908 sched_data.insns[4] = sched_data.insns[2];
5909 sched_data.insns[3] = sched_data.insns[1];
5910 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
5911 sched_data.cur += 2;
5912 sched_data.first_slot = 3;
5913 for (i = 0; i < NR_PACKETS; i++)
5915 const struct ia64_packet *p = packets + i;
5916 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
5918 sched_data.packet = p;
5919 break;
5922 rotate_one_bundle (sched_verbose ? dump : NULL);
5924 best = 6;
5925 for (i = 0; i < NR_PACKETS; i++)
5927 const struct ia64_packet *p = packets + i;
5928 int split = get_split (p, sched_data.first_slot);
5929 int next;
5931 /* Disallow multiway branches here. */
5932 if (p->t[1] == TYPE_B)
5933 continue;
5935 if (packet_matches_p (p, split, &next) && next < best)
5937 best = next;
5938 sched_data.packet = p;
5939 sched_data.split = split;
5942 if (best == 6)
5943 abort ();
5946 if (*pn_ready > 0)
5948 int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1);
5949 if (more)
5950 return more;
5951 /* Did we schedule a stop? If so, finish this cycle. */
5952 if (sched_data.cur == sched_data.first_slot)
5953 return 0;
5956 if (sched_verbose)
5957 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
5959 cycle_end_fill_slots (sched_verbose ? dump : NULL);
5960 if (sched_verbose)
5961 dump_current_packet (dump);
5962 return 0;
5965 /* We are about to issue INSN. Return the number of insns left on the
5966 ready queue that can be issued this cycle. */
5969 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
5970 FILE *dump;
5971 int sched_verbose;
5972 rtx insn;
5973 int can_issue_more ATTRIBUTE_UNUSED;
5975 enum attr_type t = ia64_safe_type (insn);
5977 if (sched_data.last_was_stop)
5979 int t = sched_data.first_slot;
5980 if (t == 0)
5981 t = 3;
5982 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
5983 init_insn_group_barriers ();
5984 sched_data.last_was_stop = 0;
5987 if (t == TYPE_UNKNOWN)
5989 if (sched_verbose)
5990 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
5991 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5992 || asm_noperands (PATTERN (insn)) >= 0)
5994 /* This must be some kind of asm. Clear the scheduling state. */
5995 rotate_two_bundles (sched_verbose ? dump : NULL);
5996 if (ia64_final_schedule)
5997 group_barrier_needed_p (insn);
5999 return 1;
6002 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6003 important state info. Don't delete this test. */
6004 if (ia64_final_schedule
6005 && group_barrier_needed_p (insn))
6006 abort ();
6008 sched_data.stopbit[sched_data.cur] = 0;
6009 sched_data.insns[sched_data.cur] = insn;
6010 sched_data.types[sched_data.cur] = t;
6012 sched_data.cur++;
6013 if (sched_verbose)
6014 fprintf (dump, "// Scheduling insn %d of type %s\n",
6015 INSN_UID (insn), type_names[t]);
6017 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6019 schedule_stop (sched_verbose ? dump : NULL);
6020 sched_data.last_was_stop = 1;
6023 return 1;
6026 /* Free data allocated by ia64_sched_init. */
6028 void
6029 ia64_sched_finish (dump, sched_verbose)
6030 FILE *dump;
6031 int sched_verbose;
6033 if (sched_verbose)
6034 fprintf (dump, "// Finishing schedule.\n");
6035 rotate_two_bundles (NULL);
6036 free (sched_types);
6037 free (sched_ready);
6040 /* Emit pseudo-ops for the assembler to describe predicate relations.
6041 At present this assumes that we only consider predicate pairs to
6042 be mutex, and that the assembler can deduce proper values from
6043 straight-line code. */
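/* For example (the predicate numbers are illustrative): for a predicate
   pair whose first member is live at a block that starts with a code
   label, this emits, right after the label, something like

	.pred.rel.mutex p6, p7

   telling the assembler that the two predicates cannot both be true, so
   its dependency checking accepts the predicated straight-line code that
   follows.  */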
6045 static void
6046 emit_predicate_relation_info ()
6048 int i;
6050 for (i = n_basic_blocks - 1; i >= 0; --i)
6052 basic_block bb = BASIC_BLOCK (i);
6053 int r;
6054 rtx head = bb->head;
6056 /* We only need such notes at code labels. */
6057 if (GET_CODE (head) != CODE_LABEL)
6058 continue;
6059 if (GET_CODE (NEXT_INSN (head)) == NOTE
6060 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6061 head = NEXT_INSN (head);
6063 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6064 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6066 rtx p = gen_rtx_REG (BImode, r);
6067 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6068 if (head == bb->end)
6069 bb->end = n;
6070 head = n;
6074 /* Look for conditional calls that do not return, and protect predicate
6075 relations around them. Otherwise the assembler will assume the call
6076 returns, and complain about uses of call-clobbered predicates after
6077 the call. */
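/* Schematically (the exact directive text and the call are illustrative),
   such a call ends up bracketed as

	.pred.safe_across_calls p1-p63
   (p6)	br.call.sptk.many b0 = noreturn_func
	.pred.safe_across_calls p1-p5,p16-p63

   i.e. every predicate is declared safe across this one call, and the
   normal preserved set is restored immediately afterwards.  */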
6078 for (i = n_basic_blocks - 1; i >= 0; --i)
6080 basic_block bb = BASIC_BLOCK (i);
6081 rtx insn = bb->head;
6083 while (1)
6085 if (GET_CODE (insn) == CALL_INSN
6086 && GET_CODE (PATTERN (insn)) == COND_EXEC
6087 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6089 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6090 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6091 if (bb->head == insn)
6092 bb->head = b;
6093 if (bb->end == insn)
6094 bb->end = a;
6097 if (insn == bb->end)
6098 break;
6099 insn = NEXT_INSN (insn);
6104 /* Generate a NOP instruction of type T. We will never generate L type
6105 nops. */
6107 static rtx
6108 gen_nop_type (t)
6109 enum attr_type t;
6111 switch (t)
6113 case TYPE_M:
6114 return gen_nop_m ();
6115 case TYPE_I:
6116 return gen_nop_i ();
6117 case TYPE_B:
6118 return gen_nop_b ();
6119 case TYPE_F:
6120 return gen_nop_f ();
6121 case TYPE_X:
6122 return gen_nop_x ();
6123 default:
6124 abort ();
6128 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6129 here than while scheduling. */
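/* For example (illustrative): if the final schedule chose an .mfi bundle,
   filled only its M slot, and then placed a stop at the end of the bundle,
   this pass emits "nop.f 0" and "nop.i 0" in front of the group barrier so
   the bundle comes out complete.  Likewise, a bundle left half-filled at a
   new bundle selector or a code label is padded out to all three slots.  */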
6131 static void
6132 ia64_emit_nops ()
6134 rtx insn;
6135 const struct bundle *b = 0;
6136 int bundle_pos = 0;
6138 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6140 rtx pat;
6141 enum attr_type t;
6142 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6143 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6144 continue;
6145 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6146 || GET_CODE (insn) == CODE_LABEL)
6148 if (b)
6149 while (bundle_pos < 3)
6151 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6152 bundle_pos++;
6154 if (GET_CODE (insn) != CODE_LABEL)
6155 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6156 else
6157 b = 0;
6158 bundle_pos = 0;
6159 continue;
6161 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6163 int t = INTVAL (XVECEXP (pat, 0, 0));
6164 if (b)
6165 while (bundle_pos < t)
6167 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6168 bundle_pos++;
6170 continue;
6173 if (bundle_pos == 3)
6174 b = 0;
6176 if (b && INSN_P (insn))
6178 t = ia64_safe_type (insn);
6179 if (asm_noperands (PATTERN (insn)) >= 0
6180 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6182 while (bundle_pos < 3)
6184 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6185 bundle_pos++;
6187 continue;
6190 if (t == TYPE_UNKNOWN)
6191 continue;
6192 while (bundle_pos < 3)
6194 if (t == b->t[bundle_pos]
6195 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6196 || b->t[bundle_pos] == TYPE_I)))
6197 break;
6199 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6200 bundle_pos++;
6202 if (bundle_pos < 3)
6203 bundle_pos++;
6208 /* Perform machine dependent operations on the rtl chain INSNS. */
6210 void
6211 ia64_reorg (insns)
6212 rtx insns;
6214 /* If optimizing, we'll have split before scheduling. */
6215 if (optimize == 0)
6216 split_all_insns (0);
6218 /* Make sure the CFG and global_live_at_start are correct
6219 for emit_predicate_relation_info. */
6220 find_basic_blocks (insns, max_reg_num (), NULL);
6221 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6223 if (ia64_flag_schedule_insns2)
6225 ia64_final_schedule = 1;
6226 schedule_ebbs (rtl_dump_file);
6227 ia64_final_schedule = 0;
6229 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6230 place as they were during scheduling. */
6231 emit_insn_group_barriers (rtl_dump_file, insns);
6232 ia64_emit_nops ();
6234 else
6235 emit_all_insn_group_barriers (rtl_dump_file, insns);
6237 fixup_errata ();
6238 emit_predicate_relation_info ();
6241 /* Return true if REGNO is used by the epilogue. */
6244 ia64_epilogue_uses (regno)
6245 int regno;
6247 /* When a function makes a call through a function descriptor, we
6248 will write a (potentially) new value to "gp". After returning
6249 from such a call, we need to make sure the function restores the
6250 original gp-value, even if the function itself does not use the
6251 gp anymore. */
6252 if (regno == R_GR (1)
6253 && TARGET_CONST_GP
6254 && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
6255 return 1;
6257 /* For functions defined with the syscall_linkage attribute, all input
6258 registers are marked as live at all function exits. This prevents the
6259 register allocator from using the input registers, which in turn makes it
6260 possible to restart a system call after an interrupt without having to
6261 save/restore the input registers. This also prevents kernel data from
6262 leaking to application code. */
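/* A purely illustrative declaration using the attribute (the function
   itself is hypothetical, not part of GCC):

	extern long _do_syscall (long, long, long)
	  __attribute__ ((syscall_linkage));

   ia64_valid_type_attribute below accepts the attribute only on function
   and method types, and only without arguments.  */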
6264 if (IN_REGNO_P (regno)
6265 && lookup_attribute ("syscall_linkage",
6266 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6267 return 1;
6269 /* Conditional return patterns can't represent the use of `b0' as
6270 the return address, so we force the value live this way. */
6271 if (regno == R_BR (0))
6272 return 1;
6274 if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
6275 return 1;
6276 if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
6277 return 1;
6278 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6279 && regno == AR_UNAT_REGNUM)
6280 return 1;
6282 return 0;
6285 /* Return true if IDENTIFIER is a valid attribute for TYPE. */
6288 ia64_valid_type_attribute (type, attributes, identifier, args)
6289 tree type;
6290 tree attributes ATTRIBUTE_UNUSED;
6291 tree identifier;
6292 tree args;
6294 /* We only support attributes on function and method types. */
6296 if (TREE_CODE (type) != FUNCTION_TYPE
6297 && TREE_CODE (type) != METHOD_TYPE)
6298 return 0;
6300 /* The "syscall_linkage" attribute says the callee is a system call entry
6301 point. This affects ia64_epilogue_uses. */
6303 if (is_attribute_p ("syscall_linkage", identifier))
6304 return args == NULL_TREE;
6306 return 0;
6309 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6311 We add @ to the name if this goes in small data/bss. We can only put
6312 a variable in small data/bss if it is defined in this module or a module
6313 that we are statically linked with. We can't check the second condition,
6314 but TREE_STATIC gives us the first one. */
6316 /* ??? If we had IPA, we could check the second condition. We could support
6317 programmer added section attributes if the variable is not defined in this
6318 module. */
6320 /* ??? See the v850 port for a cleaner way to do this. */
6322 /* ??? We could also support the module's own long data here, generating
6323 movl/add/ld8 instead of addl,ld8/ld8. This makes the code bigger, but
6324 should make the code faster because there is one less load. It would also
6325 cover incomplete types, which can't go in sdata/sbss. */
6327 /* ??? See select_section. We must put short own readonly variables in
6328 sdata/sbss instead of the more natural rodata, because we can't perform
6329 the DECL_READONLY_SECTION test here. */
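/* As an illustration: a small definition such as "static int counter;"
   gets its symbol renamed to "@counter" here, is placed in .sbss, and
   references to it can then use gp-relative addressing, e.g.

	addl r14 = @gprel(counter), r1
	ld4  r15 = [r14]

   (the register numbers are arbitrary; r1 is gp), instead of first
   materializing a full 64-bit address.  */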
6331 extern struct obstack * saveable_obstack;
6333 void
6334 ia64_encode_section_info (decl)
6335 tree decl;
6337 const char *symbol_str;
6339 if (TREE_CODE (decl) == FUNCTION_DECL)
6341 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6342 return;
6345 /* Careful not to prod global register variables. */
6346 if (TREE_CODE (decl) != VAR_DECL
6347 || GET_CODE (DECL_RTL (decl)) != MEM
6348 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6349 return;
6351 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6353 /* We assume that -fpic is used only to create a shared library (dso).
6354 With -fpic, no global data can ever be sdata.
6355 Without -fpic, global common uninitialized data can never be sdata, since
6356 it can unify with a real definition in a dso. */
6357 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6358 to access them. The linker may then be able to do linker relaxation to
6359 optimize references to them. Currently sdata implies use of gprel. */
6360 /* We need the DECL_EXTERNAL check for C++. Static class data members get
6361 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6362 statically allocated but that the space is allocated somewhere else. Such
6363 decls cannot be own data. */
6364 if (! TARGET_NO_SDATA
6365 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6366 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6367 && ! (TREE_PUBLIC (decl)
6368 && (flag_pic
6369 || (DECL_COMMON (decl)
6370 && (DECL_INITIAL (decl) == 0
6371 || DECL_INITIAL (decl) == error_mark_node))))
6372 /* Either the variable must be declared without a section attribute,
6373 or the section must be sdata or sbss. */
6374 && (DECL_SECTION_NAME (decl) == 0
6375 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6376 ".sdata")
6377 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6378 ".sbss")))
6380 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6382 /* If the variable has already been defined in the output file, then it
6383 is too late to put it in sdata if it wasn't put there in the first
6384 place. The test is here rather than above, because if it is already
6385 in sdata, then it can stay there. */
6387 if (TREE_ASM_WRITTEN (decl))
6390 /* If this is an incomplete type with size 0, then we can't put it in
6391 sdata because it might be too big when completed. */
6392 else if (size > 0
6393 && size <= (HOST_WIDE_INT) ia64_section_threshold
6394 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
6396 size_t len = strlen (symbol_str);
6397 char *newstr = alloca (len + 2);	/* Room for '@', the name, and the NUL.  */
6398 const char *string;
6400 *newstr = SDATA_NAME_FLAG_CHAR;
6401 memcpy (newstr + 1, symbol_str, len + 1);
6403 string = ggc_alloc_string (newstr, len + 1);
6404 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
6407 /* This decl is marked as being in small data/bss but it shouldn't
6408 be; one likely explanation for this is that the decl has been
6409 moved into a different section from the one it was in when
6410 ENCODE_SECTION_INFO was first called. Remove the '@'. */
6411 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6413 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6414 = ggc_strdup (symbol_str + 1);
6418 /* Output assembly directives for prologue regions. */
6420 /* The current basic block number. */
6422 static int block_num;
6424 /* True if we need a copy_state command at the start of the next block. */
6426 static int need_copy_state;
6428 /* The function emits unwind directives for the start of an epilogue. */
6430 static void
6431 process_epilogue ()
6433 /* If this isn't the last block of the function, then we need to label the
6434 current state, and copy it back in at the start of the next block. */
6436 if (block_num != n_basic_blocks - 1)
6438 fprintf (asm_out_file, "\t.label_state 1\n");
6439 need_copy_state = 1;
6442 fprintf (asm_out_file, "\t.restore sp\n");
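/* So for an epilogue that is not in the function's last block the output
   looks roughly like

	.label_state 1
	.restore sp
	...			// rest of the epilogue and the return
	.body
	.copy_state 1		// emitted at the start of the next block

   where the .body/.copy_state pair comes from process_for_unwind_directive
   below, when the next basic block begins.  */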
6445 /* This function processes a SET pattern looking for specific patterns
6446 which result in emitting an assembly directive required for unwinding. */
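/* For instance (register numbers and offsets are illustrative): a
   frame-related "sp = sp - 32" produces ".fframe 32", copying b0 into the
   GR chosen to hold it produces ".save rp, r35", and spilling ar.unat into
   the memory save area produces ".savesp ar.unat, 16" (or ".savepsp ..."
   when the address is frame-pointer relative).  */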
6448 static int
6449 process_set (asm_out_file, pat)
6450 FILE *asm_out_file;
6451 rtx pat;
6453 rtx src = SET_SRC (pat);
6454 rtx dest = SET_DEST (pat);
6455 int src_regno, dest_regno;
6457 /* Look for the ALLOC insn. */
6458 if (GET_CODE (src) == UNSPEC_VOLATILE
6459 && XINT (src, 1) == 0
6460 && GET_CODE (dest) == REG)
6462 dest_regno = REGNO (dest);
6464 /* If this isn't the final destination for ar.pfs, the alloc
6465 shouldn't have been marked frame related. */
6466 if (dest_regno != current_frame_info.reg_save_ar_pfs)
6467 abort ();
6469 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
6470 ia64_dbx_register_number (dest_regno));
6471 return 1;
6474 /* Look for SP = .... */
6475 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
6477 if (GET_CODE (src) == PLUS)
6479 rtx op0 = XEXP (src, 0);
6480 rtx op1 = XEXP (src, 1);
6481 if (op0 == dest && GET_CODE (op1) == CONST_INT)
6483 if (INTVAL (op1) < 0)
6485 fputs ("\t.fframe ", asm_out_file);
6486 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
6487 -INTVAL (op1));
6488 fputc ('\n', asm_out_file);
6490 else
6491 process_epilogue ();
6493 else
6494 abort ();
6496 else if (GET_CODE (src) == REG
6497 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
6498 process_epilogue ();
6499 else
6500 abort ();
6502 return 1;
6505 /* Register move we need to look at. */
6506 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
6508 src_regno = REGNO (src);
6509 dest_regno = REGNO (dest);
6511 switch (src_regno)
6513 case BR_REG (0):
6514 /* Saving return address pointer. */
6515 if (dest_regno != current_frame_info.reg_save_b0)
6516 abort ();
6517 fprintf (asm_out_file, "\t.save rp, r%d\n",
6518 ia64_dbx_register_number (dest_regno));
6519 return 1;
6521 case PR_REG (0):
6522 if (dest_regno != current_frame_info.reg_save_pr)
6523 abort ();
6524 fprintf (asm_out_file, "\t.save pr, r%d\n",
6525 ia64_dbx_register_number (dest_regno));
6526 return 1;
6528 case AR_UNAT_REGNUM:
6529 if (dest_regno != current_frame_info.reg_save_ar_unat)
6530 abort ();
6531 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
6532 ia64_dbx_register_number (dest_regno));
6533 return 1;
6535 case AR_LC_REGNUM:
6536 if (dest_regno != current_frame_info.reg_save_ar_lc)
6537 abort ();
6538 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
6539 ia64_dbx_register_number (dest_regno));
6540 return 1;
6542 case STACK_POINTER_REGNUM:
6543 if (dest_regno != HARD_FRAME_POINTER_REGNUM
6544 || ! frame_pointer_needed)
6545 abort ();
6546 fprintf (asm_out_file, "\t.vframe r%d\n",
6547 ia64_dbx_register_number (dest_regno));
6548 return 1;
6550 default:
6551 /* Everything else should indicate being stored to memory. */
6552 abort ();
6556 /* Memory store we need to look at. */
6557 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
6559 long off;
6560 rtx base;
6561 const char *saveop;
6563 if (GET_CODE (XEXP (dest, 0)) == REG)
6565 base = XEXP (dest, 0);
6566 off = 0;
6568 else if (GET_CODE (XEXP (dest, 0)) == PLUS
6569 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
6571 base = XEXP (XEXP (dest, 0), 0);
6572 off = INTVAL (XEXP (XEXP (dest, 0), 1));
6574 else
6575 abort ();
6577 if (base == hard_frame_pointer_rtx)
6579 saveop = ".savepsp";
6580 off = - off;
6582 else if (base == stack_pointer_rtx)
6583 saveop = ".savesp";
6584 else
6585 abort ();
6587 src_regno = REGNO (src);
6588 switch (src_regno)
6590 case BR_REG (0):
6591 if (current_frame_info.reg_save_b0 != 0)
6592 abort ();
6593 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
6594 return 1;
6596 case PR_REG (0):
6597 if (current_frame_info.reg_save_pr != 0)
6598 abort ();
6599 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
6600 return 1;
6602 case AR_LC_REGNUM:
6603 if (current_frame_info.reg_save_ar_lc != 0)
6604 abort ();
6605 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
6606 return 1;
6608 case AR_PFS_REGNUM:
6609 if (current_frame_info.reg_save_ar_pfs != 0)
6610 abort ();
6611 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
6612 return 1;
6614 case AR_UNAT_REGNUM:
6615 if (current_frame_info.reg_save_ar_unat != 0)
6616 abort ();
6617 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
6618 return 1;
6620 case GR_REG (4):
6621 case GR_REG (5):
6622 case GR_REG (6):
6623 case GR_REG (7):
6624 fprintf (asm_out_file, "\t.save.g 0x%x\n",
6625 1 << (src_regno - GR_REG (4)));
6626 return 1;
6628 case BR_REG (1):
6629 case BR_REG (2):
6630 case BR_REG (3):
6631 case BR_REG (4):
6632 case BR_REG (5):
6633 fprintf (asm_out_file, "\t.save.b 0x%x\n",
6634 1 << (src_regno - BR_REG (1)));
6635 return 1;
6637 case FR_REG (2):
6638 case FR_REG (3):
6639 case FR_REG (4):
6640 case FR_REG (5):
6641 fprintf (asm_out_file, "\t.save.f 0x%x\n",
6642 1 << (src_regno - FR_REG (2)));
6643 return 1;
6645 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
6646 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
6647 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
6648 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
6649 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
6650 1 << (src_regno - FR_REG (12)));
6651 return 1;
6653 default:
6654 return 0;
6658 return 0;
6662 /* This function looks at a single insn and emits any directives
6663 required to unwind this insn. */
6664 void
6665 process_for_unwind_directive (asm_out_file, insn)
6666 FILE *asm_out_file;
6667 rtx insn;
6669 if (flag_unwind_tables
6670 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6672 rtx pat;
6674 if (GET_CODE (insn) == NOTE
6675 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
6677 block_num = NOTE_BASIC_BLOCK (insn)->index;
6679 /* Restore unwind state from immediately before the epilogue. */
6680 if (need_copy_state)
6682 fprintf (asm_out_file, "\t.body\n");
6683 fprintf (asm_out_file, "\t.copy_state 1\n");
6684 need_copy_state = 0;
6688 if (! RTX_FRAME_RELATED_P (insn))
6689 return;
6691 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
6692 if (pat)
6693 pat = XEXP (pat, 0);
6694 else
6695 pat = PATTERN (insn);
6697 switch (GET_CODE (pat))
6699 case SET:
6700 process_set (asm_out_file, pat);
6701 break;
6703 case PARALLEL:
6705 int par_index;
6706 int limit = XVECLEN (pat, 0);
6707 for (par_index = 0; par_index < limit; par_index++)
6709 rtx x = XVECEXP (pat, 0, par_index);
6710 if (GET_CODE (x) == SET)
6711 process_set (asm_out_file, x);
6713 break;
6716 default:
6717 abort ();
6723 void
6724 ia64_init_builtins ()
6726 tree psi_type_node = build_pointer_type (integer_type_node);
6727 tree pdi_type_node = build_pointer_type (long_integer_type_node);
6728 tree endlink = void_list_node;
6730 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
6731 tree si_ftype_psi_si_si
6732 = build_function_type (integer_type_node,
6733 tree_cons (NULL_TREE, psi_type_node,
6734 tree_cons (NULL_TREE, integer_type_node,
6735 tree_cons (NULL_TREE,
6736 integer_type_node,
6737 endlink))));
6739 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
6740 tree di_ftype_pdi_di_di
6741 = build_function_type (long_integer_type_node,
6742 tree_cons (NULL_TREE, pdi_type_node,
6743 tree_cons (NULL_TREE,
6744 long_integer_type_node,
6745 tree_cons (NULL_TREE,
6746 long_integer_type_node,
6747 endlink))));
6748 /* __sync_synchronize */
6749 tree void_ftype_void
6750 = build_function_type (void_type_node, endlink);
6752 /* __sync_lock_test_and_set_si */
6753 tree si_ftype_psi_si
6754 = build_function_type (integer_type_node,
6755 tree_cons (NULL_TREE, psi_type_node,
6756 tree_cons (NULL_TREE, integer_type_node, endlink)));
6758 /* __sync_lock_test_and_set_di */
6759 tree di_ftype_pdi_di
6760 = build_function_type (long_integer_type_node,
6761 tree_cons (NULL_TREE, pdi_type_node,
6762 tree_cons (NULL_TREE, long_integer_type_node,
6763 endlink)));
6765 /* __sync_lock_release_si */
6766 tree void_ftype_psi
6767 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
6768 endlink));
6770 /* __sync_lock_release_di */
6771 tree void_ftype_pdi
6772 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
6773 endlink));
6775 #define def_builtin(name, type, code) \
6776 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR)
6778 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
6779 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
6780 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
6781 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
6782 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
6783 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
6784 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
6785 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
6787 def_builtin ("__sync_synchronize", void_ftype_void,
6788 IA64_BUILTIN_SYNCHRONIZE);
6790 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
6791 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
6792 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
6793 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
6794 def_builtin ("__sync_lock_release_si", void_ftype_psi,
6795 IA64_BUILTIN_LOCK_RELEASE_SI);
6796 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
6797 IA64_BUILTIN_LOCK_RELEASE_DI);
6799 def_builtin ("__builtin_ia64_bsp",
6800 build_function_type (ptr_type_node, endlink),
6801 IA64_BUILTIN_BSP);
6803 def_builtin ("__builtin_ia64_flushrs",
6804 build_function_type (void_type_node, endlink),
6805 IA64_BUILTIN_FLUSHRS);
6807 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
6808 IA64_BUILTIN_FETCH_AND_ADD_SI);
6809 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
6810 IA64_BUILTIN_FETCH_AND_SUB_SI);
6811 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
6812 IA64_BUILTIN_FETCH_AND_OR_SI);
6813 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
6814 IA64_BUILTIN_FETCH_AND_AND_SI);
6815 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
6816 IA64_BUILTIN_FETCH_AND_XOR_SI);
6817 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
6818 IA64_BUILTIN_FETCH_AND_NAND_SI);
6820 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
6821 IA64_BUILTIN_ADD_AND_FETCH_SI);
6822 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
6823 IA64_BUILTIN_SUB_AND_FETCH_SI);
6824 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
6825 IA64_BUILTIN_OR_AND_FETCH_SI);
6826 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
6827 IA64_BUILTIN_AND_AND_FETCH_SI);
6828 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
6829 IA64_BUILTIN_XOR_AND_FETCH_SI);
6830 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
6831 IA64_BUILTIN_NAND_AND_FETCH_SI);
6833 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
6834 IA64_BUILTIN_FETCH_AND_ADD_DI);
6835 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
6836 IA64_BUILTIN_FETCH_AND_SUB_DI);
6837 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
6838 IA64_BUILTIN_FETCH_AND_OR_DI);
6839 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
6840 IA64_BUILTIN_FETCH_AND_AND_DI);
6841 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
6842 IA64_BUILTIN_FETCH_AND_XOR_DI);
6843 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
6844 IA64_BUILTIN_FETCH_AND_NAND_DI);
6846 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
6847 IA64_BUILTIN_ADD_AND_FETCH_DI);
6848 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
6849 IA64_BUILTIN_SUB_AND_FETCH_DI);
6850 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
6851 IA64_BUILTIN_OR_AND_FETCH_DI);
6852 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
6853 IA64_BUILTIN_AND_AND_FETCH_DI);
6854 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
6855 IA64_BUILTIN_XOR_AND_FETCH_DI);
6856 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
6857 IA64_BUILTIN_NAND_AND_FETCH_DI);
6859 #undef def_builtin
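/* Illustrative use of the builtins defined above (the lock variable and
   the surrounding code are hypothetical):

	static int lock;
	...
	while (__sync_lock_test_and_set_si (&lock, 1) != 0)
	  ;			-- spin; the xchg returns the old value
	... critical section ...
	__sync_lock_release_si (&lock);

   Similarly, __sync_fetch_and_add_si (&counter, 1) returns the value
   COUNTER held before the increment.  */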
6862 /* Expand fetch_and_op intrinsics. The basic code sequence is:
6865 tmp = [ptr];
6866 do {
6867 ret = tmp;
6868 ar.ccv = tmp;
6869 tmp <op>= value;
6870 cmpxchgsz.acq tmp = [ptr], tmp
6871 } while (tmp != ret)
6874 static rtx
6875 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
6876 optab binoptab;
6877 enum machine_mode mode;
6878 tree arglist;
6879 rtx target;
6881 rtx ret, label, tmp, ccv, insn, mem, value;
6882 tree arg0, arg1;
6884 arg0 = TREE_VALUE (arglist);
6885 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
6886 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
6887 value = expand_expr (arg1, NULL_RTX, mode, 0);
6889 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
6890 MEM_VOLATILE_P (mem) = 1;
6892 if (target && register_operand (target, mode))
6893 ret = target;
6894 else
6895 ret = gen_reg_rtx (mode);
6897 emit_insn (gen_mf ());
6899 /* Special case for fetchadd instructions. */
6900 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
6902 if (mode == SImode)
6903 insn = gen_fetchadd_acq_si (ret, mem, value);
6904 else
6905 insn = gen_fetchadd_acq_di (ret, mem, value);
6906 emit_insn (insn);
6907 return ret;
6910 tmp = gen_reg_rtx (mode);
6911 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
6912 emit_move_insn (tmp, mem);
6914 label = gen_label_rtx ();
6915 emit_label (label);
6916 emit_move_insn (ret, tmp);
6917 emit_move_insn (ccv, tmp);
6919 /* Perform the specific operation. Special case NAND, which is passed in
6920 as one_cmpl_optab: complement TMP first and then AND it with VALUE. */
6921 if (binoptab == one_cmpl_optab)
6923 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
6924 binoptab = and_optab;
6926 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
6928 if (mode == SImode)
6929 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
6930 else
6931 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
6932 emit_insn (insn);
6934 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);
6936 return ret;
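/* A rough sketch of what the function above generates for SImode (the
   register and label names are illustrative):

	mf
	ld4	tmp = [ptr]
   .L0:	mov	ret = tmp
	mov	ar.ccv = tmp
	add	tmp = tmp, value
	cmpxchg4.acq tmp = [ptr], tmp, ar.ccv
	cmp.ne	p6, p0 = tmp, ret
   (p6)	br.cond.dptk .L0

   When the addend is one of the small constants accepted by
   fetchadd_operand, the whole loop is replaced by a single
   fetchadd4.acq.  */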
6939 /* Expand op_and_fetch intrinsics. The basic code sequence is:
6942 tmp = [ptr];
6943 do {
6944 old = tmp;
6945 ar.ccv = tmp;
6946 ret = tmp + value;
6947 cmpxchgsz.acq tmp = [ptr], ret
6948 } while (tmp != old)
6951 static rtx
6952 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
6953 optab binoptab;
6954 enum machine_mode mode;
6955 tree arglist;
6956 rtx target;
6958 rtx old, label, tmp, ret, ccv, insn, mem, value;
6959 tree arg0, arg1;
6961 arg0 = TREE_VALUE (arglist);
6962 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
6963 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
6964 value = expand_expr (arg1, NULL_RTX, mode, 0);
6966 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
6967 MEM_VOLATILE_P (mem) = 1;
6969 if (target && ! register_operand (target, mode))
6970 target = NULL_RTX;
6972 emit_insn (gen_mf ());
6973 tmp = gen_reg_rtx (mode);
6974 old = gen_reg_rtx (mode);
6975 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
6977 emit_move_insn (tmp, mem);
6979 label = gen_label_rtx ();
6980 emit_label (label);
6981 emit_move_insn (old, tmp);
6982 emit_move_insn (ccv, tmp);
6984 /* Perform the specific operation. Special case NAND, which is passed in
6985 as one_cmpl_optab: complement TMP first and then AND it with VALUE. */
6986 if (binoptab == one_cmpl_optab)
6988 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
6989 binoptab = and_optab;
6991 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
6993 if (mode == SImode)
6994 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
6995 else
6996 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
6997 emit_insn (insn);
6999 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);
7001 return ret;
7004 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7006 ar.ccv = oldval
7008 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7009 return ret
7011 For bool_ it's the same except return ret == oldval.
7014 static rtx
7015 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7016 enum machine_mode mode;
7017 int boolp;
7018 tree arglist;
7019 rtx target;
7021 tree arg0, arg1, arg2;
7022 rtx mem, old, new, ccv, tmp, insn;
7024 arg0 = TREE_VALUE (arglist);
7025 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7026 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7027 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7028 old = expand_expr (arg1, NULL_RTX, mode, 0);
7029 new = expand_expr (arg2, NULL_RTX, mode, 0);
7031 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7032 MEM_VOLATILE_P (mem) = 1;
7034 if (! register_operand (old, mode))
7035 old = copy_to_mode_reg (mode, old);
7036 if (! register_operand (new, mode))
7037 new = copy_to_mode_reg (mode, new);
7039 if (! boolp && target && register_operand (target, mode))
7040 tmp = target;
7041 else
7042 tmp = gen_reg_rtx (mode);
7044 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7045 emit_move_insn (ccv, old);
7046 emit_insn (gen_mf ());
7047 if (mode == SImode)
7048 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7049 else
7050 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7051 emit_insn (insn);
7053 if (boolp)
7055 if (! target)
7056 target = gen_reg_rtx (mode);
7057 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7059 else
7060 return tmp;
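/* In terms of the builtins this implements (X, EXPECTED and NEWVAL are
   hypothetical):

	old = __sync_val_compare_and_swap_si (&x, expected, newval);
	  -- returns the value *(&x) held before the cmpxchg
	ok  = __sync_bool_compare_and_swap_si (&x, expected, newval);
	  -- returns nonzero iff that old value equalled EXPECTED,
	     i.e. iff the new value was actually stored.  */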
7063 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7065 static rtx
7066 ia64_expand_lock_test_and_set (mode, arglist, target)
7067 enum machine_mode mode;
7068 tree arglist;
7069 rtx target;
7071 tree arg0, arg1;
7072 rtx mem, new, ret, insn;
7074 arg0 = TREE_VALUE (arglist);
7075 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7076 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7077 new = expand_expr (arg1, NULL_RTX, mode, 0);
7079 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7080 MEM_VOLATILE_P (mem) = 1;
7081 if (! register_operand (new, mode))
7082 new = copy_to_mode_reg (mode, new);
7084 if (target && register_operand (target, mode))
7085 ret = target;
7086 else
7087 ret = gen_reg_rtx (mode);
7089 if (mode == SImode)
7090 insn = gen_xchgsi (ret, mem, new);
7091 else
7092 insn = gen_xchgdi (ret, mem, new);
7093 emit_insn (insn);
7095 return ret;
7098 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7100 static rtx
7101 ia64_expand_lock_release (mode, arglist, target)
7102 enum machine_mode mode;
7103 tree arglist;
7104 rtx target ATTRIBUTE_UNUSED;
7106 tree arg0;
7107 rtx mem;
7109 arg0 = TREE_VALUE (arglist);
7110 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7112 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7113 MEM_VOLATILE_P (mem) = 1;
7115 emit_move_insn (mem, const0_rtx);
7117 return const0_rtx;
7121 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7122 tree exp;
7123 rtx target;
7124 rtx subtarget ATTRIBUTE_UNUSED;
7125 enum machine_mode mode ATTRIBUTE_UNUSED;
7126 int ignore ATTRIBUTE_UNUSED;
7128 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7129 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7130 tree arglist = TREE_OPERAND (exp, 1);
7132 switch (fcode)
7134 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7135 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7136 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7137 case IA64_BUILTIN_LOCK_RELEASE_SI:
7138 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7139 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7140 case IA64_BUILTIN_FETCH_AND_OR_SI:
7141 case IA64_BUILTIN_FETCH_AND_AND_SI:
7142 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7143 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7144 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7145 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7146 case IA64_BUILTIN_OR_AND_FETCH_SI:
7147 case IA64_BUILTIN_AND_AND_FETCH_SI:
7148 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7149 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7150 mode = SImode;
7151 break;
7153 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7154 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7155 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7156 case IA64_BUILTIN_LOCK_RELEASE_DI:
7157 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7158 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7159 case IA64_BUILTIN_FETCH_AND_OR_DI:
7160 case IA64_BUILTIN_FETCH_AND_AND_DI:
7161 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7162 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7163 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7164 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7165 case IA64_BUILTIN_OR_AND_FETCH_DI:
7166 case IA64_BUILTIN_AND_AND_FETCH_DI:
7167 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7168 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7169 mode = DImode;
7170 break;
7172 default:
7173 break;
7176 switch (fcode)
7178 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7179 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7180 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7182 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7183 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7184 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
7186 case IA64_BUILTIN_SYNCHRONIZE:
7187 emit_insn (gen_mf ());
7188 return const0_rtx;
7190 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7191 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7192 return ia64_expand_lock_test_and_set (mode, arglist, target);
7194 case IA64_BUILTIN_LOCK_RELEASE_SI:
7195 case IA64_BUILTIN_LOCK_RELEASE_DI:
7196 return ia64_expand_lock_release (mode, arglist, target);
7198 case IA64_BUILTIN_BSP:
7199 if (! target || ! register_operand (target, DImode))
7200 target = gen_reg_rtx (DImode);
7201 emit_insn (gen_bsp_value (target));
7202 return target;
7204 case IA64_BUILTIN_FLUSHRS:
7205 emit_insn (gen_flushrs ());
7206 return const0_rtx;
7208 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7209 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7210 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7212 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7213 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7214 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7216 case IA64_BUILTIN_FETCH_AND_OR_SI:
7217 case IA64_BUILTIN_FETCH_AND_OR_DI:
7218 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
7220 case IA64_BUILTIN_FETCH_AND_AND_SI:
7221 case IA64_BUILTIN_FETCH_AND_AND_DI:
7222 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
7224 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7225 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7226 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
7228 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7229 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7230 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
7232 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7233 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7234 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
7236 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7237 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7238 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
7240 case IA64_BUILTIN_OR_AND_FETCH_SI:
7241 case IA64_BUILTIN_OR_AND_FETCH_DI:
7242 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
7244 case IA64_BUILTIN_AND_AND_FETCH_SI:
7245 case IA64_BUILTIN_AND_AND_FETCH_DI:
7246 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
7248 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7249 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7250 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
7252 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7253 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7254 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
7256 default:
7257 break;
7260 return NULL_RTX;