1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-attr.h"
35 #include "flags.h"
36 #include "recog.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "obstack.h"
40 #include "except.h"
41 #include "function.h"
42 #include "ggc.h"
43 #include "basic-block.h"
44 #include "toplev.h"
45 #include "sched-int.h"
46 #include "timevar.h"
47 #include "target.h"
48 #include "target-def.h"
50 /* This is used for communication between ASM_OUTPUT_LABEL and
51 ASM_OUTPUT_LABELREF. */
52 int ia64_asm_output_label = 0;
54 /* Define the information needed to generate branch and scc insns. This is
55 stored from the compare operation. */
56 struct rtx_def * ia64_compare_op0;
57 struct rtx_def * ia64_compare_op1;
59 /* Register names for ia64_expand_prologue. */
60 static const char * const ia64_reg_numbers[96] =
61 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
62 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
63 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
64 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
65 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
66 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
67 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
68 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
69 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
70 "r104","r105","r106","r107","r108","r109","r110","r111",
71 "r112","r113","r114","r115","r116","r117","r118","r119",
72 "r120","r121","r122","r123","r124","r125","r126","r127"};
74 /* ??? These strings could be shared with REGISTER_NAMES. */
75 static const char * const ia64_input_reg_names[8] =
76 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
78 /* ??? These strings could be shared with REGISTER_NAMES. */
79 static const char * const ia64_local_reg_names[80] =
80 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
81 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
82 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
83 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
84 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
85 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
86 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
87 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
88 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
89 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
91 /* ??? These strings could be shared with REGISTER_NAMES. */
92 static const char * const ia64_output_reg_names[8] =
93 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
95 /* String used with the -mfixed-range= option. */
96 const char *ia64_fixed_range_string;
98 /* Determines whether we run our final scheduling pass or not. We always
99 avoid the normal second scheduling pass. */
100 static int ia64_flag_schedule_insns2;
102 /* Variables which are this size or smaller are put in the sdata/sbss
103 sections. */
105 unsigned int ia64_section_threshold;
107 static int find_gr_spill PARAMS ((int));
108 static int next_scratch_gr_reg PARAMS ((void));
109 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
110 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
111 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
112 static void finish_spill_pointers PARAMS ((void));
113 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
114 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
115 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
116 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
117 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
118 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
120 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
121 static void fix_range PARAMS ((const char *));
122 static void ia64_add_gc_roots PARAMS ((void));
123 static void ia64_init_machine_status PARAMS ((struct function *));
124 static void ia64_mark_machine_status PARAMS ((struct function *));
125 static void ia64_free_machine_status PARAMS ((struct function *));
126 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
127 static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
128 static void emit_predicate_relation_info PARAMS ((void));
129 static void process_epilogue PARAMS ((void));
130 static int process_set PARAMS ((FILE *, rtx));
132 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
133 tree, rtx));
134 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
135 tree, rtx));
136 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
137 tree, rtx));
138 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
139 tree, rtx));
140 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
141 const struct attribute_spec ia64_attribute_table[];
142 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
143 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
144 static void ia64_output_function_end_prologue PARAMS ((FILE *));
146 static int ia64_issue_rate PARAMS ((void));
147 static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
148 static void ia64_sched_init PARAMS ((FILE *, int, int));
149 static void ia64_sched_finish PARAMS ((FILE *, int));
150 static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
151 int *, int, int));
152 static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
153 static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
154 static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
155 static rtx ia64_cycle_display PARAMS ((int, rtx));
158 /* Initialize the GCC target structure. */
159 #undef TARGET_ATTRIBUTE_TABLE
160 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
162 #undef TARGET_INIT_BUILTINS
163 #define TARGET_INIT_BUILTINS ia64_init_builtins
165 #undef TARGET_EXPAND_BUILTIN
166 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
168 #undef TARGET_ASM_FUNCTION_PROLOGUE
169 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
170 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
171 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
172 #undef TARGET_ASM_FUNCTION_EPILOGUE
173 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
175 #undef TARGET_SCHED_ADJUST_COST
176 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
177 #undef TARGET_SCHED_ISSUE_RATE
178 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
179 #undef TARGET_SCHED_VARIABLE_ISSUE
180 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
181 #undef TARGET_SCHED_INIT
182 #define TARGET_SCHED_INIT ia64_sched_init
183 #undef TARGET_SCHED_FINISH
184 #define TARGET_SCHED_FINISH ia64_sched_finish
185 #undef TARGET_SCHED_REORDER
186 #define TARGET_SCHED_REORDER ia64_sched_reorder
187 #undef TARGET_SCHED_REORDER2
188 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
189 #undef TARGET_SCHED_CYCLE_DISPLAY
190 #define TARGET_SCHED_CYCLE_DISPLAY ia64_cycle_display
192 struct gcc_target targetm = TARGET_INITIALIZER;
194 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
197 call_operand (op, mode)
198 rtx op;
199 enum machine_mode mode;
201 if (mode != GET_MODE (op))
202 return 0;
204 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
205 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
208 /* Return 1 if OP refers to a symbol in the sdata section. */
211 sdata_symbolic_operand (op, mode)
212 rtx op;
213 enum machine_mode mode ATTRIBUTE_UNUSED;
215 switch (GET_CODE (op))
217 case CONST:
218 if (GET_CODE (XEXP (op, 0)) != PLUS
219 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
220 break;
221 op = XEXP (XEXP (op, 0), 0);
222 /* FALLTHRU */
224 case SYMBOL_REF:
225 if (CONSTANT_POOL_ADDRESS_P (op))
226 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
227 else
228 return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;
230 default:
231 break;
234 return 0;
237 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
240 got_symbolic_operand (op, mode)
241 rtx op;
242 enum machine_mode mode ATTRIBUTE_UNUSED;
244 switch (GET_CODE (op))
246 case CONST:
247 op = XEXP (op, 0);
248 if (GET_CODE (op) != PLUS)
249 return 0;
250 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
251 return 0;
252 op = XEXP (op, 1);
253 if (GET_CODE (op) != CONST_INT)
254 return 0;
256 return 1;
258 /* Ok if we're not using GOT entries at all. */
259 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
260 return 1;
262 /* "Ok" while emitting rtl, since otherwise we won't be provided
263 with the entire offset during emission, which makes it very
264 hard to split the offset into high and low parts. */
265 if (rtx_equal_function_value_matters)
266 return 1;
268 /* Force the low 14 bits of the constant to zero so that we do not
269 use up so many GOT entries. */
270 return (INTVAL (op) & 0x3fff) == 0;
272 case SYMBOL_REF:
273 case LABEL_REF:
274 return 1;
276 default:
277 break;
279 return 0;
282 /* Return 1 if OP refers to a symbol. */
285 symbolic_operand (op, mode)
286 rtx op;
287 enum machine_mode mode ATTRIBUTE_UNUSED;
289 switch (GET_CODE (op))
291 case CONST:
292 case SYMBOL_REF:
293 case LABEL_REF:
294 return 1;
296 default:
297 break;
299 return 0;
302 /* Return 1 if OP refers to a function. */
305 function_operand (op, mode)
306 rtx op;
307 enum machine_mode mode ATTRIBUTE_UNUSED;
309 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
310 return 1;
311 else
312 return 0;
315 /* Return 1 if OP is setjmp or a similar function. */
317 /* ??? This is an unsatisfying solution. Should rethink. */
320 setjmp_operand (op, mode)
321 rtx op;
322 enum machine_mode mode ATTRIBUTE_UNUSED;
324 const char *name;
325 int retval = 0;
327 if (GET_CODE (op) != SYMBOL_REF)
328 return 0;
330 name = XSTR (op, 0);
332 /* The following code is borrowed from special_function_p in calls.c. */
334 /* Disregard prefix _, __ or __x. */
335 if (name[0] == '_')
337 if (name[1] == '_' && name[2] == 'x')
338 name += 3;
339 else if (name[1] == '_')
340 name += 2;
341 else
342 name += 1;
345 if (name[0] == 's')
347 retval
348 = ((name[1] == 'e'
349 && (! strcmp (name, "setjmp")
350 || ! strcmp (name, "setjmp_syscall")))
351 || (name[1] == 'i'
352 && ! strcmp (name, "sigsetjmp"))
353 || (name[1] == 'a'
354 && ! strcmp (name, "savectx")));
356 else if ((name[0] == 'q' && name[1] == 's'
357 && ! strcmp (name, "qsetjmp"))
358 || (name[0] == 'v' && name[1] == 'f'
359 && ! strcmp (name, "vfork")))
360 retval = 1;
362 return retval;
365 /* Return 1 if OP is a general operand, but when pic exclude symbolic
366 operands. */
368 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
369 from PREDICATE_CODES. */
372 move_operand (op, mode)
373 rtx op;
374 enum machine_mode mode;
376 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
377 return 0;
379 return general_operand (op, mode);
382 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
385 gr_register_operand (op, mode)
386 rtx op;
387 enum machine_mode mode;
389 if (! register_operand (op, mode))
390 return 0;
391 if (GET_CODE (op) == SUBREG)
392 op = SUBREG_REG (op);
393 if (GET_CODE (op) == REG)
395 unsigned int regno = REGNO (op);
396 if (regno < FIRST_PSEUDO_REGISTER)
397 return GENERAL_REGNO_P (regno);
399 return 1;
402 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
405 fr_register_operand (op, mode)
406 rtx op;
407 enum machine_mode mode;
409 if (! register_operand (op, mode))
410 return 0;
411 if (GET_CODE (op) == SUBREG)
412 op = SUBREG_REG (op);
413 if (GET_CODE (op) == REG)
415 unsigned int regno = REGNO (op);
416 if (regno < FIRST_PSEUDO_REGISTER)
417 return FR_REGNO_P (regno);
419 return 1;
422 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
425 grfr_register_operand (op, mode)
426 rtx op;
427 enum machine_mode mode;
429 if (! register_operand (op, mode))
430 return 0;
431 if (GET_CODE (op) == SUBREG)
432 op = SUBREG_REG (op);
433 if (GET_CODE (op) == REG)
435 unsigned int regno = REGNO (op);
436 if (regno < FIRST_PSEUDO_REGISTER)
437 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
439 return 1;
442 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
445 gr_nonimmediate_operand (op, mode)
446 rtx op;
447 enum machine_mode mode;
449 if (! nonimmediate_operand (op, mode))
450 return 0;
451 if (GET_CODE (op) == SUBREG)
452 op = SUBREG_REG (op);
453 if (GET_CODE (op) == REG)
455 unsigned int regno = REGNO (op);
456 if (regno < FIRST_PSEUDO_REGISTER)
457 return GENERAL_REGNO_P (regno);
459 return 1;
 462 /* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg. */
465 fr_nonimmediate_operand (op, mode)
466 rtx op;
467 enum machine_mode mode;
469 if (! nonimmediate_operand (op, mode))
470 return 0;
471 if (GET_CODE (op) == SUBREG)
472 op = SUBREG_REG (op);
473 if (GET_CODE (op) == REG)
475 unsigned int regno = REGNO (op);
476 if (regno < FIRST_PSEUDO_REGISTER)
477 return FR_REGNO_P (regno);
479 return 1;
482 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
485 grfr_nonimmediate_operand (op, mode)
486 rtx op;
487 enum machine_mode mode;
489 if (! nonimmediate_operand (op, mode))
490 return 0;
491 if (GET_CODE (op) == SUBREG)
492 op = SUBREG_REG (op);
493 if (GET_CODE (op) == REG)
495 unsigned int regno = REGNO (op);
496 if (regno < FIRST_PSEUDO_REGISTER)
497 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
499 return 1;
502 /* Return 1 if OP is a GR register operand, or zero. */
505 gr_reg_or_0_operand (op, mode)
506 rtx op;
507 enum machine_mode mode;
509 return (op == const0_rtx || gr_register_operand (op, mode));
512 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
515 gr_reg_or_5bit_operand (op, mode)
516 rtx op;
517 enum machine_mode mode;
519 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
520 || GET_CODE (op) == CONSTANT_P_RTX
521 || gr_register_operand (op, mode));
524 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
527 gr_reg_or_6bit_operand (op, mode)
528 rtx op;
529 enum machine_mode mode;
531 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
532 || GET_CODE (op) == CONSTANT_P_RTX
533 || gr_register_operand (op, mode));
536 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
539 gr_reg_or_8bit_operand (op, mode)
540 rtx op;
541 enum machine_mode mode;
543 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
544 || GET_CODE (op) == CONSTANT_P_RTX
545 || gr_register_operand (op, mode));
548 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
551 grfr_reg_or_8bit_operand (op, mode)
552 rtx op;
553 enum machine_mode mode;
555 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
556 || GET_CODE (op) == CONSTANT_P_RTX
557 || grfr_register_operand (op, mode));
560 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
561 operand. */
564 gr_reg_or_8bit_adjusted_operand (op, mode)
565 rtx op;
566 enum machine_mode mode;
568 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
569 || GET_CODE (op) == CONSTANT_P_RTX
570 || gr_register_operand (op, mode));
573 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
574 immediate and an 8 bit adjusted immediate operand. This is necessary
575 because when we emit a compare, we don't know what the condition will be,
576 so we need the union of the immediates accepted by GT and LT. */
579 gr_reg_or_8bit_and_adjusted_operand (op, mode)
580 rtx op;
581 enum machine_mode mode;
583 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
584 && CONST_OK_FOR_L (INTVAL (op)))
585 || GET_CODE (op) == CONSTANT_P_RTX
586 || gr_register_operand (op, mode));
589 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
592 gr_reg_or_14bit_operand (op, mode)
593 rtx op;
594 enum machine_mode mode;
596 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
597 || GET_CODE (op) == CONSTANT_P_RTX
598 || gr_register_operand (op, mode));
601 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
604 gr_reg_or_22bit_operand (op, mode)
605 rtx op;
606 enum machine_mode mode;
608 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
609 || GET_CODE (op) == CONSTANT_P_RTX
610 || gr_register_operand (op, mode));
613 /* Return 1 if OP is a 6 bit immediate operand. */
616 shift_count_operand (op, mode)
617 rtx op;
618 enum machine_mode mode ATTRIBUTE_UNUSED;
620 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
621 || GET_CODE (op) == CONSTANT_P_RTX);
624 /* Return 1 if OP is a 5 bit immediate operand. */
627 shift_32bit_count_operand (op, mode)
628 rtx op;
629 enum machine_mode mode ATTRIBUTE_UNUSED;
631 return ((GET_CODE (op) == CONST_INT
632 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
633 || GET_CODE (op) == CONSTANT_P_RTX);
636 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
639 shladd_operand (op, mode)
640 rtx op;
641 enum machine_mode mode ATTRIBUTE_UNUSED;
643 return (GET_CODE (op) == CONST_INT
644 && (INTVAL (op) == 2 || INTVAL (op) == 4
645 || INTVAL (op) == 8 || INTVAL (op) == 16));
648 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
651 fetchadd_operand (op, mode)
652 rtx op;
653 enum machine_mode mode ATTRIBUTE_UNUSED;
655 return (GET_CODE (op) == CONST_INT
656 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
657 INTVAL (op) == -4 || INTVAL (op) == -1 ||
658 INTVAL (op) == 1 || INTVAL (op) == 4 ||
659 INTVAL (op) == 8 || INTVAL (op) == 16));
662 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
665 fr_reg_or_fp01_operand (op, mode)
666 rtx op;
667 enum machine_mode mode;
669 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
670 || fr_register_operand (op, mode));
673 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
674 POST_MODIFY with a REG as displacement. */
677 destination_operand (op, mode)
678 rtx op;
679 enum machine_mode mode;
681 if (! nonimmediate_operand (op, mode))
682 return 0;
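  /* A POST_MODIFY address has the form (post_modify base (plus base disp));
     reject a MEM whose displacement is a REG rather than a constant.  */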
683 if (GET_CODE (op) == MEM
684 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
685 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
686 return 0;
687 return 1;
690 /* Like memory_operand, but don't allow post-increments. */
693 not_postinc_memory_operand (op, mode)
694 rtx op;
695 enum machine_mode mode;
697 return (memory_operand (op, mode)
698 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
 701 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
702 signed immediate operand. */
705 normal_comparison_operator (op, mode)
706 register rtx op;
707 enum machine_mode mode;
709 enum rtx_code code = GET_CODE (op);
710 return ((mode == VOIDmode || GET_MODE (op) == mode)
711 && (code == EQ || code == NE
712 || code == GT || code == LE || code == GTU || code == LEU));
715 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
716 signed immediate operand. */
719 adjusted_comparison_operator (op, mode)
720 register rtx op;
721 enum machine_mode mode;
723 enum rtx_code code = GET_CODE (op);
724 return ((mode == VOIDmode || GET_MODE (op) == mode)
725 && (code == LT || code == GE || code == LTU || code == GEU));
728 /* Return 1 if this is a signed inequality operator. */
731 signed_inequality_operator (op, mode)
732 register rtx op;
733 enum machine_mode mode;
735 enum rtx_code code = GET_CODE (op);
736 return ((mode == VOIDmode || GET_MODE (op) == mode)
737 && (code == GE || code == GT
738 || code == LE || code == LT));
741 /* Return 1 if this operator is valid for predication. */
744 predicate_operator (op, mode)
745 register rtx op;
746 enum machine_mode mode;
748 enum rtx_code code = GET_CODE (op);
749 return ((GET_MODE (op) == mode || mode == VOIDmode)
750 && (code == EQ || code == NE));
753 /* Return 1 if this operator can be used in a conditional operation. */
756 condop_operator (op, mode)
757 register rtx op;
758 enum machine_mode mode;
760 enum rtx_code code = GET_CODE (op);
761 return ((GET_MODE (op) == mode || mode == VOIDmode)
762 && (code == PLUS || code == MINUS || code == AND
763 || code == IOR || code == XOR));
766 /* Return 1 if this is the ar.lc register. */
769 ar_lc_reg_operand (op, mode)
770 register rtx op;
771 enum machine_mode mode;
773 return (GET_MODE (op) == DImode
774 && (mode == DImode || mode == VOIDmode)
775 && GET_CODE (op) == REG
776 && REGNO (op) == AR_LC_REGNUM);
779 /* Return 1 if this is the ar.ccv register. */
782 ar_ccv_reg_operand (op, mode)
783 register rtx op;
784 enum machine_mode mode;
786 return ((GET_MODE (op) == mode || mode == VOIDmode)
787 && GET_CODE (op) == REG
788 && REGNO (op) == AR_CCV_REGNUM);
791 /* Return 1 if this is the ar.pfs register. */
794 ar_pfs_reg_operand (op, mode)
795 register rtx op;
796 enum machine_mode mode;
798 return ((GET_MODE (op) == mode || mode == VOIDmode)
799 && GET_CODE (op) == REG
800 && REGNO (op) == AR_PFS_REGNUM);
803 /* Like general_operand, but don't allow (mem (addressof)). */
806 general_tfmode_operand (op, mode)
807 rtx op;
808 enum machine_mode mode;
810 if (! general_operand (op, mode))
811 return 0;
812 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
813 return 0;
814 return 1;
817 /* Similarly. */
820 destination_tfmode_operand (op, mode)
821 rtx op;
822 enum machine_mode mode;
824 if (! destination_operand (op, mode))
825 return 0;
826 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
827 return 0;
828 return 1;
831 /* Similarly. */
834 tfreg_or_fp01_operand (op, mode)
835 rtx op;
836 enum machine_mode mode;
838 if (GET_CODE (op) == SUBREG)
839 return 0;
840 return fr_reg_or_fp01_operand (op, mode);
843 /* Return 1 if the operands of a move are ok. */
846 ia64_move_ok (dst, src)
847 rtx dst, src;
849 /* If we're under init_recog_no_volatile, we'll not be able to use
850 memory_operand. So check the code directly and don't worry about
851 the validity of the underlying address, which should have been
852 checked elsewhere anyway. */
853 if (GET_CODE (dst) != MEM)
854 return 1;
855 if (GET_CODE (src) == MEM)
856 return 0;
857 if (register_operand (src, VOIDmode))
858 return 1;
 860 /* Otherwise, this must be a constant, and it must be 0, 0.0, or 1.0. */
861 if (INTEGRAL_MODE_P (GET_MODE (dst)))
862 return src == const0_rtx;
863 else
864 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
 867 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
868 Return the length of the field, or <= 0 on failure. */
871 ia64_depz_field_mask (rop, rshift)
872 rtx rop, rshift;
874 unsigned HOST_WIDE_INT op = INTVAL (rop);
875 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
877 /* Get rid of the zero bits we're shifting in. */
878 op >>= shift;
880 /* We must now have a solid block of 1's at bit 0. */
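  /* If so, OP + 1 is a power of two, and exact_log2 returns the width of the
     field; otherwise it returns -1, which the caller treats as failure.  */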
881 return exact_log2 (op + 1);
884 /* Expand a symbolic constant load. */
885 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
887 void
888 ia64_expand_load_address (dest, src, scratch)
889 rtx dest, src, scratch;
891 rtx temp;
893 /* The destination could be a MEM during initial rtl generation,
894 which isn't a valid destination for the PIC load address patterns. */
895 if (! register_operand (dest, DImode))
896 temp = gen_reg_rtx (DImode);
897 else
898 temp = dest;
900 if (TARGET_AUTO_PIC)
901 emit_insn (gen_load_gprel64 (temp, src));
902 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
903 emit_insn (gen_load_fptr (temp, src));
904 else if (sdata_symbolic_operand (src, DImode))
905 emit_insn (gen_load_gprel (temp, src));
906 else if (GET_CODE (src) == CONST
907 && GET_CODE (XEXP (src, 0)) == PLUS
908 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
909 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
911 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
912 rtx sym = XEXP (XEXP (src, 0), 0);
913 HOST_WIDE_INT ofs, hi, lo;
915 /* Split the offset into a sign extended 14-bit low part
916 and a complementary high part. */
917 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
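  /* Masking to 14 bits and then XORing and subtracting 0x2000 sign-extends
     bit 13, so LO ends up in the range [-0x2000, 0x1fff].  */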
918 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
919 hi = ofs - lo;
921 if (! scratch)
922 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
924 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
925 scratch));
926 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
928 else
930 rtx insn;
931 if (! scratch)
932 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
934 insn = emit_insn (gen_load_symptr (temp, src, scratch));
935 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
938 if (temp != dest)
939 emit_move_insn (dest, temp);
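/* Return the register in which we save the GP value around calls.  When
   SETJMP_P we must use a call-saved hard register (r4), since pseudos are
   not restored by longjmp; otherwise a local register or pseudo will do.  */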
943 ia64_gp_save_reg (setjmp_p)
944 int setjmp_p;
946 rtx save = cfun->machine->ia64_gp_save;
948 if (save != NULL)
950 /* We can't save GP in a pseudo if we are calling setjmp, because
951 pseudos won't be restored by longjmp. For now, we save it in r4. */
952 /* ??? It would be more efficient to save this directly into a stack
953 slot. Unfortunately, the stack slot address gets cse'd across
954 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
955 place. */
957 /* ??? Get the barf bag, Virginia. We've got to replace this thing
958 in place, since this rtx is used in exception handling receivers.
959 Moreover, we must get this rtx out of regno_reg_rtx or reload
960 will do the wrong thing. */
961 unsigned int old_regno = REGNO (save);
962 if (setjmp_p && old_regno != GR_REG (4))
964 REGNO (save) = GR_REG (4);
965 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
968 else
970 if (setjmp_p)
971 save = gen_rtx_REG (DImode, GR_REG (4));
972 else if (! optimize)
973 save = gen_rtx_REG (DImode, LOC_REG (0));
974 else
975 save = gen_reg_rtx (DImode);
976 cfun->machine->ia64_gp_save = save;
979 return save;
982 /* Split a post-reload TImode reference into two DImode components. */
985 ia64_split_timode (out, in, scratch)
986 rtx out[2];
987 rtx in, scratch;
989 switch (GET_CODE (in))
991 case REG:
992 out[0] = gen_rtx_REG (DImode, REGNO (in));
993 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
994 return NULL_RTX;
996 case MEM:
998 rtx base = XEXP (in, 0);
1000 switch (GET_CODE (base))
1002 case REG:
1003 out[0] = adjust_address (in, DImode, 0);
1004 break;
1005 case POST_MODIFY:
1006 base = XEXP (base, 0);
1007 out[0] = adjust_address (in, DImode, 0);
1008 break;
1010 /* Since we're changing the mode, we need to change to POST_MODIFY
1011 as well to preserve the size of the increment. Either that or
1012 do the update in two steps, but we've already got this scratch
1013 register handy so let's use it. */
1014 case POST_INC:
1015 base = XEXP (base, 0);
1016 out[0]
1017 = change_address (in, DImode,
1018 gen_rtx_POST_MODIFY
1019 (Pmode, base, plus_constant (base, 16)));
1020 break;
1021 case POST_DEC:
1022 base = XEXP (base, 0);
1023 out[0]
1024 = change_address (in, DImode,
1025 gen_rtx_POST_MODIFY
1026 (Pmode, base, plus_constant (base, -16)));
1027 break;
1028 default:
1029 abort ();
1032 if (scratch == NULL_RTX)
1033 abort ();
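  /* OUT[1] addresses the second word through SCRATCH; the caller must emit
     the returned add (SCRATCH = BASE + 8) before OUT[1] is used.  */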
1034 out[1] = change_address (in, DImode, scratch);
1035 return gen_adddi3 (scratch, base, GEN_INT (8));
1038 case CONST_INT:
1039 case CONST_DOUBLE:
1040 split_double (in, &out[0], &out[1]);
1041 return NULL_RTX;
1043 default:
1044 abort ();
1048 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1049 through memory plus an extra GR scratch register. Except that you can
1050 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1051 SECONDARY_RELOAD_CLASS, but not both.
1053 We got into problems in the first place by allowing a construct like
1054 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1055 This solution attempts to prevent this situation from occurring. When
1056 we see something like the above, we spill the inner register to memory. */
1059 spill_tfmode_operand (in, force)
1060 rtx in;
1061 int force;
1063 if (GET_CODE (in) == SUBREG
1064 && GET_MODE (SUBREG_REG (in)) == TImode
1065 && GET_CODE (SUBREG_REG (in)) == REG)
1067 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
1068 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1070 else if (force && GET_CODE (in) == REG)
1072 rtx mem = gen_mem_addressof (in, NULL_TREE);
1073 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1075 else if (GET_CODE (in) == MEM
1076 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1077 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1078 else
1079 return in;
1082 /* Emit comparison instruction if necessary, returning the expression
1083 that holds the compare result in the proper mode. */
1086 ia64_expand_compare (code, mode)
1087 enum rtx_code code;
1088 enum machine_mode mode;
1090 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1091 rtx cmp;
1093 /* If we have a BImode input, then we already have a compare result, and
1094 do not need to emit another comparison. */
1095 if (GET_MODE (op0) == BImode)
1097 if ((code == NE || code == EQ) && op1 == const0_rtx)
1098 cmp = op0;
1099 else
1100 abort ();
1102 else
1104 cmp = gen_reg_rtx (BImode);
1105 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1106 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1107 code = NE;
1110 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1113 /* Emit the appropriate sequence for a call. */
1115 void
1116 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1117 rtx retval;
1118 rtx addr;
1119 rtx nextarg;
1120 int sibcall_p;
1122 rtx insn, b0, pfs, gp_save, narg_rtx;
1123 int narg;
1125 addr = XEXP (addr, 0);
1126 b0 = gen_rtx_REG (DImode, R_BR (0));
1127 pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);
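  /* NEXTARG is the first unused argument register; its distance from in0 or
     out0 gives the number of register arguments, which the call patterns
     take as an operand.  */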
1129 if (! nextarg)
1130 narg = 0;
1131 else if (IN_REGNO_P (REGNO (nextarg)))
1132 narg = REGNO (nextarg) - IN_REG (0);
1133 else
1134 narg = REGNO (nextarg) - OUT_REG (0);
1135 narg_rtx = GEN_INT (narg);
1137 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1139 if (sibcall_p)
1140 insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
1141 else if (! retval)
1142 insn = gen_call_nopic (addr, narg_rtx, b0);
1143 else
1144 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1145 emit_call_insn (insn);
1146 return;
1149 if (sibcall_p)
1150 gp_save = NULL_RTX;
1151 else
1152 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1154 /* If this is an indirect call, then we have the address of a descriptor. */
1155 if (! symbolic_operand (addr, VOIDmode))
1157 rtx dest;
1159 if (! sibcall_p)
1160 emit_move_insn (gp_save, pic_offset_table_rtx);
1162 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1163 emit_move_insn (pic_offset_table_rtx,
1164 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1166 if (sibcall_p)
1167 insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
1168 else if (! retval)
1169 insn = gen_call_pic (dest, narg_rtx, b0);
1170 else
1171 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1172 emit_call_insn (insn);
1174 if (! sibcall_p)
1175 emit_move_insn (pic_offset_table_rtx, gp_save);
1177 else if (TARGET_CONST_GP)
1179 if (sibcall_p)
1180 insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
1181 else if (! retval)
1182 insn = gen_call_nopic (addr, narg_rtx, b0);
1183 else
1184 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1185 emit_call_insn (insn);
1187 else
1189 if (sibcall_p)
1190 emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0, pfs));
1191 else
1193 emit_move_insn (gp_save, pic_offset_table_rtx);
1195 if (! retval)
1196 insn = gen_call_pic (addr, narg_rtx, b0);
1197 else
1198 insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
1199 emit_call_insn (insn);
1201 emit_move_insn (pic_offset_table_rtx, gp_save);
1206 /* Begin the assembly file. */
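/* Emit a single .pred.safe_across_calls directive naming the ranges of
   predicate registers p1-p63 that are not call-clobbered, or nothing if
   every predicate register is call-used.  */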
1208 void
1209 emit_safe_across_calls (f)
1210 FILE *f;
1212 unsigned int rs, re;
1213 int out_state;
1215 rs = 1;
1216 out_state = 0;
1217 while (1)
1219 while (rs < 64 && call_used_regs[PR_REG (rs)])
1220 rs++;
1221 if (rs >= 64)
1222 break;
1223 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1224 continue;
1225 if (out_state == 0)
1227 fputs ("\t.pred.safe_across_calls ", f);
1228 out_state = 1;
1230 else
1231 fputc (',', f);
1232 if (re == rs + 1)
1233 fprintf (f, "p%u", rs);
1234 else
1235 fprintf (f, "p%u-p%u", rs, re - 1);
1236 rs = re + 1;
1238 if (out_state)
1239 fputc ('\n', f);
1243 /* Structure to be filled in by ia64_compute_frame_size with register
1244 save masks and offsets for the current function. */
1246 struct ia64_frame_info
1248 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1249 the caller's scratch area. */
1250 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1251 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1252 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
1253 HARD_REG_SET mask; /* mask of saved registers. */
1254 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1255 registers or long-term scratches. */
1256 int n_spilled; /* number of spilled registers. */
1257 int reg_fp; /* register for fp. */
1258 int reg_save_b0; /* save register for b0. */
1259 int reg_save_pr; /* save register for prs. */
1260 int reg_save_ar_pfs; /* save register for ar.pfs. */
1261 int reg_save_ar_unat; /* save register for ar.unat. */
1262 int reg_save_ar_lc; /* save register for ar.lc. */
1263 int n_input_regs; /* number of input registers used. */
1264 int n_local_regs; /* number of local registers used. */
1265 int n_output_regs; /* number of output registers used. */
1266 int n_rotate_regs; /* number of rotating registers used. */
1268 char need_regstk; /* true if a .regstk directive needed. */
1269 char initialized; /* true if the data is finalized. */
1272 /* Current frame information calculated by ia64_compute_frame_size. */
1273 static struct ia64_frame_info current_frame_info;
1275 /* Helper function for ia64_compute_frame_size: find an appropriate general
1276 register to spill some special register to. SPECIAL_SPILL_MASK contains
1277 bits in GR0 to GR31 that have already been allocated by this routine.
1278 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1280 static int
1281 find_gr_spill (try_locals)
1282 int try_locals;
1284 int regno;
1286 /* If this is a leaf function, first try an otherwise unused
1287 call-clobbered register. */
1288 if (current_function_is_leaf)
1290 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1291 if (! regs_ever_live[regno]
1292 && call_used_regs[regno]
1293 && ! fixed_regs[regno]
1294 && ! global_regs[regno]
1295 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1297 current_frame_info.gr_used_mask |= 1 << regno;
1298 return regno;
1302 if (try_locals)
1304 regno = current_frame_info.n_local_regs;
1305 /* If there is a frame pointer, then we can't use loc79, because
1306 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1307 reg_name switching code in ia64_expand_prologue. */
1308 if (regno < (80 - frame_pointer_needed))
1310 current_frame_info.n_local_regs = regno + 1;
1311 return LOC_REG (0) + regno;
1315 /* Failed to find a general register to spill to. Must use stack. */
1316 return 0;
1319 /* In order to make for nice schedules, we try to allocate every temporary
1320 to a different register. We must of course stay away from call-saved,
1321 fixed, and global registers. We must also stay away from registers
1322 allocated in current_frame_info.gr_used_mask, since those include regs
1323 used all through the prologue.
1325 Any register allocated here must be used immediately. The idea is to
1326 aid scheduling, not to solve data flow problems. */
1328 static int last_scratch_gr_reg;
1330 static int
1331 next_scratch_gr_reg ()
1333 int i, regno;
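  /* Cycle through r0-r31, starting just past the register handed out last
     time, so that back-to-back requests land on different registers.  */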
1335 for (i = 0; i < 32; ++i)
1337 regno = (last_scratch_gr_reg + i + 1) & 31;
1338 if (call_used_regs[regno]
1339 && ! fixed_regs[regno]
1340 && ! global_regs[regno]
1341 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1343 last_scratch_gr_reg = regno;
1344 return regno;
1348 /* There must be _something_ available. */
1349 abort ();
1352 /* Helper function for ia64_compute_frame_size, called through
1353 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1355 static void
1356 mark_reg_gr_used_mask (reg, data)
1357 rtx reg;
1358 void *data ATTRIBUTE_UNUSED;
1360 unsigned int regno = REGNO (reg);
1361 if (regno < 32)
1362 current_frame_info.gr_used_mask |= 1 << regno;
1365 /* Compute the size of the stack frame and the register save layout for the
1366 current function, recording the results in current_frame_info. SIZE is the
1367 number of bytes of space needed for local variables. */
1369 static void
1370 ia64_compute_frame_size (size)
1371 HOST_WIDE_INT size;
1373 HOST_WIDE_INT total_size;
1374 HOST_WIDE_INT spill_size = 0;
1375 HOST_WIDE_INT extra_spill_size = 0;
1376 HOST_WIDE_INT pretend_args_size;
1377 HARD_REG_SET mask;
1378 int n_spilled = 0;
1379 int spilled_gr_p = 0;
1380 int spilled_fr_p = 0;
1381 unsigned int regno;
1382 int i;
1384 if (current_frame_info.initialized)
1385 return;
1387 memset (&current_frame_info, 0, sizeof current_frame_info);
1388 CLEAR_HARD_REG_SET (mask);
1390 /* Don't allocate scratches to the return register. */
1391 diddle_return_value (mark_reg_gr_used_mask, NULL);
1393 /* Don't allocate scratches to the EH scratch registers. */
1394 if (cfun->machine->ia64_eh_epilogue_sp)
1395 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1396 if (cfun->machine->ia64_eh_epilogue_bsp)
1397 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1399 /* Find the size of the register stack frame. We have only 80 local
1400 registers, because we reserve 8 for the inputs and 8 for the
1401 outputs. */
1403 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1404 since we'll be adjusting that down later. */
1405 regno = LOC_REG (78) + ! frame_pointer_needed;
1406 for (; regno >= LOC_REG (0); regno--)
1407 if (regs_ever_live[regno])
1408 break;
1409 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1411 /* For functions marked with the syscall_linkage attribute, we must mark
1412 all eight input registers as in use, so that locals aren't visible to
1413 the caller. */
1415 if (cfun->machine->n_varargs > 0
1416 || lookup_attribute ("syscall_linkage",
1417 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1418 current_frame_info.n_input_regs = 8;
1419 else
1421 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1422 if (regs_ever_live[regno])
1423 break;
1424 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1427 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1428 if (regs_ever_live[regno])
1429 break;
1430 i = regno - OUT_REG (0) + 1;
1432 /* When -p profiling, we need one output register for the mcount argument.
1433 Likewise for -a profiling for the bb_init_func argument. For -ax
1434 profiling, we need two output registers for the two bb_init_trace_func
1435 arguments. */
1436 if (profile_flag || profile_block_flag == 1)
1437 i = MAX (i, 1);
1438 else if (profile_block_flag == 2)
1439 i = MAX (i, 2);
1440 current_frame_info.n_output_regs = i;
1442 /* ??? No rotating register support yet. */
1443 current_frame_info.n_rotate_regs = 0;
1445 /* Discover which registers need spilling, and how much room that
1446 will take. Begin with floating point and general registers,
1447 which will always wind up on the stack. */
1449 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1450 if (regs_ever_live[regno] && ! call_used_regs[regno])
1452 SET_HARD_REG_BIT (mask, regno);
1453 spill_size += 16;
1454 n_spilled += 1;
1455 spilled_fr_p = 1;
1458 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1459 if (regs_ever_live[regno] && ! call_used_regs[regno])
1461 SET_HARD_REG_BIT (mask, regno);
1462 spill_size += 8;
1463 n_spilled += 1;
1464 spilled_gr_p = 1;
1467 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1468 if (regs_ever_live[regno] && ! call_used_regs[regno])
1470 SET_HARD_REG_BIT (mask, regno);
1471 spill_size += 8;
1472 n_spilled += 1;
1475 /* Now come all special registers that might get saved in other
1476 general registers. */
1478 if (frame_pointer_needed)
1480 current_frame_info.reg_fp = find_gr_spill (1);
1481 /* If we did not get a register, then we take LOC79. This is guaranteed
1482 to be free, even if regs_ever_live is already set, because this is
1483 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1484 as we don't count loc79 above. */
1485 if (current_frame_info.reg_fp == 0)
1487 current_frame_info.reg_fp = LOC_REG (79);
1488 current_frame_info.n_local_regs++;
1492 if (! current_function_is_leaf)
1494 /* Emit a save of BR0 if we call other functions. Do this even
1495 if this function doesn't return, as EH depends on this to be
1496 able to unwind the stack. */
1497 SET_HARD_REG_BIT (mask, BR_REG (0));
1499 current_frame_info.reg_save_b0 = find_gr_spill (1);
1500 if (current_frame_info.reg_save_b0 == 0)
1502 spill_size += 8;
1503 n_spilled += 1;
1506 /* Similarly for ar.pfs. */
1507 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1508 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1509 if (current_frame_info.reg_save_ar_pfs == 0)
1511 extra_spill_size += 8;
1512 n_spilled += 1;
1515 else
1517 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1519 SET_HARD_REG_BIT (mask, BR_REG (0));
1520 spill_size += 8;
1521 n_spilled += 1;
1525 /* Unwind descriptor hackery: things are most efficient if we allocate
1526 consecutive GR save registers for RP, PFS, FP in that order. However,
1527 it is absolutely critical that FP get the only hard register that's
1528 guaranteed to be free, so we allocated it first. If all three did
1529 happen to be allocated hard regs, and are consecutive, rearrange them
1530 into the preferred order now. */
1531 if (current_frame_info.reg_fp != 0
1532 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1533 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1535 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1536 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1537 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1540 /* See if we need to store the predicate register block. */
1541 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1542 if (regs_ever_live[regno] && ! call_used_regs[regno])
1543 break;
1544 if (regno <= PR_REG (63))
1546 SET_HARD_REG_BIT (mask, PR_REG (0));
1547 current_frame_info.reg_save_pr = find_gr_spill (1);
1548 if (current_frame_info.reg_save_pr == 0)
1550 extra_spill_size += 8;
1551 n_spilled += 1;
1554 /* ??? Mark them all as used so that register renaming and such
1555 are free to use them. */
1556 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1557 regs_ever_live[regno] = 1;
1560 /* If we're forced to use st8.spill, we're forced to save and restore
1561 ar.unat as well. */
1562 if (spilled_gr_p || cfun->machine->n_varargs)
1564 regs_ever_live[AR_UNAT_REGNUM] = 1;
1565 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1566 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1567 if (current_frame_info.reg_save_ar_unat == 0)
1569 extra_spill_size += 8;
1570 n_spilled += 1;
1574 if (regs_ever_live[AR_LC_REGNUM])
1576 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1577 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1578 if (current_frame_info.reg_save_ar_lc == 0)
1580 extra_spill_size += 8;
1581 n_spilled += 1;
1585 /* If we have an odd number of words of pretend arguments written to
1586 the stack, then the FR save area will be unaligned. We round the
1587 size of this area up to keep things 16 byte aligned. */
1588 if (spilled_fr_p)
1589 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1590 else
1591 pretend_args_size = current_function_pretend_args_size;
1593 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1594 + current_function_outgoing_args_size);
1595 total_size = IA64_STACK_ALIGN (total_size);
1597 /* We always use the 16-byte scratch area provided by the caller, but
1598 if we are a leaf function, there's no one to which we need to provide
1599 a scratch area. */
1600 if (current_function_is_leaf)
1601 total_size = MAX (0, total_size - 16);
1603 current_frame_info.total_size = total_size;
1604 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1605 current_frame_info.spill_size = spill_size;
1606 current_frame_info.extra_spill_size = extra_spill_size;
1607 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1608 current_frame_info.n_spilled = n_spilled;
1609 current_frame_info.initialized = reload_completed;
1612 /* Compute the initial difference between the specified pair of registers. */
1614 HOST_WIDE_INT
1615 ia64_initial_elimination_offset (from, to)
1616 int from, to;
1618 HOST_WIDE_INT offset;
1620 ia64_compute_frame_size (get_frame_size ());
1621 switch (from)
1623 case FRAME_POINTER_REGNUM:
1624 if (to == HARD_FRAME_POINTER_REGNUM)
1626 if (current_function_is_leaf)
1627 offset = -current_frame_info.total_size;
1628 else
1629 offset = -(current_frame_info.total_size
1630 - current_function_outgoing_args_size - 16);
1632 else if (to == STACK_POINTER_REGNUM)
1634 if (current_function_is_leaf)
1635 offset = 0;
1636 else
1637 offset = 16 + current_function_outgoing_args_size;
1639 else
1640 abort ();
1641 break;
1643 case ARG_POINTER_REGNUM:
1644 /* Arguments start above the 16 byte save area, unless stdarg
1645 in which case we store through the 16 byte save area. */
1646 if (to == HARD_FRAME_POINTER_REGNUM)
1647 offset = 16 - current_function_pretend_args_size;
1648 else if (to == STACK_POINTER_REGNUM)
1649 offset = (current_frame_info.total_size
1650 + 16 - current_function_pretend_args_size);
1651 else
1652 abort ();
1653 break;
1655 case RETURN_ADDRESS_POINTER_REGNUM:
1656 offset = 0;
1657 break;
1659 default:
1660 abort ();
1663 return offset;
1666 /* If there are more than a trivial number of register spills, we use
1667 two interleaved iterators so that we can get two memory references
1668 per insn group.
1670 In order to simplify things in the prologue and epilogue expanders,
1671 we use helper functions to fix up the memory references after the
1672 fact with the appropriate offsets to a POST_MODIFY memory mode.
1673 The following data structure tracks the state of the two iterators
1674 while insns are being emitted. */
1676 struct spill_fill_data
1678 rtx init_after; /* point at which to emit initializations */
1679 rtx init_reg[2]; /* initial base register */
1680 rtx iter_reg[2]; /* the iterator registers */
1681 rtx *prev_addr[2]; /* address of last memory use */
1682 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
1683 HOST_WIDE_INT prev_off[2]; /* last offset */
1684 int n_iter; /* number of iterators in use */
1685 int next_iter; /* next iterator to use */
1686 unsigned int save_gr_used_mask;
1689 static struct spill_fill_data spill_fill_data;
1691 static void
1692 setup_spill_pointers (n_spills, init_reg, cfa_off)
1693 int n_spills;
1694 rtx init_reg;
1695 HOST_WIDE_INT cfa_off;
1697 int i;
1699 spill_fill_data.init_after = get_last_insn ();
1700 spill_fill_data.init_reg[0] = init_reg;
1701 spill_fill_data.init_reg[1] = init_reg;
1702 spill_fill_data.prev_addr[0] = NULL;
1703 spill_fill_data.prev_addr[1] = NULL;
1704 spill_fill_data.prev_insn[0] = NULL;
1705 spill_fill_data.prev_insn[1] = NULL;
1706 spill_fill_data.prev_off[0] = cfa_off;
1707 spill_fill_data.prev_off[1] = cfa_off;
1708 spill_fill_data.next_iter = 0;
1709 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1711 spill_fill_data.n_iter = 1 + (n_spills > 2);
1712 for (i = 0; i < spill_fill_data.n_iter; ++i)
1714 int regno = next_scratch_gr_reg ();
1715 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1716 current_frame_info.gr_used_mask |= 1 << regno;
1720 static void
1721 finish_spill_pointers ()
1723 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
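/* Return a MEM through which REG can be spilled or restored at offset
   CFA_OFF from the CFA.  The previous reference through the current
   iterator is patched into a POST_MODIFY when the displacement fits;
   otherwise an explicit add advances the iterator register.  */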
1726 static rtx
1727 spill_restore_mem (reg, cfa_off)
1728 rtx reg;
1729 HOST_WIDE_INT cfa_off;
1731 int iter = spill_fill_data.next_iter;
1732 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1733 rtx disp_rtx = GEN_INT (disp);
1734 rtx mem;
1736 if (spill_fill_data.prev_addr[iter])
1738 if (CONST_OK_FOR_N (disp))
1740 *spill_fill_data.prev_addr[iter]
1741 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1742 gen_rtx_PLUS (DImode,
1743 spill_fill_data.iter_reg[iter],
1744 disp_rtx));
1745 REG_NOTES (spill_fill_data.prev_insn[iter])
1746 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
1747 REG_NOTES (spill_fill_data.prev_insn[iter]));
1749 else
1751 /* ??? Could use register post_modify for loads. */
1752 if (! CONST_OK_FOR_I (disp))
1754 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1755 emit_move_insn (tmp, disp_rtx);
1756 disp_rtx = tmp;
1758 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1759 spill_fill_data.iter_reg[iter], disp_rtx));
1762 /* Micro-optimization: if we've created a frame pointer, it's at
1763 CFA 0, which may allow the real iterator to be initialized lower,
1764 slightly increasing parallelism. Also, if there are few saves
1765 it may eliminate the iterator entirely. */
1766 else if (disp == 0
1767 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1768 && frame_pointer_needed)
1770 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1771 set_mem_alias_set (mem, get_varargs_alias_set ());
1772 return mem;
1774 else
1776 rtx seq;
1778 if (disp == 0)
1779 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1780 spill_fill_data.init_reg[iter]);
1781 else
1783 start_sequence ();
1785 if (! CONST_OK_FOR_I (disp))
1787 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1788 emit_move_insn (tmp, disp_rtx);
1789 disp_rtx = tmp;
1792 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1793 spill_fill_data.init_reg[iter],
1794 disp_rtx));
1796 seq = gen_sequence ();
1797 end_sequence ();
1800 /* Be careful about being the first insn in a sequence. */
1801 if (spill_fill_data.init_after)
1802 spill_fill_data.init_after
1803 = emit_insn_after (seq, spill_fill_data.init_after);
1804 else
1806 rtx first = get_insns ();
1807 if (first)
1808 spill_fill_data.init_after
1809 = emit_insn_before (seq, first);
1810 else
1811 spill_fill_data.init_after = emit_insn (seq);
1815 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1817 /* ??? Not all of the spills are for varargs, but some of them are.
1818 The rest of the spills belong in an alias set of their own. But
1819 it doesn't actually hurt to include them here. */
1820 set_mem_alias_set (mem, get_varargs_alias_set ());
1822 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1823 spill_fill_data.prev_off[iter] = cfa_off;
1825 if (++iter >= spill_fill_data.n_iter)
1826 iter = 0;
1827 spill_fill_data.next_iter = iter;
1829 return mem;
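/* Emit a spill of REG at CFA_OFF using MOVE_FN.  If FRAME_REG is set, mark
   the insn frame related and attach a REG_FRAME_RELATED_EXPR note giving
   the unwinder an equivalent plain base-plus-offset store.  */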
1832 static void
1833 do_spill (move_fn, reg, cfa_off, frame_reg)
1834 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1835 rtx reg, frame_reg;
1836 HOST_WIDE_INT cfa_off;
1838 int iter = spill_fill_data.next_iter;
1839 rtx mem, insn;
1841 mem = spill_restore_mem (reg, cfa_off);
1842 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1843 spill_fill_data.prev_insn[iter] = insn;
1845 if (frame_reg)
1847 rtx base;
1848 HOST_WIDE_INT off;
1850 RTX_FRAME_RELATED_P (insn) = 1;
1852 /* Don't even pretend that the unwind code can intuit its way
1853 through a pair of interleaved post_modify iterators. Just
1854 provide the correct answer. */
1856 if (frame_pointer_needed)
1858 base = hard_frame_pointer_rtx;
1859 off = - cfa_off;
1861 else
1863 base = stack_pointer_rtx;
1864 off = current_frame_info.total_size - cfa_off;
1867 REG_NOTES (insn)
1868 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1869 gen_rtx_SET (VOIDmode,
1870 gen_rtx_MEM (GET_MODE (reg),
1871 plus_constant (base, off)),
1872 frame_reg),
1873 REG_NOTES (insn));
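/* Emit a restore of REG from the save area at CFA_OFF using MOVE_FN.  */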
1877 static void
1878 do_restore (move_fn, reg, cfa_off)
1879 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1880 rtx reg;
1881 HOST_WIDE_INT cfa_off;
1883 int iter = spill_fill_data.next_iter;
1884 rtx insn;
1886 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1887 GEN_INT (cfa_off)));
1888 spill_fill_data.prev_insn[iter] = insn;
1891 /* Wrapper functions that discard the CONST_INT spill offset. These
1892 exist so that we can give gr_spill/gr_fill the offset they need and
1893 use a consistent function interface. */
1895 static rtx
1896 gen_movdi_x (dest, src, offset)
1897 rtx dest, src;
1898 rtx offset ATTRIBUTE_UNUSED;
1900 return gen_movdi (dest, src);
1903 static rtx
1904 gen_fr_spill_x (dest, src, offset)
1905 rtx dest, src;
1906 rtx offset ATTRIBUTE_UNUSED;
1908 return gen_fr_spill (dest, src);
1911 static rtx
1912 gen_fr_restore_x (dest, src, offset)
1913 rtx dest, src;
1914 rtx offset ATTRIBUTE_UNUSED;
1916 return gen_fr_restore (dest, src);
1919 /* Called after register allocation to add any instructions needed for the
1920 prologue. Using a prologue insn is favored compared to putting all of the
1921 instructions in output_function_prologue(), since it allows the scheduler
1922 to intermix instructions with the saves of the caller saved registers. In
1923 some cases, it might be necessary to emit a barrier instruction as the last
1924 insn to prevent such scheduling.
1926 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1927 so that the debug info generation code can handle them properly.
1929 The register save area is laid out like so:
1930 cfa+16
1931 [ varargs spill area ]
1932 [ fr register spill area ]
1933 [ br register spill area ]
1934 [ ar register spill area ]
1935 [ pr register spill area ]
1936 [ gr register spill area ] */
1938 /* ??? Get inefficient code when the frame size is larger than can fit in an
1939 adds instruction. */
1941 void
1942 ia64_expand_prologue ()
1944 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1945 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1946 rtx reg, alt_reg;
1948 ia64_compute_frame_size (get_frame_size ());
1949 last_scratch_gr_reg = 15;
1951 /* If there is no epilogue, then we don't need some prologue insns.
1952 We need to avoid emitting the dead prologue insns, because flow
1953 will complain about them. */
1954 if (optimize)
1956 edge e;
1958 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1959 if ((e->flags & EDGE_FAKE) == 0
1960 && (e->flags & EDGE_FALLTHRU) != 0)
1961 break;
1962 epilogue_p = (e != NULL);
1964 else
1965 epilogue_p = 1;
1967 /* Set the local, input, and output register names. We need to do this
1968 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1969 half. If we use in/loc/out register names, then we get assembler errors
1970 in crtn.S because there is no alloc insn or regstk directive in there. */
1971 if (! TARGET_REG_NAMES)
1973 int inputs = current_frame_info.n_input_regs;
1974 int locals = current_frame_info.n_local_regs;
1975 int outputs = current_frame_info.n_output_regs;
1977 for (i = 0; i < inputs; i++)
1978 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
1979 for (i = 0; i < locals; i++)
1980 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
1981 for (i = 0; i < outputs; i++)
1982 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
1985 /* Set the frame pointer register name. The regnum is logically loc79,
1986 but of course we'll not have allocated that many locals. Rather than
1987 worrying about renumbering the existing rtxs, we adjust the name. */
1988 /* ??? This code means that we can never use one local register when
1989 there is a frame pointer. loc79 gets wasted in this case, as it is
1990 renamed to a register that will never be used. See also the try_locals
1991 code in find_gr_spill. */
1992 if (current_frame_info.reg_fp)
1994 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
1995 reg_names[HARD_FRAME_POINTER_REGNUM]
1996 = reg_names[current_frame_info.reg_fp];
1997 reg_names[current_frame_info.reg_fp] = tmp;
2000 /* Fix up the return address placeholder. */
2001 /* ??? We can fail if __builtin_return_address is used, and we didn't
2002 allocate a register in which to save b0. I can't think of a way to
2003 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2004 then be sure that I got the right one. Further, reload doesn't seem
2005 to care if an eliminable register isn't used, and "eliminates" it
2006 anyway. */
2007 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2008 && current_frame_info.reg_save_b0 != 0)
2009 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2011 /* We don't need an alloc instruction if we've used no outputs or locals. */
2012 if (current_frame_info.n_local_regs == 0
2013 && current_frame_info.n_output_regs == 0
2014 && current_frame_info.n_input_regs <= current_function_args_info.words)
2016 /* If there is no alloc, but there are input registers used, then we
2017 need a .regstk directive. */
2018 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2019 ar_pfs_save_reg = NULL_RTX;
2021 else
2023 current_frame_info.need_regstk = 0;
2025 if (current_frame_info.reg_save_ar_pfs)
2026 regno = current_frame_info.reg_save_ar_pfs;
2027 else
2028 regno = next_scratch_gr_reg ();
2029 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2031 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2032 GEN_INT (current_frame_info.n_input_regs),
2033 GEN_INT (current_frame_info.n_local_regs),
2034 GEN_INT (current_frame_info.n_output_regs),
2035 GEN_INT (current_frame_info.n_rotate_regs)));
2036 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2039 /* Set up frame pointer, stack pointer, and spill iterators. */
2041 n_varargs = cfun->machine->n_varargs;
2042 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2043 stack_pointer_rtx, 0);
2045 if (frame_pointer_needed)
2047 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2048 RTX_FRAME_RELATED_P (insn) = 1;
2051 if (current_frame_info.total_size != 0)
2053 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2054 rtx offset;
2056 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2057 offset = frame_size_rtx;
2058 else
2060 regno = next_scratch_gr_reg ();
2061 offset = gen_rtx_REG (DImode, regno);
2062 emit_move_insn (offset, frame_size_rtx);
2065 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2066 stack_pointer_rtx, offset));
2068 if (! frame_pointer_needed)
2070 RTX_FRAME_RELATED_P (insn) = 1;
2071 if (GET_CODE (offset) != CONST_INT)
2073 REG_NOTES (insn)
2074 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2075 gen_rtx_SET (VOIDmode,
2076 stack_pointer_rtx,
2077 gen_rtx_PLUS (DImode,
2078 stack_pointer_rtx,
2079 frame_size_rtx)),
2080 REG_NOTES (insn));
2084 /* ??? At this point we must generate a magic insn that appears to
2085 modify the stack pointer, the frame pointer, and all spill
2086 iterators. This would allow the most scheduling freedom. For
2087 now, just hard stop. */
2088 emit_insn (gen_blockage ());
2091 /* Must copy out ar.unat before doing any integer spills. */
2092 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2094 if (current_frame_info.reg_save_ar_unat)
2095 ar_unat_save_reg
2096 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2097 else
2099 alt_regno = next_scratch_gr_reg ();
2100 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2101 current_frame_info.gr_used_mask |= 1 << alt_regno;
2104 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2105 insn = emit_move_insn (ar_unat_save_reg, reg);
2106 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2108 /* Even if we're not going to generate an epilogue, we still
2109 need to save the register so that EH works. */
2110 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2111 emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
2113 else
2114 ar_unat_save_reg = NULL_RTX;
2116 /* Spill all varargs registers. Do this before spilling any GR registers,
2117 since we want the UNAT bits for the GR registers to override the UNAT
2118 bits from varargs, which we don't care about. */
2120 cfa_off = -16;
2121 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2123 reg = gen_rtx_REG (DImode, regno);
2124 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2127 /* Locate the bottom of the register save area. */
2128 cfa_off = (current_frame_info.spill_cfa_off
2129 + current_frame_info.spill_size
2130 + current_frame_info.extra_spill_size);
2132 /* Save the predicate register block either in a register or in memory. */
2133 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2135 reg = gen_rtx_REG (DImode, PR_REG (0));
2136 if (current_frame_info.reg_save_pr != 0)
2138 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2139 insn = emit_move_insn (alt_reg, reg);
2141 /* ??? Denote pr spill/fill by a DImode move that modifies all
2142 64 hard registers. */
2143 RTX_FRAME_RELATED_P (insn) = 1;
2144 REG_NOTES (insn)
2145 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2146 gen_rtx_SET (VOIDmode, alt_reg, reg),
2147 REG_NOTES (insn));
2149 /* Even if we're not going to generate an epilogue, we still
2150 need to save the register so that EH works. */
2151 if (! epilogue_p)
2152 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2154 else
2156 alt_regno = next_scratch_gr_reg ();
2157 alt_reg = gen_rtx_REG (DImode, alt_regno);
2158 insn = emit_move_insn (alt_reg, reg);
2159 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2160 cfa_off -= 8;
2164 /* Handle AR regs in numerical order. All of them get special handling. */
2165 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2166 && current_frame_info.reg_save_ar_unat == 0)
2168 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2169 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2170 cfa_off -= 8;
2173 /* The alloc insn already copied ar.pfs into a general register. The
2174 only thing we have to do now is copy that register to a stack slot
2175 if we'd not allocated a local register for the job. */
2176 if (current_frame_info.reg_save_ar_pfs == 0
2177 && ! current_function_is_leaf)
2179 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2180 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2181 cfa_off -= 8;
2184 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2186 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2187 if (current_frame_info.reg_save_ar_lc != 0)
2189 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2190 insn = emit_move_insn (alt_reg, reg);
2191 RTX_FRAME_RELATED_P (insn) = 1;
2193 /* Even if we're not going to generate an epilogue, we still
2194 need to save the register so that EH works. */
2195 if (! epilogue_p)
2196 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2198 else
2200 alt_regno = next_scratch_gr_reg ();
2201 alt_reg = gen_rtx_REG (DImode, alt_regno);
2202 emit_move_insn (alt_reg, reg);
2203 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2204 cfa_off -= 8;
2208 /* We should now be at the base of the gr/br/fr spill area. */
2209 if (cfa_off != (current_frame_info.spill_cfa_off
2210 + current_frame_info.spill_size))
2211 abort ();
2213 /* Spill all general registers. */
2214 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2215 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2217 reg = gen_rtx_REG (DImode, regno);
2218 do_spill (gen_gr_spill, reg, cfa_off, reg);
2219 cfa_off -= 8;
2222 /* Handle BR0 specially -- it may be getting stored permanently in
2223 some GR register. */
2224 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2226 reg = gen_rtx_REG (DImode, BR_REG (0));
2227 if (current_frame_info.reg_save_b0 != 0)
2229 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2230 insn = emit_move_insn (alt_reg, reg);
2231 RTX_FRAME_RELATED_P (insn) = 1;
2233 /* Even if we're not going to generate an epilogue, we still
2234 need to save the register so that EH works. */
2235 if (! epilogue_p)
2236 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2238 else
2240 alt_regno = next_scratch_gr_reg ();
2241 alt_reg = gen_rtx_REG (DImode, alt_regno);
2242 emit_move_insn (alt_reg, reg);
2243 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2244 cfa_off -= 8;
2248 /* Spill the rest of the BR registers. */
2249 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2250 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2252 alt_regno = next_scratch_gr_reg ();
2253 alt_reg = gen_rtx_REG (DImode, alt_regno);
2254 reg = gen_rtx_REG (DImode, regno);
2255 emit_move_insn (alt_reg, reg);
2256 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2257 cfa_off -= 8;
2260 /* Align the frame and spill all FR registers. */
2261 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2262 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2264 if (cfa_off & 15)
2265 abort ();
2266 reg = gen_rtx_REG (TFmode, regno);
2267 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2268 cfa_off -= 16;
2271 if (cfa_off != current_frame_info.spill_cfa_off)
2272 abort ();
2274 finish_spill_pointers ();
2277 /* Called after register allocation to add any instructions needed for the
2278 epilogue.  Using an epilogue insn is favored compared to putting all of the
2279 instructions in output_function_epilogue(), since it allows the scheduler
2280 to intermix instructions with the restores of the caller saved registers.  In
2281 some cases, it might be necessary to emit a barrier instruction as the last
2282 insn to prevent such scheduling. */
2284 void
2285 ia64_expand_epilogue (sibcall_p)
2286 int sibcall_p;
2288 rtx insn, reg, alt_reg, ar_unat_save_reg;
2289 int regno, alt_regno, cfa_off;
2291 ia64_compute_frame_size (get_frame_size ());
2293 /* If there is a frame pointer, then we use it instead of the stack
2294 pointer, so that the stack pointer does not need to be valid when
2295 the epilogue starts. See EXIT_IGNORE_STACK. */
2296 if (frame_pointer_needed)
2297 setup_spill_pointers (current_frame_info.n_spilled,
2298 hard_frame_pointer_rtx, 0);
2299 else
2300 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2301 current_frame_info.total_size);
2303 if (current_frame_info.total_size != 0)
2305 /* ??? At this point we must generate a magic insn that appears to
2306 modify the spill iterators and the frame pointer. This would
2307 allow the most scheduling freedom. For now, just hard stop. */
2308 emit_insn (gen_blockage ());
2311 /* Locate the bottom of the register save area. */
2312 cfa_off = (current_frame_info.spill_cfa_off
2313 + current_frame_info.spill_size
2314 + current_frame_info.extra_spill_size);
2316 /* Restore the predicate registers. */
2317 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2319 if (current_frame_info.reg_save_pr != 0)
2320 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2321 else
2323 alt_regno = next_scratch_gr_reg ();
2324 alt_reg = gen_rtx_REG (DImode, alt_regno);
2325 do_restore (gen_movdi_x, alt_reg, cfa_off);
2326 cfa_off -= 8;
2328 reg = gen_rtx_REG (DImode, PR_REG (0));
2329 emit_move_insn (reg, alt_reg);
2332 /* Restore the application registers. */
2334 /* Load the saved unat from the stack, but do not restore it until
2335 after the GRs have been restored. */
2336 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2338 if (current_frame_info.reg_save_ar_unat != 0)
2339 ar_unat_save_reg
2340 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2341 else
2343 alt_regno = next_scratch_gr_reg ();
2344 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2345 current_frame_info.gr_used_mask |= 1 << alt_regno;
2346 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2347 cfa_off -= 8;
2350 else
2351 ar_unat_save_reg = NULL_RTX;
2353 if (current_frame_info.reg_save_ar_pfs != 0)
2355 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2356 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2357 emit_move_insn (reg, alt_reg);
2359 else if (! current_function_is_leaf)
2361 alt_regno = next_scratch_gr_reg ();
2362 alt_reg = gen_rtx_REG (DImode, alt_regno);
2363 do_restore (gen_movdi_x, alt_reg, cfa_off);
2364 cfa_off -= 8;
2365 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2366 emit_move_insn (reg, alt_reg);
2369 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2371 if (current_frame_info.reg_save_ar_lc != 0)
2372 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2373 else
2375 alt_regno = next_scratch_gr_reg ();
2376 alt_reg = gen_rtx_REG (DImode, alt_regno);
2377 do_restore (gen_movdi_x, alt_reg, cfa_off);
2378 cfa_off -= 8;
2380 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2381 emit_move_insn (reg, alt_reg);
2384 /* We should now be at the base of the gr/br/fr spill area. */
2385 if (cfa_off != (current_frame_info.spill_cfa_off
2386 + current_frame_info.spill_size))
2387 abort ();
2389 /* Restore all general registers. */
2390 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2391 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2393 reg = gen_rtx_REG (DImode, regno);
2394 do_restore (gen_gr_restore, reg, cfa_off);
2395 cfa_off -= 8;
2398 /* Restore the branch registers. Handle B0 specially, as it may
2399 have gotten stored in some GR register. */
2400 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2402 if (current_frame_info.reg_save_b0 != 0)
2403 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2404 else
2406 alt_regno = next_scratch_gr_reg ();
2407 alt_reg = gen_rtx_REG (DImode, alt_regno);
2408 do_restore (gen_movdi_x, alt_reg, cfa_off);
2409 cfa_off -= 8;
2411 reg = gen_rtx_REG (DImode, BR_REG (0));
2412 emit_move_insn (reg, alt_reg);
2415 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2416 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2418 alt_regno = next_scratch_gr_reg ();
2419 alt_reg = gen_rtx_REG (DImode, alt_regno);
2420 do_restore (gen_movdi_x, alt_reg, cfa_off);
2421 cfa_off -= 8;
2422 reg = gen_rtx_REG (DImode, regno);
2423 emit_move_insn (reg, alt_reg);
2426 /* Restore floating point registers. */
2427 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2428 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2430 if (cfa_off & 15)
2431 abort ();
2432 reg = gen_rtx_REG (TFmode, regno);
2433 do_restore (gen_fr_restore_x, reg, cfa_off);
2434 cfa_off -= 16;
2437 /* Restore ar.unat for real. */
2438 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2440 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2441 emit_move_insn (reg, ar_unat_save_reg);
2444 if (cfa_off != current_frame_info.spill_cfa_off)
2445 abort ();
2447 finish_spill_pointers ();
2449 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2451 /* ??? At this point we must generate a magic insn that appears to
2452 modify the spill iterators, the stack pointer, and the frame
2453 pointer. This would allow the most scheduling freedom. For now,
2454 just hard stop. */
2455 emit_insn (gen_blockage ());
2458 if (cfun->machine->ia64_eh_epilogue_sp)
2459 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2460 else if (frame_pointer_needed)
2462 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2463 RTX_FRAME_RELATED_P (insn) = 1;
2465 else if (current_frame_info.total_size)
2467 rtx offset, frame_size_rtx;
2469 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2470 if (CONST_OK_FOR_I (current_frame_info.total_size))
2471 offset = frame_size_rtx;
2472 else
2474 regno = next_scratch_gr_reg ();
2475 offset = gen_rtx_REG (DImode, regno);
2476 emit_move_insn (offset, frame_size_rtx);
2479 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2480 offset));
2482 RTX_FRAME_RELATED_P (insn) = 1;
2483 if (GET_CODE (offset) != CONST_INT)
2485 REG_NOTES (insn)
2486 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2487 gen_rtx_SET (VOIDmode,
2488 stack_pointer_rtx,
2489 gen_rtx_PLUS (DImode,
2490 stack_pointer_rtx,
2491 frame_size_rtx)),
2492 REG_NOTES (insn));
2496 if (cfun->machine->ia64_eh_epilogue_bsp)
2497 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2499 if (! sibcall_p)
2500 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2501 else
2503 int fp = GR_REG (2);
2504 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
2505 first available call-clobbered register.  If there was a frame pointer
2506 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2507 so we have to make sure we're using the string "r2" when emitting
2508 the register name for the assembler. */
2509 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2510 fp = HARD_FRAME_POINTER_REGNUM;
2512 /* We must emit an alloc to force the input registers to become output
2513 registers. Otherwise, if the callee tries to pass its parameters
2514 through to another call without an intervening alloc, then these
2515 values get lost. */
2516 /* ??? We don't need to preserve all input registers. We only need to
2517 preserve those input registers used as arguments to the sibling call.
2518 It is unclear how to compute that number here. */
2519 if (current_frame_info.n_input_regs != 0)
2520 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2521 GEN_INT (0), GEN_INT (0),
2522 GEN_INT (current_frame_info.n_input_regs),
2523 GEN_INT (0)));
2527 /* Return 1 if br.ret can do all the work required to return from a
2528 function. */
2531 ia64_direct_return ()
2533 if (reload_completed && ! frame_pointer_needed)
2535 ia64_compute_frame_size (get_frame_size ());
2537 return (current_frame_info.total_size == 0
2538 && current_frame_info.n_spilled == 0
2539 && current_frame_info.reg_save_b0 == 0
2540 && current_frame_info.reg_save_pr == 0
2541 && current_frame_info.reg_save_ar_pfs == 0
2542 && current_frame_info.reg_save_ar_unat == 0
2543 && current_frame_info.reg_save_ar_lc == 0);
2545 return 0;
2549 ia64_hard_regno_rename_ok (from, to)
2550 int from;
2551 int to;
2553 /* Don't clobber any of the registers we reserved for the prologue. */
2554 if (to == current_frame_info.reg_fp
2555 || to == current_frame_info.reg_save_b0
2556 || to == current_frame_info.reg_save_pr
2557 || to == current_frame_info.reg_save_ar_pfs
2558 || to == current_frame_info.reg_save_ar_unat
2559 || to == current_frame_info.reg_save_ar_lc)
2560 return 0;
2562 if (from == current_frame_info.reg_fp
2563 || from == current_frame_info.reg_save_b0
2564 || from == current_frame_info.reg_save_pr
2565 || from == current_frame_info.reg_save_ar_pfs
2566 || from == current_frame_info.reg_save_ar_unat
2567 || from == current_frame_info.reg_save_ar_lc)
2568 return 0;
2570 /* Don't use output registers outside the register frame. */
2571 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2572 return 0;
2574 /* Retain even/oddness on predicate register pairs. */
2575 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2576 return (from & 1) == (to & 1);
2578 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2579 if (from == GR_REG (4) && current_function_calls_setjmp)
2580 return 0;
2582 return 1;
2585 /* Emit the function prologue. */
2587 static void
2588 ia64_output_function_prologue (file, size)
2589 FILE *file;
2590 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2592 int mask, grsave, grsave_prev;
2594 if (current_frame_info.need_regstk)
2595 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2596 current_frame_info.n_input_regs,
2597 current_frame_info.n_local_regs,
2598 current_frame_info.n_output_regs,
2599 current_frame_info.n_rotate_regs);
2601 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2602 return;
2604 /* Emit the .prologue directive. */
2606 mask = 0;
2607 grsave = grsave_prev = 0;
2608 if (current_frame_info.reg_save_b0 != 0)
2610 mask |= 8;
2611 grsave = grsave_prev = current_frame_info.reg_save_b0;
2613 if (current_frame_info.reg_save_ar_pfs != 0
2614 && (grsave_prev == 0
2615 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2617 mask |= 4;
2618 if (grsave_prev == 0)
2619 grsave = current_frame_info.reg_save_ar_pfs;
2620 grsave_prev = current_frame_info.reg_save_ar_pfs;
2622 if (current_frame_info.reg_fp != 0
2623 && (grsave_prev == 0
2624 || current_frame_info.reg_fp == grsave_prev + 1))
2626 mask |= 2;
2627 if (grsave_prev == 0)
2628 grsave = HARD_FRAME_POINTER_REGNUM;
2629 grsave_prev = current_frame_info.reg_fp;
2631 if (current_frame_info.reg_save_pr != 0
2632 && (grsave_prev == 0
2633 || current_frame_info.reg_save_pr == grsave_prev + 1))
2635 mask |= 1;
2636 if (grsave_prev == 0)
2637 grsave = current_frame_info.reg_save_pr;
2640 if (mask)
2641 fprintf (file, "\t.prologue %d, %d\n", mask,
2642 ia64_dbx_register_number (grsave));
2643 else
2644 fputs ("\t.prologue\n", file);
2646 /* Emit a .spill directive, if necessary, to relocate the base of
2647 the register spill area. */
2648 if (current_frame_info.spill_cfa_off != -16)
2649 fprintf (file, "\t.spill %ld\n",
2650 (long) (current_frame_info.spill_cfa_off
2651 + current_frame_info.spill_size));
2654 /* Emit the .body directive at the scheduled end of the prologue. */
2656 static void
2657 ia64_output_function_end_prologue (file)
2658 FILE *file;
2660 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2661 return;
2663 fputs ("\t.body\n", file);
2666 /* Emit the function epilogue. */
2668 static void
2669 ia64_output_function_epilogue (file, size)
2670 FILE *file ATTRIBUTE_UNUSED;
2671 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2673 int i;
2675 /* Reset from the function's potential modifications. */
2676 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2678 if (current_frame_info.reg_fp)
2680 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2681 reg_names[HARD_FRAME_POINTER_REGNUM]
2682 = reg_names[current_frame_info.reg_fp];
2683 reg_names[current_frame_info.reg_fp] = tmp;
2685 if (! TARGET_REG_NAMES)
2687 for (i = 0; i < current_frame_info.n_input_regs; i++)
2688 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2689 for (i = 0; i < current_frame_info.n_local_regs; i++)
2690 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2691 for (i = 0; i < current_frame_info.n_output_regs; i++)
2692 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2695 current_frame_info.initialized = 0;
2699 ia64_dbx_register_number (regno)
2700 int regno;
2702 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2703 from its home at loc79 to something inside the register frame. We
2704 must perform the same renumbering here for the debug info. */
2705 if (current_frame_info.reg_fp)
2707 if (regno == HARD_FRAME_POINTER_REGNUM)
2708 regno = current_frame_info.reg_fp;
2709 else if (regno == current_frame_info.reg_fp)
2710 regno = HARD_FRAME_POINTER_REGNUM;
2713 if (IN_REGNO_P (regno))
2714 return 32 + regno - IN_REG (0);
2715 else if (LOC_REGNO_P (regno))
2716 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2717 else if (OUT_REGNO_P (regno))
2718 return (32 + current_frame_info.n_input_regs
2719 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2720 else
2721 return regno;
2724 void
2725 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2726 rtx addr, fnaddr, static_chain;
2728 rtx addr_reg, eight = GEN_INT (8);
2730 /* Load up our iterator. */
2731 addr_reg = gen_reg_rtx (Pmode);
2732 emit_move_insn (addr_reg, addr);
2734 /* The first two words are the fake descriptor:
2735 __ia64_trampoline, ADDR+16. */
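/* Summary of the trampoline built below, one 8-byte word at a time
   (offsets relative to ADDR; this only restates the individual stores
   that follow):
        ADDR +  0: __ia64_trampoline  (first word of the fake descriptor)
        ADDR +  8: ADDR + 16          (second word of the fake descriptor)
        ADDR + 16: FNADDR             (the target descriptor)
        ADDR + 24: STATIC_CHAIN  */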
2736 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2737 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2738 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2740 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2741 copy_to_reg (plus_constant (addr, 16)));
2742 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2744 /* The third word is the target descriptor. */
2745 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2746 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2748 /* The fourth word is the static chain. */
2749 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2752 /* Do any needed setup for a variadic function. CUM has not been updated
2753 for the last named argument which has type TYPE and mode MODE.
2755 We generate the actual spill instructions during prologue generation. */
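/* A hypothetical example of the bookkeeping below: for a stdarg function
   whose named arguments occupy 3 slots, cum.words is 3 after the advance,
   so n = MAX_ARGUMENT_SLOTS - 3 = 5 (assuming the usual 8 argument slots
   and 8-byte words), *pretend_size becomes 5 * UNITS_PER_WORD = 40 bytes,
   and the prologue later spills the 5 remaining argument GRs into the
   varargs area.  */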
2757 void
2758 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2759 CUMULATIVE_ARGS cum;
2760 int int_mode;
2761 tree type;
2762 int * pretend_size;
2763 int second_time ATTRIBUTE_UNUSED;
2765 /* If this is a stdarg function, then skip the current argument. */
2766 if (! current_function_varargs)
2767 ia64_function_arg_advance (&cum, int_mode, type, 1);
2769 if (cum.words < MAX_ARGUMENT_SLOTS)
2771 int n = MAX_ARGUMENT_SLOTS - cum.words;
2772 *pretend_size = n * UNITS_PER_WORD;
2773 cfun->machine->n_varargs = n;
2777 /* Check whether TYPE is a homogeneous floating point aggregate. If
2778 it is, return the mode of the floating point type that appears
2779 in all leaves.  If it is not, return VOIDmode.
2781 An aggregate is a homogeneous floating point aggregate if all
2782 fields/elements in it have the same floating point type (e.g.,
2783 SFmode). 128-bit quad-precision floats are excluded. */
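/* Some illustrative cases (examples only, not an ABI statement):
     struct { float x, y, z; }        -> SFmode   (HFA of three floats)
     struct { double r; double i; }   -> DFmode
     struct { float x; double y; }    -> VOIDmode (mixed element types)
     struct { float x; int n; }       -> VOIDmode (non-FP member)
   These follow directly from the RECORD_TYPE recursion below.  */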
2785 static enum machine_mode
2786 hfa_element_mode (type, nested)
2787 tree type;
2788 int nested;
2790 enum machine_mode element_mode = VOIDmode;
2791 enum machine_mode mode;
2792 enum tree_code code = TREE_CODE (type);
2793 int know_element_mode = 0;
2794 tree t;
2796 switch (code)
2798 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2799 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2800 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2801 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2802 case FUNCTION_TYPE:
2803 return VOIDmode;
2805 /* Fortran complex types are supposed to be HFAs, so we need to handle
2806 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2807 types though. */
2808 case COMPLEX_TYPE:
2809 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2810 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2811 * BITS_PER_UNIT, MODE_FLOAT, 0);
2812 else
2813 return VOIDmode;
2815 case REAL_TYPE:
2816 /* ??? Should exclude 128-bit long double here. */
2817 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2818 mode if this is contained within an aggregate. */
2819 if (nested)
2820 return TYPE_MODE (type);
2821 else
2822 return VOIDmode;
2824 case ARRAY_TYPE:
2825 return TYPE_MODE (TREE_TYPE (type));
2827 case RECORD_TYPE:
2828 case UNION_TYPE:
2829 case QUAL_UNION_TYPE:
2830 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2832 if (TREE_CODE (t) != FIELD_DECL)
2833 continue;
2835 mode = hfa_element_mode (TREE_TYPE (t), 1);
2836 if (know_element_mode)
2838 if (mode != element_mode)
2839 return VOIDmode;
2841 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2842 return VOIDmode;
2843 else
2845 know_element_mode = 1;
2846 element_mode = mode;
2849 return element_mode;
2851 default:
2852 /* If we reach here, we probably have some front-end specific type
2853 that the backend doesn't know about. This can happen via the
2854 aggregate_value_p call in init_function_start. All we can do is
2855 ignore unknown tree types. */
2856 return VOIDmode;
2859 return VOIDmode;
2862 /* Return rtx for register where argument is passed, or zero if it is passed
2863 on the stack. */
2865 /* ??? 128-bit quad-precision floats are always passed in general
2866 registers. */
2869 ia64_function_arg (cum, mode, type, named, incoming)
2870 CUMULATIVE_ARGS *cum;
2871 enum machine_mode mode;
2872 tree type;
2873 int named;
2874 int incoming;
2876 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2877 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2878 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2879 / UNITS_PER_WORD);
2880 int offset = 0;
2881 enum machine_mode hfa_mode = VOIDmode;
2883 /* Integer and float arguments larger than 8 bytes start at the next even
2884 boundary. Aggregates larger than 8 bytes start at the next even boundary
2885 if the aggregate has 16 byte alignment. Net effect is that types with
2886 alignment greater than 8 start at the next even boundary. */
2887 /* ??? The ABI does not specify how to handle aggregates with alignment from
2888 9 to 15 bytes, or greater than 16. We handle them all as if they had
2889 16 byte alignment. Such aggregates can occur only if gcc extensions are
2890 used. */
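/* For instance (a made-up case): if cum->words is 3 (odd) and the argument
   is a struct declared with __attribute__ ((aligned (16))), the test below
   sets offset to 1 so the argument starts in the next even slot; an
   ordinarily 8-byte-aligned struct in the same position is not padded.  */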
2891 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2892 : (words > 1))
2893 && (cum->words & 1))
2894 offset = 1;
2896 /* If all argument slots are used, then it must go on the stack. */
2897 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2898 return 0;
2900 /* Check for and handle homogeneous FP aggregates. */
2901 if (type)
2902 hfa_mode = hfa_element_mode (type, 0);
2904 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2905 and unprototyped hfas are passed specially. */
2906 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2908 rtx loc[16];
2909 int i = 0;
2910 int fp_regs = cum->fp_regs;
2911 int int_regs = cum->words + offset;
2912 int hfa_size = GET_MODE_SIZE (hfa_mode);
2913 int byte_size;
2914 int args_byte_size;
2916 /* If prototyped, pass it in FR regs then GR regs.
2917 If not prototyped, pass it in both FR and GR regs.
2919 If this is an SFmode aggregate, then it is possible to run out of
2920 FR regs while GR regs are still left. In that case, we pass the
2921 remaining part in the GR regs. */
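/* For instance (a made-up case): a named, prototyped HFA of four floats
   (16 bytes) comes out of the two loops below as a PARALLEL of four
   SFmode pieces in consecutive FR argument registers at byte offsets
   0, 4, 8 and 12, with no GR part left over.  */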
2923 /* Fill the FP regs. We do this always. We stop if we reach the end
2924 of the argument, the last FP register, or the last argument slot. */
2926 byte_size = ((mode == BLKmode)
2927 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2928 args_byte_size = int_regs * UNITS_PER_WORD;
2929 offset = 0;
2930 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2931 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2933 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2934 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2935 + fp_regs)),
2936 GEN_INT (offset));
2937 offset += hfa_size;
2938 args_byte_size += hfa_size;
2939 fp_regs++;
2942 /* If no prototype, then the whole thing must go in GR regs. */
2943 if (! cum->prototype)
2944 offset = 0;
2945 /* If this is an SFmode aggregate, then we might have some left over
2946 that needs to go in GR regs. */
2947 else if (byte_size != offset)
2948 int_regs += offset / UNITS_PER_WORD;
2950 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2952 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
2954 enum machine_mode gr_mode = DImode;
2956 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2957 then this goes in a GR reg left adjusted/little endian, right
2958 adjusted/big endian. */
2959 /* ??? Currently this is handled wrong, because 4-byte hunks are
2960 always right adjusted/little endian. */
2961 if (offset & 0x4)
2962 gr_mode = SImode;
2963 /* If we have an even 4 byte hunk because the aggregate is a
2964 multiple of 4 bytes in size, then this goes in a GR reg right
2965 adjusted/little endian. */
2966 else if (byte_size - offset == 4)
2967 gr_mode = SImode;
2968 /* Complex floats need to have float mode. */
2969 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
2970 gr_mode = hfa_mode;
2972 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2973 gen_rtx_REG (gr_mode, (basereg
2974 + int_regs)),
2975 GEN_INT (offset));
2976 offset += GET_MODE_SIZE (gr_mode);
2977 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
2978 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
2981 /* If we ended up using just one location, just return that one loc. */
2982 if (i == 1)
2983 return XEXP (loc[0], 0);
2984 else
2985 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
2988 /* Integral types and aggregates go in general registers.  If we have run out of
2989 FR registers, then FP values must also go in general registers. This can
2990 happen when we have a SFmode HFA. */
2991 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
2992 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
2993 return gen_rtx_REG (mode, basereg + cum->words + offset);
2995 /* If there is a prototype, then FP values go in a FR register when
2996 named, and in a GR register when unnamed. */
2997 else if (cum->prototype)
2999 if (! named)
3000 return gen_rtx_REG (mode, basereg + cum->words + offset);
3001 else
3002 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3004 /* If there is no prototype, then FP values go in both FR and GR
3005 registers. */
3006 else
3008 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3009 gen_rtx_REG (mode, (FR_ARG_FIRST
3010 + cum->fp_regs)),
3011 const0_rtx);
3012 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3013 gen_rtx_REG (mode,
3014 (basereg + cum->words
3015 + offset)),
3016 const0_rtx);
3018 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3022 /* Return number of words, at the beginning of the argument, that must be
3023 put in registers.  0 if the argument is entirely in registers or entirely
3024 in memory. */
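/* A hypothetical example: an ordinarily (8-byte) aligned 40-byte BLKmode
   aggregate arriving when cum->words is 6 has words = 5 and offset = 0;
   it overflows the two remaining argument slots, so this returns
   8 - 6 - 0 = 2 (two words in registers, the remaining three on the
   stack), assuming the usual MAX_ARGUMENT_SLOTS of 8.  */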
3027 ia64_function_arg_partial_nregs (cum, mode, type, named)
3028 CUMULATIVE_ARGS *cum;
3029 enum machine_mode mode;
3030 tree type;
3031 int named ATTRIBUTE_UNUSED;
3033 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3034 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3035 / UNITS_PER_WORD);
3036 int offset = 0;
3038 /* Arguments with alignment larger than 8 bytes start at the next even
3039 boundary. */
3040 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3041 : (words > 1))
3042 && (cum->words & 1))
3043 offset = 1;
3045 /* If all argument slots are used, then it must go on the stack. */
3046 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3047 return 0;
3049 /* It doesn't matter whether the argument goes in FR or GR regs. If
3050 it fits within the 8 argument slots, then it goes entirely in
3051 registers. If it extends past the last argument slot, then the rest
3052 goes on the stack. */
3054 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3055 return 0;
3057 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3060 /* Update CUM to point after this argument. This is patterned after
3061 ia64_function_arg. */
3063 void
3064 ia64_function_arg_advance (cum, mode, type, named)
3065 CUMULATIVE_ARGS *cum;
3066 enum machine_mode mode;
3067 tree type;
3068 int named;
3070 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3071 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3072 / UNITS_PER_WORD);
3073 int offset = 0;
3074 enum machine_mode hfa_mode = VOIDmode;
3076 /* If all arg slots are already full, then there is nothing to do. */
3077 if (cum->words >= MAX_ARGUMENT_SLOTS)
3078 return;
3080 /* Arguments with alignment larger than 8 bytes start at the next even
3081 boundary. */
3082 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3083 : (words > 1))
3084 && (cum->words & 1))
3085 offset = 1;
3087 cum->words += words + offset;
3089 /* Check for and handle homogeneous FP aggregates. */
3090 if (type)
3091 hfa_mode = hfa_element_mode (type, 0);
3093 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3094 and unprototyped hfas are passed specially. */
3095 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3097 int fp_regs = cum->fp_regs;
3098 /* This is the original value of cum->words + offset. */
3099 int int_regs = cum->words - words;
3100 int hfa_size = GET_MODE_SIZE (hfa_mode);
3101 int byte_size;
3102 int args_byte_size;
3104 /* If prototyped, pass it in FR regs then GR regs.
3105 If not prototyped, pass it in both FR and GR regs.
3107 If this is an SFmode aggregate, then it is possible to run out of
3108 FR regs while GR regs are still left. In that case, we pass the
3109 remaining part in the GR regs. */
3111 /* Fill the FP regs. We do this always. We stop if we reach the end
3112 of the argument, the last FP register, or the last argument slot. */
3114 byte_size = ((mode == BLKmode)
3115 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3116 args_byte_size = int_regs * UNITS_PER_WORD;
3117 offset = 0;
3118 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3119 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3121 offset += hfa_size;
3122 args_byte_size += hfa_size;
3123 fp_regs++;
3126 cum->fp_regs = fp_regs;
3129 /* Integral types and aggregates go in general registers.  If we have run out of
3130 FR registers, then FP values must also go in general registers. This can
3131 happen when we have a SFmode HFA. */
3132 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3133 return;
3135 /* If there is a prototype, then FP values go in a FR register when
3136 named, and in a GR register when unnamed. */
3137 else if (cum->prototype)
3139 if (! named)
3140 return;
3141 else
3142 /* ??? Complex types should not reach here. */
3143 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3145 /* If there is no prototype, then FP values go in both FR and GR
3146 registers. */
3147 else
3148 /* ??? Complex types should not reach here. */
3149 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3151 return;
3154 /* Implement va_start. */
3156 void
3157 ia64_va_start (stdarg_p, valist, nextarg)
3158 int stdarg_p;
3159 tree valist;
3160 rtx nextarg;
3162 int arg_words;
3163 int ofs;
3165 arg_words = current_function_args_info.words;
3167 if (stdarg_p)
3168 ofs = 0;
3169 else
3170 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3172 nextarg = plus_constant (nextarg, ofs);
3173 std_expand_builtin_va_start (1, valist, nextarg);
3176 /* Implement va_arg. */
3179 ia64_va_arg (valist, type)
3180 tree valist, type;
3182 tree t;
3184 /* Arguments with alignment larger than 8 bytes start at the next even
3185 boundary. */
3186 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3188 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3189 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3190 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3191 build_int_2 (-2 * UNITS_PER_WORD, -1));
3192 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3193 TREE_SIDE_EFFECTS (t) = 1;
3194 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3197 return std_expand_builtin_va_arg (valist, type);
3200 /* Return 1 if the function return value is returned in memory.  Return 0 if it is
3201 in a register. */
3204 ia64_return_in_memory (valtype)
3205 tree valtype;
3207 enum machine_mode mode;
3208 enum machine_mode hfa_mode;
3209 HOST_WIDE_INT byte_size;
3211 mode = TYPE_MODE (valtype);
3212 byte_size = GET_MODE_SIZE (mode);
3213 if (mode == BLKmode)
3215 byte_size = int_size_in_bytes (valtype);
3216 if (byte_size < 0)
3217 return 1;
3220 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3222 hfa_mode = hfa_element_mode (valtype, 0);
3223 if (hfa_mode != VOIDmode)
3225 int hfa_size = GET_MODE_SIZE (hfa_mode);
3227 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3228 return 1;
3229 else
3230 return 0;
3232 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3233 return 1;
3234 else
3235 return 0;
3238 /* Return rtx for register that holds the function return value. */
3241 ia64_function_value (valtype, func)
3242 tree valtype;
3243 tree func ATTRIBUTE_UNUSED;
3245 enum machine_mode mode;
3246 enum machine_mode hfa_mode;
3248 mode = TYPE_MODE (valtype);
3249 hfa_mode = hfa_element_mode (valtype, 0);
3251 if (hfa_mode != VOIDmode)
3253 rtx loc[8];
3254 int i;
3255 int hfa_size;
3256 int byte_size;
3257 int offset;
3259 hfa_size = GET_MODE_SIZE (hfa_mode);
3260 byte_size = ((mode == BLKmode)
3261 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3262 offset = 0;
3263 for (i = 0; offset < byte_size; i++)
3265 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3266 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3267 GEN_INT (offset));
3268 offset += hfa_size;
3271 if (i == 1)
3272 return XEXP (loc[0], 0);
3273 else
3274 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3276 else if (FLOAT_TYPE_P (valtype) &&
3277 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3278 return gen_rtx_REG (mode, FR_ARG_FIRST);
3279 else
3280 return gen_rtx_REG (mode, GR_RET_FIRST);
3283 /* Print a memory address as an operand to reference that memory location. */
3285 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3286 also call this from ia64_print_operand for memory addresses. */
3288 void
3289 ia64_print_operand_address (stream, address)
3290 FILE * stream ATTRIBUTE_UNUSED;
3291 rtx address ATTRIBUTE_UNUSED;
3295 /* Print an operand to an assembler instruction.
3296 C Swap and print a comparison operator.
3297 D Print an FP comparison operator.
3298 E Print 32 - constant, for SImode shifts as extract.
3299 e Print 64 - constant, for DImode rotates.
3300 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3301 a floating point register emitted normally.
3302 I Invert a predicate register by adding 1.
3303 J Select the proper predicate register for a condition.
3304 j Select the inverse predicate register for a condition.
3305 O Append .acq for volatile load.
3306 P Postincrement of a MEM.
3307 Q Append .rel for volatile store.
3308 S Shift amount for shladd instruction.
3309 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3310 for Intel assembler.
3311 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3312 for Intel assembler.
3313 r Print register name, or constant 0 as r0. HP compatibility for
3314 Linux kernel. */
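/* Two small illustrations (hypothetical operands): the ',' code looks at
   current_insn_predicate, so an insn predicated on p6 gets the "(p6) "
   prefix; the '+' code looks at the REG_BR_PROB note, so a branch
   predicted taken with very high probability gets the ".sptk" completer
   appended.  */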
3315 void
3316 ia64_print_operand (file, x, code)
3317 FILE * file;
3318 rtx x;
3319 int code;
3321 const char *str;
3323 switch (code)
3325 case 0:
3326 /* Handled below. */
3327 break;
3329 case 'C':
3331 enum rtx_code c = swap_condition (GET_CODE (x));
3332 fputs (GET_RTX_NAME (c), file);
3333 return;
3336 case 'D':
3337 switch (GET_CODE (x))
3339 case NE:
3340 str = "neq";
3341 break;
3342 case UNORDERED:
3343 str = "unord";
3344 break;
3345 case ORDERED:
3346 str = "ord";
3347 break;
3348 default:
3349 str = GET_RTX_NAME (GET_CODE (x));
3350 break;
3352 fputs (str, file);
3353 return;
3355 case 'E':
3356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3357 return;
3359 case 'e':
3360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3361 return;
3363 case 'F':
3364 if (x == CONST0_RTX (GET_MODE (x)))
3365 str = reg_names [FR_REG (0)];
3366 else if (x == CONST1_RTX (GET_MODE (x)))
3367 str = reg_names [FR_REG (1)];
3368 else if (GET_CODE (x) == REG)
3369 str = reg_names [REGNO (x)];
3370 else
3371 abort ();
3372 fputs (str, file);
3373 return;
3375 case 'I':
3376 fputs (reg_names [REGNO (x) + 1], file);
3377 return;
3379 case 'J':
3380 case 'j':
3382 unsigned int regno = REGNO (XEXP (x, 0));
3383 if (GET_CODE (x) == EQ)
3384 regno += 1;
3385 if (code == 'j')
3386 regno ^= 1;
3387 fputs (reg_names [regno], file);
3389 return;
3391 case 'O':
3392 if (MEM_VOLATILE_P (x))
3393 fputs(".acq", file);
3394 return;
3396 case 'P':
3398 HOST_WIDE_INT value;
3400 switch (GET_CODE (XEXP (x, 0)))
3402 default:
3403 return;
3405 case POST_MODIFY:
3406 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3407 if (GET_CODE (x) == CONST_INT)
3408 value = INTVAL (x);
3409 else if (GET_CODE (x) == REG)
3411 fprintf (file, ", %s", reg_names[REGNO (x)]);
3412 return;
3414 else
3415 abort ();
3416 break;
3418 case POST_INC:
3419 value = GET_MODE_SIZE (GET_MODE (x));
3420 break;
3422 case POST_DEC:
3423 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3424 break;
3427 putc (',', file);
3428 putc (' ', file);
3429 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3430 return;
3433 case 'Q':
3434 if (MEM_VOLATILE_P (x))
3435 fputs(".rel", file);
3436 return;
3438 case 'S':
3439 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3440 return;
3442 case 'T':
3443 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3445 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3446 return;
3448 break;
3450 case 'U':
3451 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3453 const char *prefix = "0x";
3454 if (INTVAL (x) & 0x80000000)
3456 fprintf (file, "0xffffffff");
3457 prefix = "";
3459 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3460 return;
3462 break;
3464 case 'r':
3465 /* If this operand is the constant zero, write it as register zero.
3466 Any register, zero, or CONST_INT value is OK here. */
3467 if (GET_CODE (x) == REG)
3468 fputs (reg_names[REGNO (x)], file);
3469 else if (x == CONST0_RTX (GET_MODE (x)))
3470 fputs ("r0", file);
3471 else if (GET_CODE (x) == CONST_INT)
3472 output_addr_const (file, x);
3473 else
3474 output_operand_lossage ("invalid %%r value");
3475 return;
3477 case '+':
3479 const char *which;
3481 /* For conditional branches, returns or calls, substitute
3482 sptk, dptk, dpnt, or spnt for %s. */
3483 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3484 if (x)
3486 int pred_val = INTVAL (XEXP (x, 0));
3488 /* Guess top and bottom 2% statically predicted. */
3489 if (pred_val < REG_BR_PROB_BASE / 50)
3490 which = ".spnt";
3491 else if (pred_val < REG_BR_PROB_BASE / 2)
3492 which = ".dpnt";
3493 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3494 which = ".dptk";
3495 else
3496 which = ".sptk";
3498 else if (GET_CODE (current_output_insn) == CALL_INSN)
3499 which = ".sptk";
3500 else
3501 which = ".dptk";
3503 fputs (which, file);
3504 return;
3507 case ',':
3508 x = current_insn_predicate;
3509 if (x)
3511 unsigned int regno = REGNO (XEXP (x, 0));
3512 if (GET_CODE (x) == EQ)
3513 regno += 1;
3514 fprintf (file, "(%s) ", reg_names [regno]);
3516 return;
3518 default:
3519 output_operand_lossage ("ia64_print_operand: unknown code");
3520 return;
3523 switch (GET_CODE (x))
3525 /* This happens for the spill/restore instructions. */
3526 case POST_INC:
3527 case POST_DEC:
3528 case POST_MODIFY:
3529 x = XEXP (x, 0);
3530 /* ... fall through ... */
3532 case REG:
3533 fputs (reg_names [REGNO (x)], file);
3534 break;
3536 case MEM:
3538 rtx addr = XEXP (x, 0);
3539 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3540 addr = XEXP (addr, 0);
3541 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3542 break;
3545 default:
3546 output_addr_const (file, x);
3547 break;
3550 return;
3553 /* Calculate the cost of moving data from a register in class FROM to
3554 one in class TO, using MODE. */
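/* A rough picture of the costs computed below (illustrative only): plain
   GR <-> GR moves cost 2, a PR <-> PR move costs 3 since it takes two
   insns, moves between PR/BR/AR and anything other than GR are priced at
   MEMORY_MOVE_COST because they are impossible directly, and TFmode
   FR <-> GR moves are also priced at MEMORY_MOVE_COST so that reload
   uses a secondary memory location.  */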
3557 ia64_register_move_cost (mode, from, to)
3558 enum machine_mode mode;
3559 enum reg_class from, to;
3561 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3562 if (to == ADDL_REGS)
3563 to = GR_REGS;
3564 if (from == ADDL_REGS)
3565 from = GR_REGS;
3567 /* All costs are symmetric, so reduce cases by putting the
3568 lower number class as the destination. */
3569 if (from < to)
3571 enum reg_class tmp = to;
3572 to = from, from = tmp;
3575 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3576 so that we get secondary memory reloads. Between FR_REGS,
3577 we have to make this at least as expensive as MEMORY_MOVE_COST
3578 to avoid spectacularly poor register class preferencing. */
3579 if (mode == TFmode)
3581 if (to != GR_REGS || from != GR_REGS)
3582 return MEMORY_MOVE_COST (mode, to, 0);
3583 else
3584 return 3;
3587 switch (to)
3589 case PR_REGS:
3590 /* Moving between PR registers takes two insns. */
3591 if (from == PR_REGS)
3592 return 3;
3593 /* Moving between PR and anything but GR is impossible. */
3594 if (from != GR_REGS)
3595 return MEMORY_MOVE_COST (mode, to, 0);
3596 break;
3598 case BR_REGS:
3599 /* Moving between BR and anything but GR is impossible. */
3600 if (from != GR_REGS && from != GR_AND_BR_REGS)
3601 return MEMORY_MOVE_COST (mode, to, 0);
3602 break;
3604 case AR_I_REGS:
3605 case AR_M_REGS:
3606 /* Moving between AR and anything but GR is impossible. */
3607 if (from != GR_REGS)
3608 return MEMORY_MOVE_COST (mode, to, 0);
3609 break;
3611 case GR_REGS:
3612 case FR_REGS:
3613 case GR_AND_FR_REGS:
3614 case GR_AND_BR_REGS:
3615 case ALL_REGS:
3616 break;
3618 default:
3619 abort ();
3622 return 2;
3625 /* This function returns the register class required for a secondary
3626 register when copying between one of the registers in CLASS, and X,
3627 using MODE. A return value of NO_REGS means that no secondary register
3628 is required. */
3630 enum reg_class
3631 ia64_secondary_reload_class (class, mode, x)
3632 enum reg_class class;
3633 enum machine_mode mode ATTRIBUTE_UNUSED;
3634 rtx x;
3636 int regno = -1;
3638 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3639 regno = true_regnum (x);
3641 switch (class)
3643 case BR_REGS:
3644 case AR_M_REGS:
3645 case AR_I_REGS:
3646 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3647 interaction. We end up with two pseudos with overlapping lifetimes
3648 both of which are equiv to the same constant, and both of which need
3649 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3650 changes depending on the path length, which means the qty_first_reg
3651 check in make_regs_eqv can give different answers at different times.
3652 At some point I'll probably need a reload_indi pattern to handle
3653 this.
3655 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3656 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3657 non-general registers for good measure. */
3658 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3659 return GR_REGS;
3661 /* This is needed if a pseudo used as a call_operand gets spilled to a
3662 stack slot. */
3663 if (GET_CODE (x) == MEM)
3664 return GR_REGS;
3665 break;
3667 case FR_REGS:
3668 /* Need to go through general registers to get to other class regs. */
3669 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3670 return GR_REGS;
3672 /* This can happen when a paradoxical subreg is an operand to the
3673 muldi3 pattern. */
3674 /* ??? This shouldn't be necessary after instruction scheduling is
3675 enabled, because paradoxical subregs are not accepted by
3676 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3677 stop the paradoxical subreg stupidity in the *_operand functions
3678 in recog.c. */
3679 if (GET_CODE (x) == MEM
3680 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3681 || GET_MODE (x) == QImode))
3682 return GR_REGS;
3684 /* This can happen because of the ior/and/etc patterns that accept FP
3685 registers as operands. If the third operand is a constant, then it
3686 needs to be reloaded into a FP register. */
3687 if (GET_CODE (x) == CONST_INT)
3688 return GR_REGS;
3690 /* This can happen because of register elimination in a muldi3 insn.
3691 E.g. `26107 * (unsigned long)&u'. */
3692 if (GET_CODE (x) == PLUS)
3693 return GR_REGS;
3694 break;
3696 case PR_REGS:
3697 /* ??? This happens if we cse/gcse a BImode value across a call,
3698 and the function has a nonlocal goto. This is because global
3699 does not allocate call crossing pseudos to hard registers when
3700 current_function_has_nonlocal_goto is true. This is relatively
3701 common for C++ programs that use exceptions. To reproduce,
3702 return NO_REGS and compile libstdc++. */
3703 if (GET_CODE (x) == MEM)
3704 return GR_REGS;
3706 /* This can happen when we take a BImode subreg of a DImode value,
3707 and that DImode value winds up in some non-GR register. */
3708 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3709 return GR_REGS;
3710 break;
3712 case GR_REGS:
3713 /* Since we have no offsettable memory addresses, we need a temporary
3714 to hold the address of the second word. */
3715 if (mode == TImode)
3716 return GR_REGS;
3717 break;
3719 default:
3720 break;
3723 return NO_REGS;
3727 /* Emit text to declare externally defined variables and functions, because
3728 the Intel assembler does not support undefined externals. */
3730 void
3731 ia64_asm_output_external (file, decl, name)
3732 FILE *file;
3733 tree decl;
3734 const char *name;
3736 int save_referenced;
3738 /* GNU as does not need anything here. */
3739 if (TARGET_GNU_AS)
3740 return;
3742 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3743 the linker when we do this, so we need to be careful not to do this for
3744 builtin functions which have no library equivalent. Unfortunately, we
3745 can't tell here whether or not a function will actually be called by
3746 expand_expr, so we pull in library functions even if we may not need
3747 them later. */
3748 if (! strcmp (name, "__builtin_next_arg")
3749 || ! strcmp (name, "alloca")
3750 || ! strcmp (name, "__builtin_constant_p")
3751 || ! strcmp (name, "__builtin_args_info"))
3752 return;
3754 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3755 restore it. */
3756 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3757 if (TREE_CODE (decl) == FUNCTION_DECL)
3759 fprintf (file, "%s", TYPE_ASM_OP);
3760 assemble_name (file, name);
3761 putc (',', file);
3762 fprintf (file, TYPE_OPERAND_FMT, "function");
3763 putc ('\n', file);
3765 ASM_GLOBALIZE_LABEL (file, name);
3766 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3769 /* Parse the -mfixed-range= option string. */
3771 static void
3772 fix_range (const_str)
3773 const char *const_str;
3775 int i, first, last;
3776 char *str, *dash, *comma;
3778 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3779 REG2 are either register names or register numbers. The effect
3780 of this option is to mark the registers in the range from REG1 to
3781 REG2 as ``fixed'' so they won't be used by the compiler. This is
3782 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
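/* For example, a hypothetical -mfixed-range=f12-f15,f32-f127 marks both
   comma-separated ranges as fixed via the loop below.  */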
3784 i = strlen (const_str);
3785 str = (char *) alloca (i + 1);
3786 memcpy (str, const_str, i + 1);
3788 while (1)
3790 dash = strchr (str, '-');
3791 if (!dash)
3793 warning ("value of -mfixed-range must have form REG1-REG2");
3794 return;
3796 *dash = '\0';
3798 comma = strchr (dash + 1, ',');
3799 if (comma)
3800 *comma = '\0';
3802 first = decode_reg_name (str);
3803 if (first < 0)
3805 warning ("unknown register name: %s", str);
3806 return;
3809 last = decode_reg_name (dash + 1);
3810 if (last < 0)
3812 warning ("unknown register name: %s", dash + 1);
3813 return;
3816 *dash = '-';
3818 if (first > last)
3820 warning ("%s-%s is an empty range", str, dash + 1);
3821 return;
3824 for (i = first; i <= last; ++i)
3825 fixed_regs[i] = call_used_regs[i] = 1;
3827 if (!comma)
3828 break;
3830 *comma = ',';
3831 str = comma + 1;
3835 /* Called to register all of our global variables with the garbage
3836 collector. */
3838 static void
3839 ia64_add_gc_roots ()
3841 ggc_add_rtx_root (&ia64_compare_op0, 1);
3842 ggc_add_rtx_root (&ia64_compare_op1, 1);
3845 static void
3846 ia64_init_machine_status (p)
3847 struct function *p;
3849 p->machine =
3850 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3853 static void
3854 ia64_mark_machine_status (p)
3855 struct function *p;
3857 struct machine_function *machine = p->machine;
3859 if (machine)
3861 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3862 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3863 ggc_mark_rtx (machine->ia64_gp_save);
3867 static void
3868 ia64_free_machine_status (p)
3869 struct function *p;
3871 free (p->machine);
3872 p->machine = NULL;
3875 /* Handle TARGET_OPTIONS switches. */
3877 void
3878 ia64_override_options ()
3880 if (TARGET_AUTO_PIC)
3881 target_flags |= MASK_CONST_GP;
3883 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3885 warning ("cannot optimize division for both latency and throughput");
3886 target_flags &= ~MASK_INLINE_DIV_THR;
3889 if (ia64_fixed_range_string)
3890 fix_range (ia64_fixed_range_string);
3892 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3893 flag_schedule_insns_after_reload = 0;
3895 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3897 init_machine_status = ia64_init_machine_status;
3898 mark_machine_status = ia64_mark_machine_status;
3899 free_machine_status = ia64_free_machine_status;
3901 ia64_add_gc_roots ();
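/* For example (option spellings below are assumptions recalled for
   illustration, not taken from this file): -mauto-pic also turns on
   -mconstant-gp through the MASK_CONST_GP setting above, and requesting
   both -minline-divide-min-latency and -minline-divide-max-throughput
   produces the warning and keeps only the latency-optimized sequences.  */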
3904 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3905 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3906 static enum attr_type ia64_safe_type PARAMS((rtx));
3908 static enum attr_itanium_requires_unit0
3909 ia64_safe_itanium_requires_unit0 (insn)
3910 rtx insn;
3912 if (recog_memoized (insn) >= 0)
3913 return get_attr_itanium_requires_unit0 (insn);
3914 else
3915 return ITANIUM_REQUIRES_UNIT0_NO;
3918 static enum attr_itanium_class
3919 ia64_safe_itanium_class (insn)
3920 rtx insn;
3922 if (recog_memoized (insn) >= 0)
3923 return get_attr_itanium_class (insn);
3924 else
3925 return ITANIUM_CLASS_UNKNOWN;
3928 static enum attr_type
3929 ia64_safe_type (insn)
3930 rtx insn;
3932 if (recog_memoized (insn) >= 0)
3933 return get_attr_type (insn);
3934 else
3935 return TYPE_UNKNOWN;
3938 /* The following collection of routines emits instruction group stop bits as
3939 necessary to avoid dependencies. */
3941 /* Need to track some additional registers as far as serialization is
3942 concerned so we can properly handle br.call and br.ret. We could
3943 make these registers visible to gcc, but since these registers are
3944 never explicitly used in gcc generated code, it seems wasteful to
3945 do so (plus it would make the call and return patterns needlessly
3946 complex). */
3947 #define REG_GP (GR_REG (1))
3948 #define REG_RP (BR_REG (0))
3949 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3950 /* This is used for volatile asms which may require a stop bit immediately
3951 before and after them. */
3952 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
3953 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3954 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
3956 /* For each register, we keep track of how it has been written in the
3957 current instruction group.
3959 If a register is written unconditionally (no qualifying predicate),
3960 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3962 If a register is written if its qualifying predicate P is true, we
3963 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3964 may be written again by the complement of P (P^1) and when this happens,
3965 WRITE_COUNT gets set to 2.
3967 The result of this is that whenever an insn attempts to write a register
3968 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
3970 If a predicate register is written by a floating-point insn, we set
3971 WRITTEN_BY_FP to true.
3973 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3974 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
3976 struct reg_write_state
3978 unsigned int write_count : 2;
3979 unsigned int first_pred : 16;
3980 unsigned int written_by_fp : 1;
3981 unsigned int written_by_and : 1;
3982 unsigned int written_by_or : 1;
3985 /* Cumulative info for the current instruction group. */
3986 struct reg_write_state rws_sum[NUM_REGS];
3987 /* Info for the current instruction. This gets copied to rws_sum after a
3988 stop bit is emitted. */
3989 struct reg_write_state rws_insn[NUM_REGS];
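/* Illustrative sketch, not part of the compiler: a minimal standalone model
   of the WRITE_COUNT / FIRST_PRED bookkeeping described above, under the
   same assumption that predicates P and P^1 are complementary.  The names
   toy_state and toy_write are invented for the example, and the block is
   wrapped in #if 0 so it never builds here.  */
#if 0
#include <stdio.h>

struct toy_state
{
  unsigned int write_count;
  unsigned int first_pred;
};

/* Record a write under predicate PRED (0 means unconditional).  Return 1
   if a stop bit would be needed before this write.  */
static int
toy_write (struct toy_state *s, unsigned int pred)
{
  int need_barrier = 0;

  switch (s->write_count)
    {
    case 0:
      /* Not written yet in this group.  */
      break;
    case 1:
      /* Only the complement of the recorded predicate may write again.  */
      if ((s->first_pred ^ 1) != pred)
	need_barrier = 1;
      break;
    default:
      /* Already written unconditionally, or by both predicates.  */
      need_barrier = 1;
      break;
    }

  if (pred)
    s->write_count++;
  else
    s->write_count = 2;
  s->first_pred = pred;
  return need_barrier;
}

int
main (void)
{
  struct toy_state s = { 0, 0 };

  printf ("%d\n", toy_write (&s, 6));	/* (p6) write: no barrier.  */
  printf ("%d\n", toy_write (&s, 7));	/* (p7) write: complement of p6, ok.  */
  printf ("%d\n", toy_write (&s, 6));	/* Third write: barrier needed.  */
  return 0;
}
#endif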
3991 /* Indicates whether this is the first instruction after a stop bit,
3992 in which case we don't need another stop bit. Without this, we hit
3993 the abort in ia64_variable_issue when scheduling an alloc. */
3994 static int first_instruction;
3996 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3997 RTL for one instruction. */
3998 struct reg_flags
4000 unsigned int is_write : 1; /* Is register being written? */
4001 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4002 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4003 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4004 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4005 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4008 static void rws_update PARAMS ((struct reg_write_state *, int,
4009 struct reg_flags, int));
4010 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4011 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4012 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4013 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4014 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4015 static void init_insn_group_barriers PARAMS ((void));
4016 static int group_barrier_needed_p PARAMS ((rtx));
4017 static int safe_group_barrier_needed_p PARAMS ((rtx));
4019 /* Update *RWS for REGNO, which is being written by the current instruction,
4020 with predicate PRED, and associated register flags in FLAGS. */
4022 static void
4023 rws_update (rws, regno, flags, pred)
4024 struct reg_write_state *rws;
4025 int regno;
4026 struct reg_flags flags;
4027 int pred;
4029 if (pred)
4030 rws[regno].write_count++;
4031 else
4032 rws[regno].write_count = 2;
4033 rws[regno].written_by_fp |= flags.is_fp;
4034 /* ??? Not tracking and/or across differing predicates. */
4035 rws[regno].written_by_and = flags.is_and;
4036 rws[regno].written_by_or = flags.is_or;
4037 rws[regno].first_pred = pred;
4040 /* Handle an access to register REGNO of type FLAGS using predicate register
4041 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4042 a dependency with an earlier instruction in the same group. */
4044 static int
4045 rws_access_regno (regno, flags, pred)
4046 int regno;
4047 struct reg_flags flags;
4048 int pred;
4050 int need_barrier = 0;
4052 if (regno >= NUM_REGS)
4053 abort ();
4055 if (! PR_REGNO_P (regno))
4056 flags.is_and = flags.is_or = 0;
4058 if (flags.is_write)
4060 int write_count;
4062 /* One insn writes same reg multiple times? */
4063 if (rws_insn[regno].write_count > 0)
4064 abort ();
4066 /* Update info for current instruction. */
4067 rws_update (rws_insn, regno, flags, pred);
4068 write_count = rws_sum[regno].write_count;
4070 switch (write_count)
4072 case 0:
4073 /* The register has not been written yet. */
4074 rws_update (rws_sum, regno, flags, pred);
4075 break;
4077 case 1:
4078 /* The register has been written via a predicate. If this is
4079 not a complementary predicate, then we need a barrier. */
4080 /* ??? This assumes that P and P+1 are always complementary
4081 predicates for P even. */
4082 if (flags.is_and && rws_sum[regno].written_by_and)
4084 else if (flags.is_or && rws_sum[regno].written_by_or)
4086 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4087 need_barrier = 1;
4088 rws_update (rws_sum, regno, flags, pred);
4089 break;
4091 case 2:
4092 /* The register has been unconditionally written already. We
4093 need a barrier. */
4094 if (flags.is_and && rws_sum[regno].written_by_and)
4096 else if (flags.is_or && rws_sum[regno].written_by_or)
4098 else
4099 need_barrier = 1;
4100 rws_sum[regno].written_by_and = flags.is_and;
4101 rws_sum[regno].written_by_or = flags.is_or;
4102 break;
4104 default:
4105 abort ();
4108 else
4110 if (flags.is_branch)
4112 /* Branches have several RAW exceptions that allow us to avoid
4113 barriers. */
4115 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4116 /* RAW dependencies on branch regs are permissible as long
4117 as the writer is a non-branch instruction. Since we
4118 never generate code that uses a branch register written
4119 by a branch instruction, handling this case is
4120 easy. */
4121 return 0;
4123 if (REGNO_REG_CLASS (regno) == PR_REGS
4124 && ! rws_sum[regno].written_by_fp)
4125 /* The predicates of a branch are available within the
4126 same insn group as long as the predicate was written by
4127 something other than a floating-point instruction. */
4128 return 0;
4131 if (flags.is_and && rws_sum[regno].written_by_and)
4132 return 0;
4133 if (flags.is_or && rws_sum[regno].written_by_or)
4134 return 0;
4136 switch (rws_sum[regno].write_count)
4138 case 0:
4139 /* The register has not been written yet. */
4140 break;
4142 case 1:
4143 /* The register has been written via a predicate. If this is
4144 not a complementary predicate, then we need a barrier. */
4145 /* ??? This assumes that P and P+1 are always complementary
4146 predicates for P even. */
4147 if ((rws_sum[regno].first_pred ^ 1) != pred)
4148 need_barrier = 1;
4149 break;
4151 case 2:
4152 /* The register has been unconditionally written already. We
4153 need a barrier. */
4154 need_barrier = 1;
4155 break;
4157 default:
4158 abort ();
4162 return need_barrier;
4165 static int
4166 rws_access_reg (reg, flags, pred)
4167 rtx reg;
4168 struct reg_flags flags;
4169 int pred;
4171 int regno = REGNO (reg);
4172 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4174 if (n == 1)
4175 return rws_access_regno (regno, flags, pred);
4176 else
4178 int need_barrier = 0;
4179 while (--n >= 0)
4180 need_barrier |= rws_access_regno (regno + n, flags, pred);
4181 return need_barrier;
4185 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4186 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4188 static void
4189 update_set_flags (x, pflags, ppred, pcond)
4190 rtx x;
4191 struct reg_flags *pflags;
4192 int *ppred;
4193 rtx *pcond;
4195 rtx src = SET_SRC (x);
4197 *pcond = 0;
4199 switch (GET_CODE (src))
4201 case CALL:
4202 return;
4204 case IF_THEN_ELSE:
4205 if (SET_DEST (x) == pc_rtx)
4206 /* X is a conditional branch. */
4207 return;
4208 else
4210 int is_complemented = 0;
4212 /* X is a conditional move. */
4213 rtx cond = XEXP (src, 0);
4214 if (GET_CODE (cond) == EQ)
4215 is_complemented = 1;
4216 cond = XEXP (cond, 0);
4217 if (GET_CODE (cond) != REG
4218 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4219 abort ();
4220 *pcond = cond;
4221 if (XEXP (src, 1) == SET_DEST (x)
4222 || XEXP (src, 2) == SET_DEST (x))
4224 /* X is a conditional move that conditionally writes the
4225 destination. */
4227 /* We need another complement in this case. */
4228 if (XEXP (src, 1) == SET_DEST (x))
4229 is_complemented = ! is_complemented;
4231 *ppred = REGNO (cond);
4232 if (is_complemented)
4233 ++*ppred;
4236 /* ??? If this is a conditional write to the dest, then this
4237 instruction does not actually read one source. This probably
4238 doesn't matter, because that source is also the dest. */
4239 /* ??? Multiple writes to predicate registers are allowed
4240 if they are all AND type compares, or if they are all OR
4241 type compares. We do not generate such instructions
4242 currently. */
4244 /* ... fall through ... */
4246 default:
4247 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4248 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4249 /* Set pflags->is_fp to 1 so that we know we're dealing
4250 with a floating point comparison when processing the
4251 destination of the SET. */
4252 pflags->is_fp = 1;
4254 /* Discover if this is a parallel comparison. We only handle
4255 and.orcm and or.andcm at present, since we must retain a
4256 strict inverse on the predicate pair. */
4257 else if (GET_CODE (src) == AND)
4258 pflags->is_and = 1;
4259 else if (GET_CODE (src) == IOR)
4260 pflags->is_or = 1;
4262 break;
4266 /* Subroutine of rtx_needs_barrier; this function determines whether the
4267 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4268 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4269 for this insn. */
4271 static int
4272 set_src_needs_barrier (x, flags, pred, cond)
4273 rtx x;
4274 struct reg_flags flags;
4275 int pred;
4276 rtx cond;
4278 int need_barrier = 0;
4279 rtx dst;
4280 rtx src = SET_SRC (x);
4282 if (GET_CODE (src) == CALL)
4283 /* We don't need to worry about the result registers that
4284 get written by a subroutine call. */
4285 return rtx_needs_barrier (src, flags, pred);
4286 else if (SET_DEST (x) == pc_rtx)
4288 /* X is a conditional branch. */
4289 /* ??? This seems redundant, as the caller sets this bit for
4290 all JUMP_INSNs. */
4291 flags.is_branch = 1;
4292 return rtx_needs_barrier (src, flags, pred);
4295 need_barrier = rtx_needs_barrier (src, flags, pred);
4297 /* This instruction unconditionally uses a predicate register. */
4298 if (cond)
4299 need_barrier |= rws_access_reg (cond, flags, 0);
4301 dst = SET_DEST (x);
4302 if (GET_CODE (dst) == ZERO_EXTRACT)
4304 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4305 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4306 dst = XEXP (dst, 0);
4308 return need_barrier;
4311 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4312 Return 1 if this access creates a dependency with an earlier instruction
4313 in the same group. */
4315 static int
4316 rtx_needs_barrier (x, flags, pred)
4317 rtx x;
4318 struct reg_flags flags;
4319 int pred;
4321 int i, j;
4322 int is_complemented = 0;
4323 int need_barrier = 0;
4324 const char *format_ptr;
4325 struct reg_flags new_flags;
4326 rtx cond = 0;
4328 if (! x)
4329 return 0;
4331 new_flags = flags;
4333 switch (GET_CODE (x))
4335 case SET:
4336 update_set_flags (x, &new_flags, &pred, &cond);
4337 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4338 if (GET_CODE (SET_SRC (x)) != CALL)
4340 new_flags.is_write = 1;
4341 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4343 break;
4345 case CALL:
4346 new_flags.is_write = 0;
4347 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4349 /* Avoid multiple register writes, in case this is a pattern with
4350 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4351 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4353 new_flags.is_write = 1;
4354 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4355 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4356 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4358 break;
4360 case COND_EXEC:
4361 /* X is a predicated instruction. */
4363 cond = COND_EXEC_TEST (x);
4364 if (pred)
4365 abort ();
4366 need_barrier = rtx_needs_barrier (cond, flags, 0);
4368 if (GET_CODE (cond) == EQ)
4369 is_complemented = 1;
4370 cond = XEXP (cond, 0);
4371 if (GET_CODE (cond) != REG
4372 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4373 abort ();
4374 pred = REGNO (cond);
4375 if (is_complemented)
4376 ++pred;
4378 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4379 return need_barrier;
4381 case CLOBBER:
4382 case USE:
4383 /* Clobber & use are for earlier compiler phases only. */
4384 break;
4386 case ASM_OPERANDS:
4387 case ASM_INPUT:
4388 /* We always emit stop bits for traditional asms. We emit stop bits
4389 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4390 if (GET_CODE (x) != ASM_OPERANDS
4391 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4393 /* Avoid writing the register multiple times if we have multiple
4394 asm outputs. This avoids an abort in rws_access_reg. */
4395 if (! rws_insn[REG_VOLATILE].write_count)
4397 new_flags.is_write = 1;
4398 rws_access_regno (REG_VOLATILE, new_flags, pred);
4400 return 1;
4403 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4404 We cannot just fall through here, since we would then be confused
4405 by the ASM_INPUT rtxs inside the ASM_OPERANDS, which, unlike in their
4406 normal usage, do not indicate traditional asms. */
4408 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4409 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4410 need_barrier = 1;
4411 break;
4413 case PARALLEL:
4414 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4416 rtx pat = XVECEXP (x, 0, i);
4417 if (GET_CODE (pat) == SET)
4419 update_set_flags (pat, &new_flags, &pred, &cond);
4420 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4422 else if (GET_CODE (pat) == USE
4423 || GET_CODE (pat) == CALL
4424 || GET_CODE (pat) == ASM_OPERANDS)
4425 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4426 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4427 abort ();
4429 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4431 rtx pat = XVECEXP (x, 0, i);
4432 if (GET_CODE (pat) == SET)
4434 if (GET_CODE (SET_SRC (pat)) != CALL)
4436 new_flags.is_write = 1;
4437 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4438 pred);
4441 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4442 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4444 break;
4446 case SUBREG:
4447 x = SUBREG_REG (x);
4448 /* FALLTHRU */
4449 case REG:
4450 if (REGNO (x) == AR_UNAT_REGNUM)
4452 for (i = 0; i < 64; ++i)
4453 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4455 else
4456 need_barrier = rws_access_reg (x, flags, pred);
4457 break;
4459 case MEM:
4460 /* Find the regs used in memory address computation. */
4461 new_flags.is_write = 0;
4462 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4463 break;
4465 case CONST_INT: case CONST_DOUBLE:
4466 case SYMBOL_REF: case LABEL_REF: case CONST:
4467 break;
4469 /* Operators with side-effects. */
4470 case POST_INC: case POST_DEC:
4471 if (GET_CODE (XEXP (x, 0)) != REG)
4472 abort ();
4474 new_flags.is_write = 0;
4475 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4476 new_flags.is_write = 1;
4477 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4478 break;
4480 case POST_MODIFY:
4481 if (GET_CODE (XEXP (x, 0)) != REG)
4482 abort ();
4484 new_flags.is_write = 0;
4485 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4486 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4487 new_flags.is_write = 1;
4488 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4489 break;
4491 /* Handle common unary and binary ops for efficiency. */
4492 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4493 case MOD: case UDIV: case UMOD: case AND: case IOR:
4494 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4495 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4496 case NE: case EQ: case GE: case GT: case LE:
4497 case LT: case GEU: case GTU: case LEU: case LTU:
4498 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4499 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4500 break;
4502 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4503 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4504 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4505 case SQRT: case FFS:
4506 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4507 break;
4509 case UNSPEC:
4510 switch (XINT (x, 1))
4512 case 1: /* st8.spill */
4513 case 2: /* ld8.fill */
4515 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4516 HOST_WIDE_INT bit = (offset >> 3) & 63;
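/* For example, a spill whose constant offset operand is 24 maps to
   UNAT bit (24 >> 3) & 63 == 3.  */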
4518 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4519 new_flags.is_write = (XINT (x, 1) == 1);
4520 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4521 new_flags, pred);
4522 break;
4525 case 3: /* stf.spill */
4526 case 4: /* ldf.spill */
4527 case 8: /* popcnt */
4528 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4529 break;
4531 case 7: /* pred_rel_mutex */
4532 case 9: /* pic call */
4533 case 12: /* mf */
4534 case 19: /* fetchadd_acq */
4535 case 20: /* mov = ar.bsp */
4536 case 21: /* flushrs */
4537 case 22: /* bundle selector */
4538 case 23: /* cycle display */
4539 break;
4541 case 24: /* addp4 */
4542 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4543 break;
4545 case 5: /* recip_approx */
4546 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4547 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4548 break;
4550 case 13: /* cmpxchg_acq */
4551 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4552 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4553 break;
4555 default:
4556 abort ();
4558 break;
4560 case UNSPEC_VOLATILE:
4561 switch (XINT (x, 1))
4563 case 0: /* alloc */
4564 /* Alloc must always be the first instruction of a group.
4565 We force this by always returning true. */
4566 /* ??? We might get better scheduling if we explicitly check for
4567 input/local/output register dependencies, and modify the
4568 scheduler so that alloc is always reordered to the start of
4569 the current group. We could then eliminate all of the
4570 first_instruction code. */
4571 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4573 new_flags.is_write = 1;
4574 rws_access_regno (REG_AR_CFM, new_flags, pred);
4575 return 1;
4577 case 1: /* blockage */
4578 case 2: /* insn group barrier */
4579 return 0;
4581 case 5: /* set_bsp */
4582 need_barrier = 1;
4583 break;
4585 case 7: /* pred.rel.mutex */
4586 case 8: /* safe_across_calls all */
4587 case 9: /* safe_across_calls normal */
4588 return 0;
4590 default:
4591 abort ();
4593 break;
4595 case RETURN:
4596 new_flags.is_write = 0;
4597 need_barrier = rws_access_regno (REG_RP, flags, pred);
4598 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4600 new_flags.is_write = 1;
4601 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4602 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4603 break;
4605 default:
4606 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4607 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4608 switch (format_ptr[i])
4610 case '0': /* unused field */
4611 case 'i': /* integer */
4612 case 'n': /* note */
4613 case 'w': /* wide integer */
4614 case 's': /* pointer to string */
4615 case 'S': /* optional pointer to string */
4616 break;
4618 case 'e':
4619 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4620 need_barrier = 1;
4621 break;
4623 case 'E':
4624 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4625 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4626 need_barrier = 1;
4627 break;
4629 default:
4630 abort ();
4632 break;
4634 return need_barrier;
4637 /* Clear out the state for group_barrier_needed_p at the start of a
4638 sequence of insns. */
4640 static void
4641 init_insn_group_barriers ()
4643 memset (rws_sum, 0, sizeof (rws_sum));
4644 first_instruction = 1;
4647 /* Given the current state, recorded by previous calls to this function,
4648 determine whether a group barrier (a stop bit) is necessary before INSN.
4649 Return nonzero if so. */
4651 static int
4652 group_barrier_needed_p (insn)
4653 rtx insn;
4655 rtx pat;
4656 int need_barrier = 0;
4657 struct reg_flags flags;
4659 memset (&flags, 0, sizeof (flags));
4660 switch (GET_CODE (insn))
4662 case NOTE:
4663 break;
4665 case BARRIER:
4666 /* A barrier doesn't imply an instruction group boundary. */
4667 break;
4669 case CODE_LABEL:
4670 memset (rws_insn, 0, sizeof (rws_insn));
4671 return 1;
4673 case CALL_INSN:
4674 flags.is_branch = 1;
4675 flags.is_sibcall = SIBLING_CALL_P (insn);
4676 memset (rws_insn, 0, sizeof (rws_insn));
4678 /* Don't bundle a call following another call. */
4679 if ((pat = prev_active_insn (insn))
4680 && GET_CODE (pat) == CALL_INSN)
4682 need_barrier = 1;
4683 break;
4686 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4687 break;
4689 case JUMP_INSN:
4690 flags.is_branch = 1;
4692 /* Don't bundle a jump following a call. */
4693 if ((pat = prev_active_insn (insn))
4694 && GET_CODE (pat) == CALL_INSN)
4696 need_barrier = 1;
4697 break;
4699 /* FALLTHRU */
4701 case INSN:
4702 if (GET_CODE (PATTERN (insn)) == USE
4703 || GET_CODE (PATTERN (insn)) == CLOBBER)
4704 /* Don't care about USE and CLOBBER "insns"---those are used to
4705 indicate to the optimizer that it shouldn't get rid of
4706 certain operations. */
4707 break;
4709 pat = PATTERN (insn);
4711 /* Ug. Hack hacks hacked elsewhere. */
4712 switch (recog_memoized (insn))
4714 /* We play dependency tricks with the epilogue in order
4715 to get proper schedules. Undo this for dv analysis. */
4716 case CODE_FOR_epilogue_deallocate_stack:
4717 pat = XVECEXP (pat, 0, 0);
4718 break;
4720 /* The pattern we use for br.cloop confuses the code above.
4721 The second element of the vector is representative. */
4722 case CODE_FOR_doloop_end_internal:
4723 pat = XVECEXP (pat, 0, 1);
4724 break;
4726 /* Doesn't generate code. */
4727 case CODE_FOR_pred_rel_mutex:
4728 return 0;
4730 default:
4731 break;
4734 memset (rws_insn, 0, sizeof (rws_insn));
4735 need_barrier = rtx_needs_barrier (pat, flags, 0);
4737 /* Check to see if the previous instruction was a volatile
4738 asm. */
4739 if (! need_barrier)
4740 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4741 break;
4743 default:
4744 abort ();
4747 if (first_instruction)
4749 need_barrier = 0;
4750 first_instruction = 0;
4753 return need_barrier;
4756 /* Like group_barrier_needed_p, but do not clobber the current state. */
4758 static int
4759 safe_group_barrier_needed_p (insn)
4760 rtx insn;
4762 struct reg_write_state rws_saved[NUM_REGS];
4763 int saved_first_instruction;
4764 int t;
4766 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4767 saved_first_instruction = first_instruction;
4769 t = group_barrier_needed_p (insn);
4771 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4772 first_instruction = saved_first_instruction;
4774 return t;
4777 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4778 as necessary to eliminate dependencies. This function assumes that
4779 a final instruction scheduling pass has been run which has already
4780 inserted most of the necessary stop bits. This function only inserts
4781 new ones at basic block boundaries, since these are invisible to the
4782 scheduler. */
4784 static void
4785 emit_insn_group_barriers (dump, insns)
4786 FILE *dump;
4787 rtx insns;
4789 rtx insn;
4790 rtx last_label = 0;
4791 int insns_since_last_label = 0;
4793 init_insn_group_barriers ();
4795 for (insn = insns; insn; insn = NEXT_INSN (insn))
4797 if (GET_CODE (insn) == CODE_LABEL)
4799 if (insns_since_last_label)
4800 last_label = insn;
4801 insns_since_last_label = 0;
4803 else if (GET_CODE (insn) == NOTE
4804 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4806 if (insns_since_last_label)
4807 last_label = insn;
4808 insns_since_last_label = 0;
4810 else if (GET_CODE (insn) == INSN
4811 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4812 && XINT (PATTERN (insn), 1) == 2)
4814 init_insn_group_barriers ();
4815 last_label = 0;
4817 else if (INSN_P (insn))
4819 insns_since_last_label = 1;
4821 if (group_barrier_needed_p (insn))
4823 if (last_label)
4825 if (dump)
4826 fprintf (dump, "Emitting stop before label %d\n",
4827 INSN_UID (last_label));
4828 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4829 insn = last_label;
4831 init_insn_group_barriers ();
4832 last_label = 0;
4839 /* Like emit_insn_group_barriers, but used when no final scheduling pass was run.
4840 In that case this function has to emit all necessary group barriers. */
4842 static void
4843 emit_all_insn_group_barriers (dump, insns)
4844 FILE *dump ATTRIBUTE_UNUSED;
4845 rtx insns;
4847 rtx insn;
4849 init_insn_group_barriers ();
4851 for (insn = insns; insn; insn = NEXT_INSN (insn))
4853 if (GET_CODE (insn) == BARRIER)
4855 rtx last = prev_active_insn (insn);
4857 if (! last)
4858 continue;
4859 if (GET_CODE (last) == JUMP_INSN
4860 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
4861 last = prev_active_insn (last);
4862 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
4863 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
4865 init_insn_group_barriers ();
4867 else if (INSN_P (insn))
4869 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
4870 init_insn_group_barriers ();
4871 else if (group_barrier_needed_p (insn))
4873 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4874 init_insn_group_barriers ();
4875 group_barrier_needed_p (insn);
4881 static int errata_find_address_regs PARAMS ((rtx *, void *));
4882 static void errata_emit_nops PARAMS ((rtx));
4883 static void fixup_errata PARAMS ((void));
4885 /* This structure is used to track some details about the previous insn
4886 groups so we can determine if it may be necessary to insert NOPs to
4887 work around hardware errata. */
4888 static struct group
4890 HARD_REG_SET p_reg_set;
4891 HARD_REG_SET gr_reg_conditionally_set;
4892 } last_group[2];
4894 /* Index into the last_group array. */
4895 static int group_idx;
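/* The two entries form a ping-pong buffer: last_group[group_idx] describes
   the instruction group currently being scanned and last_group[group_idx ^ 1]
   the previous one.  fixup_errata below flips group_idx whenever it sees a
   TYPE_S insn, which it treats as the start of a new group.  */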
4897 /* Called through for_each_rtx; determines if a hard register that was
4898 conditionally set in the previous group is used as an address register.
4899 It ensures that for_each_rtx returns 1 in that case. */
4900 static int
4901 errata_find_address_regs (xp, data)
4902 rtx *xp;
4903 void *data ATTRIBUTE_UNUSED;
4905 rtx x = *xp;
4906 if (GET_CODE (x) != MEM)
4907 return 0;
4908 x = XEXP (x, 0);
4909 if (GET_CODE (x) == POST_MODIFY)
4910 x = XEXP (x, 0);
4911 if (GET_CODE (x) == REG)
4913 struct group *prev_group = last_group + (group_idx ^ 1);
4914 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4915 REGNO (x)))
4916 return 1;
4917 return -1;
4919 return 0;
4922 /* Called for each insn; this function keeps track of the state in
4923 last_group and emits additional NOPs if necessary to work around
4924 an Itanium A/B step erratum. */
4925 static void
4926 errata_emit_nops (insn)
4927 rtx insn;
4929 struct group *this_group = last_group + group_idx;
4930 struct group *prev_group = last_group + (group_idx ^ 1);
4931 rtx pat = PATTERN (insn);
4932 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4933 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4934 enum attr_type type;
4935 rtx set = real_pat;
4937 if (GET_CODE (real_pat) == USE
4938 || GET_CODE (real_pat) == CLOBBER
4939 || GET_CODE (real_pat) == ASM_INPUT
4940 || GET_CODE (real_pat) == ADDR_VEC
4941 || GET_CODE (real_pat) == ADDR_DIFF_VEC
4942 || asm_noperands (PATTERN (insn)) >= 0)
4943 return;
4945 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4946 parts of it. */
4948 if (GET_CODE (set) == PARALLEL)
4950 int i;
4951 set = XVECEXP (real_pat, 0, 0);
4952 for (i = 1; i < XVECLEN (real_pat, 0); i++)
4953 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
4954 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
4956 set = 0;
4957 break;
4961 if (set && GET_CODE (set) != SET)
4962 set = 0;
4964 type = get_attr_type (insn);
4966 if (type == TYPE_F
4967 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
4968 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
4970 if ((type == TYPE_M || type == TYPE_A) && cond && set
4971 && REG_P (SET_DEST (set))
4972 && GET_CODE (SET_SRC (set)) != PLUS
4973 && GET_CODE (SET_SRC (set)) != MINUS
4974 && (GET_CODE (SET_SRC (set)) != ASHIFT
4975 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
4976 && (GET_CODE (SET_SRC (set)) != MEM
4977 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
4978 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
4980 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
4981 || ! REG_P (XEXP (cond, 0)))
4982 abort ();
4984 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
4985 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
4987 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
4989 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4990 emit_insn_before (gen_nop (), insn);
4991 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4992 group_idx = 0;
4993 memset (last_group, 0, sizeof last_group);
4997 /* Emit extra nops if they are required to work around hardware errata. */
4999 static void
5000 fixup_errata ()
5002 rtx insn;
5004 if (! TARGET_B_STEP)
5005 return;
5007 group_idx = 0;
5008 memset (last_group, 0, sizeof last_group);
5010 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5012 if (!INSN_P (insn))
5013 continue;
5015 if (ia64_safe_type (insn) == TYPE_S)
5017 group_idx ^= 1;
5018 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5020 else
5021 errata_emit_nops (insn);
5025 /* Instruction scheduling support. */
5026 /* Describe one bundle. */
5028 struct bundle
5030 /* Zero if there's no possibility of a stop in this bundle other than
5031 at the end, otherwise the position of the optional stop bit. */
5032 int possible_stop;
5033 /* The types of the three slots. */
5034 enum attr_type t[3];
5035 /* The pseudo op to be emitted into the assembler output. */
5036 const char *name;
5039 #define NR_BUNDLES 10
5041 /* A list of all available bundles. */
5043 static const struct bundle bundle[NR_BUNDLES] =
5045 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5046 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5047 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5048 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5049 #if NR_BUNDLES == 10
5050 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5051 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5052 #endif
5053 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5054 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5055 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5056 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5057 it matches an L type insn. Otherwise we'll try to generate L type
5058 nops. */
5059 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
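/* For instance, the ".mmi" entry above (possible_stop == 1) covers both the
   MMI and the M;;MI templates; in (assumed) GNU assembler syntax a bundle
   using the mid-bundle stop might look like

	{ .mmi
	  st8 [r14] = r32
	  ;;
	  ld8 r15 = [r14]
	  add r8 = r15, r16
	}

   where the example instructions are illustrative only.  */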
5062 /* Describe a packet of instructions. Packets consist of two bundles that
5063 are visible to the hardware in one scheduling window. */
5065 struct ia64_packet
5067 const struct bundle *t1, *t2;
5068 /* Precomputed value of the first split issue in this packet if a cycle
5069 starts at its beginning. */
5070 int first_split;
5071 /* For convenience, the insn types are replicated here so we don't have
5072 to go through T1 and T2 all the time. */
5073 enum attr_type t[6];
5076 /* An array containing all possible packets. */
5077 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5078 static struct ia64_packet packets[NR_PACKETS];
5080 /* Map attr_type to a string with the name. */
5082 static const char *const type_names[] =
5084 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5087 /* Nonzero if we should insert stop bits into the schedule. */
5088 int ia64_final_schedule = 0;
5090 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5091 static rtx ia64_single_set PARAMS ((rtx));
5092 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5093 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5094 static void maybe_rotate PARAMS ((FILE *));
5095 static void finish_last_head PARAMS ((FILE *, int));
5096 static void rotate_one_bundle PARAMS ((FILE *));
5097 static void rotate_two_bundles PARAMS ((FILE *));
5098 static void nop_cycles_until PARAMS ((int, FILE *));
5099 static void cycle_end_fill_slots PARAMS ((FILE *));
5100 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5101 static int get_split PARAMS ((const struct ia64_packet *, int));
5102 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5103 const struct ia64_packet *, int));
5104 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5105 rtx *, enum attr_type *, int));
5106 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5107 static void dump_current_packet PARAMS ((FILE *));
5108 static void schedule_stop PARAMS ((FILE *));
5109 static rtx gen_nop_type PARAMS ((enum attr_type));
5110 static void ia64_emit_nops PARAMS ((void));
5112 /* Map a bundle number to its pseudo-op. */
5114 const char *
5115 get_bundle_name (b)
5116 int b;
5118 return bundle[b].name;
5121 /* Compute the slot which will cause a split issue in packet P if the
5122 current cycle begins at slot BEGIN. */
5124 static int
5125 itanium_split_issue (p, begin)
5126 const struct ia64_packet *p;
5127 int begin;
5129 int type_count[TYPE_S];
5130 int i;
5131 int split = 6;
5133 if (begin < 3)
5135 /* Always split before and after MMF. */
5136 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5137 return 3;
5138 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5139 return 3;
5140 /* Always split after MBB and BBB. */
5141 if (p->t[1] == TYPE_B)
5142 return 3;
5143 /* Split after first bundle in MIB BBB combination. */
5144 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5145 return 3;
5148 memset (type_count, 0, sizeof type_count);
5149 for (i = begin; i < split; i++)
5151 enum attr_type t0 = p->t[i];
5152 /* An MLX bundle reserves the same units as an MFI bundle. */
5153 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5154 : t0 == TYPE_X ? TYPE_I
5155 : t0);
5157 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5158 2 integer per cycle. */
5159 int max = (t == TYPE_B ? 3 : 2);
5160 if (type_count[t] == max)
5161 return i;
5163 type_count[t]++;
5165 return split;
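/* Worked example: for an .mii + .mii packet with BEGIN == 0 the type
   sequence is M I I M I I; the third I-type slot (slot 4) would need a
   third integer unit, so the function reports a split issue at slot 4.  */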
5168 /* Return the maximum number of instructions the CPU can issue per cycle. */
5170 static int
5171 ia64_issue_rate ()
5173 return 6;
5176 /* Helper function - like single_set, but look inside COND_EXEC. */
5178 static rtx
5179 ia64_single_set (insn)
5180 rtx insn;
5182 rtx x = PATTERN (insn);
5183 if (GET_CODE (x) == COND_EXEC)
5184 x = COND_EXEC_CODE (x);
5185 if (GET_CODE (x) == SET)
5186 return x;
5187 return single_set_2 (insn, x);
5190 /* Adjust the cost of a scheduling dependency. Return the new cost of
5191 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5193 static int
5194 ia64_adjust_cost (insn, link, dep_insn, cost)
5195 rtx insn, link, dep_insn;
5196 int cost;
5198 enum attr_type dep_type;
5199 enum attr_itanium_class dep_class;
5200 enum attr_itanium_class insn_class;
5201 rtx dep_set, set, src, addr;
5203 if (GET_CODE (PATTERN (insn)) == CLOBBER
5204 || GET_CODE (PATTERN (insn)) == USE
5205 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5206 || GET_CODE (PATTERN (dep_insn)) == USE
5207 /* @@@ Not accurate for indirect calls. */
5208 || GET_CODE (insn) == CALL_INSN
5209 || ia64_safe_type (insn) == TYPE_S)
5210 return 0;
5212 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5213 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5214 return 0;
5216 dep_type = ia64_safe_type (dep_insn);
5217 dep_class = ia64_safe_itanium_class (dep_insn);
5218 insn_class = ia64_safe_itanium_class (insn);
5220 /* Compares that feed a conditional branch can execute in the same
5221 cycle. */
5222 dep_set = ia64_single_set (dep_insn);
5223 set = ia64_single_set (insn);
5225 if (dep_type != TYPE_F
5226 && dep_set
5227 && GET_CODE (SET_DEST (dep_set)) == REG
5228 && PR_REG (REGNO (SET_DEST (dep_set)))
5229 && GET_CODE (insn) == JUMP_INSN)
5230 return 0;
5232 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5234 /* ??? Can't find any information in the documentation about whether
5235 a sequence
5236 st [rx] = ra
5237 ld rb = [ry]
5238 splits issue. Assume it doesn't. */
5239 return 0;
5242 src = set ? SET_SRC (set) : 0;
5243 addr = 0;
5244 if (set && GET_CODE (SET_DEST (set)) == MEM)
5245 addr = XEXP (SET_DEST (set), 0);
5246 else if (set && GET_CODE (src) == MEM)
5247 addr = XEXP (src, 0);
5248 else if (set && GET_CODE (src) == ZERO_EXTEND
5249 && GET_CODE (XEXP (src, 0)) == MEM)
5250 addr = XEXP (XEXP (src, 0), 0);
5251 else if (set && GET_CODE (src) == UNSPEC
5252 && XVECLEN (XEXP (src, 0), 0) > 0
5253 && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
5254 addr = XEXP (XVECEXP (src, 0, 0), 0);
5255 if (addr && GET_CODE (addr) == POST_MODIFY)
5256 addr = XEXP (addr, 0);
5258 set = ia64_single_set (dep_insn);
5260 if ((dep_class == ITANIUM_CLASS_IALU
5261 || dep_class == ITANIUM_CLASS_ILOG
5262 || dep_class == ITANIUM_CLASS_LD)
5263 && (insn_class == ITANIUM_CLASS_LD
5264 || insn_class == ITANIUM_CLASS_ST))
5266 if (! addr || ! set)
5267 abort ();
5268 /* This isn't completely correct - an IALU that feeds an address has
5269 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5270 otherwise. Unfortunately there's no good way to describe this. */
5271 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5272 return cost + 1;
5274 if ((dep_class == ITANIUM_CLASS_IALU
5275 || dep_class == ITANIUM_CLASS_ILOG
5276 || dep_class == ITANIUM_CLASS_LD)
5277 && (insn_class == ITANIUM_CLASS_MMMUL
5278 || insn_class == ITANIUM_CLASS_MMSHF
5279 || insn_class == ITANIUM_CLASS_MMSHFI))
5280 return 3;
5281 if (dep_class == ITANIUM_CLASS_FMAC
5282 && (insn_class == ITANIUM_CLASS_FMISC
5283 || insn_class == ITANIUM_CLASS_FCVTFX
5284 || insn_class == ITANIUM_CLASS_XMPY))
5285 return 7;
5286 if ((dep_class == ITANIUM_CLASS_FMAC
5287 || dep_class == ITANIUM_CLASS_FMISC
5288 || dep_class == ITANIUM_CLASS_FCVTFX
5289 || dep_class == ITANIUM_CLASS_XMPY)
5290 && insn_class == ITANIUM_CLASS_STF)
5291 return 8;
5292 if ((dep_class == ITANIUM_CLASS_MMMUL
5293 || dep_class == ITANIUM_CLASS_MMSHF
5294 || dep_class == ITANIUM_CLASS_MMSHFI)
5295 && (insn_class == ITANIUM_CLASS_LD
5296 || insn_class == ITANIUM_CLASS_ST
5297 || insn_class == ITANIUM_CLASS_IALU
5298 || insn_class == ITANIUM_CLASS_ILOG
5299 || insn_class == ITANIUM_CLASS_ISHF))
5300 return 4;
5302 return cost;
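/* In summary, the special cases above encode roughly these bypass latencies:
   one extra cycle when an ALU or load result is used as a memory address,
   3 cycles from ALU/load results into the multimedia (MM*) units, 7 cycles
   from FMAC into FMISC/FCVTFX/XMPY, 8 cycles from the FP arithmetic classes
   into an FP store, and 4 cycles from the MM* units back into integer,
   shift, load and store instructions.  */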
5305 /* Describe the current state of the Itanium pipeline. */
5306 static struct
5308 /* The first slot that is used in the current cycle. */
5309 int first_slot;
5310 /* The next slot to fill. */
5311 int cur;
5312 /* The packet we have selected for the current issue window. */
5313 const struct ia64_packet *packet;
5314 /* The position of the split issue that occurs due to issue width
5315 limitations (6 if there's no split issue). */
5316 int split;
5317 /* Record data about the insns scheduled so far in the same issue
5318 window. The elements up to but not including FIRST_SLOT belong
5319 to the previous cycle, the ones starting with FIRST_SLOT belong
5320 to the current cycle. */
5321 enum attr_type types[6];
5322 rtx insns[6];
5323 int stopbit[6];
5324 /* Nonzero if we decided to schedule a stop bit. */
5325 int last_was_stop;
5326 } sched_data;
5328 /* Temporary arrays; they have enough elements to hold all insns that
5329 can be ready at the same time while scheduling the current block.
5330 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5331 static rtx *sched_ready;
5332 static enum attr_type *sched_types;
5334 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5335 of packet P. */
5337 static int
5338 insn_matches_slot (p, itype, slot, insn)
5339 const struct ia64_packet *p;
5340 enum attr_type itype;
5341 int slot;
5342 rtx insn;
5344 enum attr_itanium_requires_unit0 u0;
5345 enum attr_type stype = p->t[slot];
5347 if (insn)
5349 u0 = ia64_safe_itanium_requires_unit0 (insn);
5350 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5352 int i;
5353 for (i = sched_data.first_slot; i < slot; i++)
5354 if (p->t[i] == stype
5355 || (stype == TYPE_F && p->t[i] == TYPE_L)
5356 || (stype == TYPE_I && p->t[i] == TYPE_X))
5357 return 0;
5359 if (GET_CODE (insn) == CALL_INSN)
5361 /* Reject calls in multiway branch packets. We want to limit
5362 the number of multiway branches we generate (since the branch
5363 predictor is limited), and this seems to work fairly well.
5364 (If we didn't do this, we'd have to add another test here to
5365 force calls into the third slot of the bundle.) */
5366 if (slot < 3)
5368 if (p->t[1] == TYPE_B)
5369 return 0;
5371 else
5373 if (p->t[4] == TYPE_B)
5374 return 0;
5379 if (itype == stype)
5380 return 1;
5381 if (itype == TYPE_A)
5382 return stype == TYPE_M || stype == TYPE_I;
5383 return 0;
5386 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5387 assembly output a bit prettier. */
5389 static void
5390 ia64_emit_insn_before (insn, before)
5391 rtx insn, before;
5393 rtx prev = PREV_INSN (before);
5394 if (prev && GET_CODE (prev) == INSN
5395 && GET_CODE (PATTERN (prev)) == UNSPEC
5396 && XINT (PATTERN (prev), 1) == 23)
5397 before = prev;
5398 emit_insn_before (insn, before);
5401 #if 0
5402 /* Generate a nop insn of the given type. Note we never generate L type
5403 nops. */
5405 static rtx
5406 gen_nop_type (t)
5407 enum attr_type t;
5409 switch (t)
5411 case TYPE_M:
5412 return gen_nop_m ();
5413 case TYPE_I:
5414 return gen_nop_i ();
5415 case TYPE_B:
5416 return gen_nop_b ();
5417 case TYPE_F:
5418 return gen_nop_f ();
5419 case TYPE_X:
5420 return gen_nop_x ();
5421 default:
5422 abort ();
5425 #endif
5427 /* When rotating a bundle out of the issue window, insert a bundle selector
5428 insn in front of it. DUMP is the scheduling dump file or NULL. START
5429 is either 0 or 3, depending on whether we want to emit a bundle selector
5430 for the first bundle or the second bundle in the current issue window.
5432 The selector insns are emitted this late because the selected packet can
5433 be changed until parts of it get rotated out. */
5435 static void
5436 finish_last_head (dump, start)
5437 FILE *dump;
5438 int start;
5440 const struct ia64_packet *p = sched_data.packet;
5441 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5442 int bundle_type = b - bundle;
5443 rtx insn;
5444 int i;
5446 if (! ia64_final_schedule)
5447 return;
5449 for (i = start; sched_data.insns[i] == 0; i++)
5450 if (i == start + 3)
5451 abort ();
5452 insn = sched_data.insns[i];
5454 if (dump)
5455 fprintf (dump, "// Emitting template before %d: %s\n",
5456 INSN_UID (insn), b->name);
5458 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5461 /* We can't schedule more insns this cycle. Fix up the scheduling state
5462 and advance FIRST_SLOT and CUR.
5463 We have to distribute the insns that are currently found between
5464 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5465 far, they are stored successively in the fields starting at FIRST_SLOT;
5466 now they must be moved to the correct slots.
5467 DUMP is the current scheduling dump file, or NULL. */
5469 static void
5470 cycle_end_fill_slots (dump)
5471 FILE *dump;
5473 const struct ia64_packet *packet = sched_data.packet;
5474 int slot, i;
5475 enum attr_type tmp_types[6];
5476 rtx tmp_insns[6];
5478 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5479 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5481 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5483 enum attr_type t = tmp_types[i];
5484 if (t != ia64_safe_type (tmp_insns[i]))
5485 abort ();
5486 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5488 if (slot > sched_data.split)
5489 abort ();
5490 if (dump)
5491 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5492 type_names[t]);
5493 sched_data.types[slot] = packet->t[slot];
5494 sched_data.insns[slot] = 0;
5495 sched_data.stopbit[slot] = 0;
5497 /* ??? TYPE_L instructions always fill up two slots, but we don't
5498 support TYPE_L nops. */
5499 if (packet->t[slot] == TYPE_L)
5500 abort ();
5502 slot++;
5504 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5505 actual slot type later. */
5506 sched_data.types[slot] = packet->t[slot];
5507 sched_data.insns[slot] = tmp_insns[i];
5508 sched_data.stopbit[slot] = 0;
5509 slot++;
5510 /* TYPE_L instructions always fill up two slots. */
5511 if (t == TYPE_L)
5512 slot++;
5515 /* This isn't right - there's no need to pad out until the forced split;
5516 the CPU will automatically split if an insn isn't ready. */
5517 #if 0
5518 while (slot < sched_data.split)
5520 sched_data.types[slot] = packet->t[slot];
5521 sched_data.insns[slot] = 0;
5522 sched_data.stopbit[slot] = 0;
5523 slot++;
5525 #endif
5527 sched_data.first_slot = sched_data.cur = slot;
5530 /* Bundle rotations, as described in the Itanium optimization manual.
5531 We can rotate either one or both bundles out of the issue window.
5532 DUMP is the current scheduling dump file, or NULL. */
5534 static void
5535 rotate_one_bundle (dump)
5536 FILE *dump;
5538 if (dump)
5539 fprintf (dump, "// Rotating one bundle.\n");
5541 finish_last_head (dump, 0);
5542 if (sched_data.cur > 3)
5544 sched_data.cur -= 3;
5545 sched_data.first_slot -= 3;
5546 memmove (sched_data.types,
5547 sched_data.types + 3,
5548 sched_data.cur * sizeof *sched_data.types);
5549 memmove (sched_data.stopbit,
5550 sched_data.stopbit + 3,
5551 sched_data.cur * sizeof *sched_data.stopbit);
5552 memmove (sched_data.insns,
5553 sched_data.insns + 3,
5554 sched_data.cur * sizeof *sched_data.insns);
5556 else
5558 sched_data.cur = 0;
5559 sched_data.first_slot = 0;
5563 static void
5564 rotate_two_bundles (dump)
5565 FILE *dump;
5567 if (dump)
5568 fprintf (dump, "// Rotating two bundles.\n");
5570 if (sched_data.cur == 0)
5571 return;
5573 finish_last_head (dump, 0);
5574 if (sched_data.cur > 3)
5575 finish_last_head (dump, 3);
5576 sched_data.cur = 0;
5577 sched_data.first_slot = 0;
5580 /* We're beginning a new block. Initialize data structures as necessary. */
5582 static void
5583 ia64_sched_init (dump, sched_verbose, max_ready)
5584 FILE *dump ATTRIBUTE_UNUSED;
5585 int sched_verbose ATTRIBUTE_UNUSED;
5586 int max_ready;
5588 static int initialized = 0;
5590 if (! initialized)
5592 int b1, b2, i;
5594 initialized = 1;
5596 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5598 const struct bundle *t1 = bundle + b1;
5599 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5601 const struct bundle *t2 = bundle + b2;
5603 packets[i].t1 = t1;
5604 packets[i].t2 = t2;
5607 for (i = 0; i < NR_PACKETS; i++)
5609 int j;
5610 for (j = 0; j < 3; j++)
5611 packets[i].t[j] = packets[i].t1->t[j];
5612 for (j = 0; j < 3; j++)
5613 packets[i].t[j + 3] = packets[i].t2->t[j];
5614 packets[i].first_split = itanium_split_issue (packets + i, 0);
5619 init_insn_group_barriers ();
5621 memset (&sched_data, 0, sizeof sched_data);
5622 sched_types = (enum attr_type *) xmalloc (max_ready
5623 * sizeof (enum attr_type));
5624 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
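/* With NR_BUNDLES == 10 this builds all NR_PACKETS == 100 two-bundle
   combinations: packets[b1 * NR_BUNDLES + b2] pairs bundle b1 with bundle b2
   and caches the slot of the first split issue for a cycle starting at
   slot 0.  */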
5627 /* See if the packet P can match the insns we have already scheduled. Return
5628 nonzero if so. In *PSLOT, we store the first slot that is available for
5629 more instructions if we choose this packet.
5630 SPLIT holds the last slot we can use; there's a split issue after it, so
5631 scheduling beyond it would cause us to use more than one cycle.
5633 static int
5634 packet_matches_p (p, split, pslot)
5635 const struct ia64_packet *p;
5636 int split;
5637 int *pslot;
5639 int filled = sched_data.cur;
5640 int first = sched_data.first_slot;
5641 int i, slot;
5643 /* First, check if the first of the two bundles must be a specific one (due
5644 to stop bits). */
5645 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5646 return 0;
5647 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5648 return 0;
5650 for (i = 0; i < first; i++)
5651 if (! insn_matches_slot (p, sched_data.types[i], i,
5652 sched_data.insns[i]))
5653 return 0;
5654 for (i = slot = first; i < filled; i++)
5656 while (slot < split)
5658 if (insn_matches_slot (p, sched_data.types[i], slot,
5659 sched_data.insns[i]))
5660 break;
5661 slot++;
5663 if (slot == split)
5664 return 0;
5665 slot++;
5668 if (pslot)
5669 *pslot = slot;
5670 return 1;
5673 /* A frontend for itanium_split_issue. For a packet P and a slot
5674 number FIRST that describes the start of the current clock cycle,
5675 return the slot number of the first split issue. This function
5676 uses the cached number found in P if possible. */
5678 static int
5679 get_split (p, first)
5680 const struct ia64_packet *p;
5681 int first;
5683 if (first == 0)
5684 return p->first_split;
5685 return itanium_split_issue (p, first);
5688 /* Given N_READY insns in the array READY, whose types are found in the
5689 corresponding array TYPES, return the insn that is best suited to be
5690 scheduled in slot SLOT of packet P. */
5692 static int
5693 find_best_insn (ready, types, n_ready, p, slot)
5694 rtx *ready;
5695 enum attr_type *types;
5696 int n_ready;
5697 const struct ia64_packet *p;
5698 int slot;
5700 int best = -1;
5701 int best_pri = 0;
5702 while (n_ready-- > 0)
5704 rtx insn = ready[n_ready];
5705 if (! insn)
5706 continue;
5707 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5708 break;
5709 /* If we have equally good insns, one of which has a stricter
5710 slot requirement, prefer the one with the stricter requirement. */
5711 if (best >= 0 && types[n_ready] == TYPE_A)
5712 continue;
5713 if (insn_matches_slot (p, types[n_ready], slot, insn))
5715 best = n_ready;
5716 best_pri = INSN_PRIORITY (ready[best]);
5718 /* If there's no way we could get a stricter requirement, stop
5719 looking now. */
5720 if (types[n_ready] != TYPE_A
5721 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5722 break;
5723 break;
5726 return best;
5729 /* Select the best packet to use given the current scheduler state and the
5730 current ready list.
5731 READY is an array holding N_READY ready insns; TYPES is a corresponding
5732 array that holds their types. Store the best packet in *PPACKET and the
5733 number of insns that can be scheduled in the current cycle in *PBEST. */
5735 static void
5736 find_best_packet (pbest, ppacket, ready, types, n_ready)
5737 int *pbest;
5738 const struct ia64_packet **ppacket;
5739 rtx *ready;
5740 enum attr_type *types;
5741 int n_ready;
5743 int first = sched_data.first_slot;
5744 int best = 0;
5745 int lowest_end = 6;
5746 const struct ia64_packet *best_packet = NULL;
5747 int i;
5749 for (i = 0; i < NR_PACKETS; i++)
5751 const struct ia64_packet *p = packets + i;
5752 int slot;
5753 int split = get_split (p, first);
5754 int win = 0;
5755 int first_slot, last_slot;
5756 int b_nops = 0;
5758 if (! packet_matches_p (p, split, &first_slot))
5759 continue;
5761 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5763 win = 0;
5764 last_slot = 6;
5765 for (slot = first_slot; slot < split; slot++)
5767 int insn_nr;
5769 /* Disallow a degenerate case where the first bundle doesn't
5770 contain anything but NOPs! */
5771 if (first_slot == 0 && win == 0 && slot == 3)
5773 win = -1;
5774 break;
5777 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5778 if (insn_nr >= 0)
5780 sched_ready[insn_nr] = 0;
5781 last_slot = slot;
5782 win++;
5784 else if (p->t[slot] == TYPE_B)
5785 b_nops++;
5787 /* We must disallow MBB/BBB packets if any of their B slots would be
5788 filled with nops. */
5789 if (last_slot < 3)
5791 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5792 win = -1;
5794 else
5796 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5797 win = -1;
5800 if (win > best
5801 || (win == best && last_slot < lowest_end))
5803 best = win;
5804 lowest_end = last_slot;
5805 best_packet = p;
5808 *pbest = best;
5809 *ppacket = best_packet;
5812 /* Reorder the ready list so that the insns that can be issued in this cycle
5813 are found in the correct order at the end of the list.
5814 DUMP is the scheduling dump file, or NULL. READY points to the start,
5815 E_READY to the end of the ready list. MAY_FAIL determines what should be
5816 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5817 otherwise we return 0.
5818 Return 1 if any insns can be scheduled in this cycle. */
5820 static int
5821 itanium_reorder (dump, ready, e_ready, may_fail)
5822 FILE *dump;
5823 rtx *ready;
5824 rtx *e_ready;
5825 int may_fail;
5827 const struct ia64_packet *best_packet;
5828 int n_ready = e_ready - ready;
5829 int first = sched_data.first_slot;
5830 int i, best, best_split, filled;
5832 for (i = 0; i < n_ready; i++)
5833 sched_types[i] = ia64_safe_type (ready[i]);
5835 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5837 if (best == 0)
5839 if (may_fail)
5840 return 0;
5841 abort ();
5844 if (dump)
5846 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5847 best_packet->t1->name,
5848 best_packet->t2 ? best_packet->t2->name : NULL, best);
5851 best_split = itanium_split_issue (best_packet, first);
5852 packet_matches_p (best_packet, best_split, &filled);
5854 for (i = filled; i < best_split; i++)
5856 int insn_nr;
5858 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5859 if (insn_nr >= 0)
5861 rtx insn = ready[insn_nr];
5862 memmove (ready + insn_nr, ready + insn_nr + 1,
5863 (n_ready - insn_nr - 1) * sizeof (rtx));
5864 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5865 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5866 ready[--n_ready] = insn;
5870 sched_data.packet = best_packet;
5871 sched_data.split = best_split;
5872 return 1;
5875 /* Dump information about the current scheduling state to file DUMP. */
5877 static void
5878 dump_current_packet (dump)
5879 FILE *dump;
5881 int i;
5882 fprintf (dump, "// %d slots filled:", sched_data.cur);
5883 for (i = 0; i < sched_data.first_slot; i++)
5885 rtx insn = sched_data.insns[i];
5886 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5887 if (insn)
5888 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5889 if (sched_data.stopbit[i])
5890 fprintf (dump, " ;;");
5892 fprintf (dump, " :::");
5893 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5895 rtx insn = sched_data.insns[i];
5896 enum attr_type t = ia64_safe_type (insn);
5897 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5899 fprintf (dump, "\n");
5902 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5903 NULL. */
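/* For orientation (a sketch, not emitted verbatim): a "stop bit" is the
   ";;" that closes an instruction group in ia64 assembly, e.g.

       add r14 = r15, r16 ;;    <- group boundary; r14 may be read below
       ld8 r17 = [r14]

   The routine below looks for a packet whose template allows a stop at
   or after the current slot, records NOP types for the untaken slots,
   and marks the stop in sched_data.stopbit.  */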
5905 static void
5906 schedule_stop (dump)
5907 FILE *dump;
5909 const struct ia64_packet *best = sched_data.packet;
5910 int i;
5911 int best_stop = 6;
5913 if (dump)
5914 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5916 if (sched_data.cur == 0)
5918 if (dump)
5919 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5921 rotate_two_bundles (NULL);
5922 return;
5925 for (i = -1; i < NR_PACKETS; i++)
5927 /* This is a slight hack to give the current packet the first chance.
5928 This is done to avoid e.g. switching from MIB to MBB bundles. */
5929 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
5930 int split = get_split (p, sched_data.first_slot);
5931 const struct bundle *compare;
5932 int next, stoppos;
5934 if (! packet_matches_p (p, split, &next))
5935 continue;
5937 compare = next > 3 ? p->t2 : p->t1;
5939 stoppos = 3;
5940 if (compare->possible_stop)
5941 stoppos = compare->possible_stop;
5942 if (next > 3)
5943 stoppos += 3;
5945 if (stoppos < next || stoppos >= best_stop)
5947 if (compare->possible_stop == 0)
5948 continue;
5949 stoppos = (next > 3 ? 6 : 3);
5951 if (stoppos < next || stoppos >= best_stop)
5952 continue;
5954 if (dump)
5955 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
5956 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
5957 stoppos);
5959 best_stop = stoppos;
5960 best = p;
5963 sched_data.packet = best;
5964 cycle_end_fill_slots (dump);
5965 while (sched_data.cur < best_stop)
5967 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
5968 sched_data.insns[sched_data.cur] = 0;
5969 sched_data.stopbit[sched_data.cur] = 0;
5970 sched_data.cur++;
5972 sched_data.stopbit[sched_data.cur - 1] = 1;
5973 sched_data.first_slot = best_stop;
5975 if (dump)
5976 dump_current_packet (dump);
5979 /* If necessary, perform one or two rotations on the scheduling state.
5980 This should only be called if we are starting a new cycle. */
5982 static void
5983 maybe_rotate (dump)
5984 FILE *dump;
5986 if (sched_data.cur == 6)
5987 rotate_two_bundles (dump);
5988 else if (sched_data.cur >= 3)
5989 rotate_one_bundle (dump);
5990 sched_data.first_slot = sched_data.cur;
5993 /* The clock cycle when ia64_sched_reorder was last called. */
5994 static int prev_cycle;
5996 /* The first insn scheduled in the previous cycle. This is the saved
5997 value of sched_data.first_slot. */
5998 static int prev_first;
6000 /* The last insn that has been scheduled. At the start of a new cycle
6001 we know that we can emit new insns after it; the main scheduling code
6002 has already emitted a cycle_display insn after it and is using that
6003 as its current last insn. */
6004 static rtx last_issued;
6006 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6007 pad out the delay between MM (shifts, etc.) and integer operations. */
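/* Rough sketch (assembly spelling approximate) of the filler produced by
   one iteration of the main loop below, covering up to two idle cycles:

       nop.m 0
       nop.i 0 ;;      <- insn_group_barrier ends the first idle cycle
       nop.i 0 ;;      <- a second barrier ends the next one

   preceded by a bundle_selector pseudo-insn that picks an MII template.  */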
6009 static void
6010 nop_cycles_until (clock_var, dump)
6011 int clock_var;
6012 FILE *dump;
6014 int prev_clock = prev_cycle;
6015 int cycles_left = clock_var - prev_clock;
6017 /* Finish the previous cycle; pad it out with NOPs. */
6018 if (sched_data.cur == 3)
6020 rtx t = gen_insn_group_barrier (GEN_INT (3));
6021 last_issued = emit_insn_after (t, last_issued);
6022 maybe_rotate (dump);
6024 else if (sched_data.cur > 0)
6026 int need_stop = 0;
6027 int split = itanium_split_issue (sched_data.packet, prev_first);
6029 if (sched_data.cur < 3 && split > 3)
6031 split = 3;
6032 need_stop = 1;
6035 if (split > sched_data.cur)
6037 int i;
6038 for (i = sched_data.cur; i < split; i++)
6040 rtx t;
6042 t = gen_nop_type (sched_data.packet->t[i]);
6043 last_issued = emit_insn_after (t, last_issued);
6044 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
6045 sched_data.insns[i] = last_issued;
6046 sched_data.stopbit[i] = 0;
6048 sched_data.cur = split;
6051 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6052 && cycles_left > 1)
6054 int i;
6055 for (i = sched_data.cur; i < 6; i++)
6057 rtx t;
6059 t = gen_nop_type (sched_data.packet->t[i]);
6060 last_issued = emit_insn_after (t, last_issued);
6061 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
6062 sched_data.insns[i] = last_issued;
6063 sched_data.stopbit[i] = 0;
6065 sched_data.cur = 6;
6066 cycles_left--;
6067 need_stop = 1;
6070 if (need_stop || sched_data.cur == 6)
6072 rtx t = gen_insn_group_barrier (GEN_INT (3));
6073 last_issued = emit_insn_after (t, last_issued);
6075 maybe_rotate (dump);
6078 cycles_left--;
6079 while (cycles_left > 0)
6081 rtx t = gen_bundle_selector (GEN_INT (0));
6082 last_issued = emit_insn_after (t, last_issued);
6083 t = gen_nop_type (TYPE_M);
6084 last_issued = emit_insn_after (t, last_issued);
6085 t = gen_nop_type (TYPE_I);
6086 last_issued = emit_insn_after (t, last_issued);
6087 if (cycles_left > 1)
6089 t = gen_insn_group_barrier (GEN_INT (2));
6090 last_issued = emit_insn_after (t, last_issued);
6091 cycles_left--;
6093 t = gen_nop_type (TYPE_I);
6094 last_issued = emit_insn_after (t, last_issued);
6095 t = gen_insn_group_barrier (GEN_INT (3));
6096 last_issued = emit_insn_after (t, last_issued);
6097 cycles_left--;
6101 /* We are about to begin issuing insns for this clock cycle.
6102 Override the default sort algorithm to better slot instructions. */
6104 static int
6105 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6106 reorder_type, clock_var)
6107 FILE *dump ATTRIBUTE_UNUSED;
6108 int sched_verbose ATTRIBUTE_UNUSED;
6109 rtx *ready;
6110 int *pn_ready;
6111 int reorder_type, clock_var;
6113 int n_asms;
6114 int n_ready = *pn_ready;
6115 rtx *e_ready = ready + n_ready;
6116 rtx *insnp;
6118 if (sched_verbose)
6120 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6121 dump_current_packet (dump);
6124 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6126 for (insnp = ready; insnp < e_ready; insnp++)
6128 rtx insn = *insnp;
6129 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6130 if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
6131 || t == ITANIUM_CLASS_ILOG
6132 || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
6134 rtx link;
6135 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6136 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
6137 && REG_NOTE_KIND (link) != REG_DEP_ANTI)
6139 rtx other = XEXP (link, 0);
6140 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6141 if (t0 == ITANIUM_CLASS_MMSHF
6142 || t0 == ITANIUM_CLASS_MMMUL)
6144 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6145 goto out;
6151 out:
6153 prev_first = sched_data.first_slot;
6154 prev_cycle = clock_var;
6156 if (reorder_type == 0)
6157 maybe_rotate (sched_verbose ? dump : NULL);
6159 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6160 n_asms = 0;
6161 for (insnp = ready; insnp < e_ready; insnp++)
6162 if (insnp < e_ready)
6164 rtx insn = *insnp;
6165 enum attr_type t = ia64_safe_type (insn);
6166 if (t == TYPE_UNKNOWN)
6168 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6169 || asm_noperands (PATTERN (insn)) >= 0)
6171 rtx lowest = ready[n_asms];
6172 ready[n_asms] = insn;
6173 *insnp = lowest;
6174 n_asms++;
6176 else
6178 rtx highest = ready[n_ready - 1];
6179 ready[n_ready - 1] = insn;
6180 *insnp = highest;
6181 if (ia64_final_schedule && group_barrier_needed_p (insn))
6183 schedule_stop (sched_verbose ? dump : NULL);
6184 sched_data.last_was_stop = 1;
6185 maybe_rotate (sched_verbose ? dump : NULL);
6188 return 1;
6192 if (n_asms < n_ready)
6194 /* Some normal insns to process. Skip the asms. */
6195 ready += n_asms;
6196 n_ready -= n_asms;
6198 else if (n_ready > 0)
6200 /* Only asm insns left. */
6201 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6203 schedule_stop (sched_verbose ? dump : NULL);
6204 sched_data.last_was_stop = 1;
6205 maybe_rotate (sched_verbose ? dump : NULL);
6207 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6208 return 1;
6211 if (ia64_final_schedule)
6213 int nr_need_stop = 0;
6215 for (insnp = ready; insnp < e_ready; insnp++)
6216 if (safe_group_barrier_needed_p (*insnp))
6217 nr_need_stop++;
6219 /* Schedule a stop bit if
6220 - all insns require a stop bit, or
6221 - we are starting a new cycle and _any_ insns require a stop bit.
6222 The reason for the latter is that if our schedule is accurate, then
6223 the additional stop won't decrease performance at this point (since
6224 there's a split issue at this point anyway), but it gives us more
6225 freedom when scheduling the currently ready insns. */
6226 if ((reorder_type == 0 && nr_need_stop)
6227 || (reorder_type == 1 && n_ready == nr_need_stop))
6229 schedule_stop (sched_verbose ? dump : NULL);
6230 sched_data.last_was_stop = 1;
6231 maybe_rotate (sched_verbose ? dump : NULL);
6232 if (reorder_type == 1)
6233 return 0;
6235 else
6237 int deleted = 0;
6238 insnp = e_ready;
6239 /* Move down everything that needs a stop bit, preserving relative
6240 order. */
6241 while (insnp-- > ready + deleted)
6242 while (insnp >= ready + deleted)
6244 rtx insn = *insnp;
6245 if (! safe_group_barrier_needed_p (insn))
6246 break;
6247 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6248 *ready = insn;
6249 deleted++;
6251 n_ready -= deleted;
6252 ready += deleted;
6253 if (deleted != nr_need_stop)
6254 abort ();
6258 return itanium_reorder (sched_verbose ? dump : NULL,
6259 ready, e_ready, reorder_type == 1);
6262 static int
6263 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6264 FILE *dump;
6265 int sched_verbose;
6266 rtx *ready;
6267 int *pn_ready;
6268 int clock_var;
6270 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6271 pn_ready, 0, clock_var);
6274 /* Like ia64_sched_reorder, but called after issuing each insn.
6275 Override the default sort algorithm to better slot instructions. */
6277 static int
6278 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6279 FILE *dump ATTRIBUTE_UNUSED;
6280 int sched_verbose ATTRIBUTE_UNUSED;
6281 rtx *ready;
6282 int *pn_ready;
6283 int clock_var;
6285 if (sched_data.last_was_stop)
6286 return 0;
6288 /* Detect one special case and try to optimize it.
6289 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6290 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
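/* Slot-level picture of the transform described above (X is the already
   scheduled insn, M/I/x the insns of the new cycle; a sketch only):

       before:  { X ;;  M   I }          { nop  nop  x }     1.M;;MI  2.MIx
       after:   { X  nop.f  nop.b ;; }   { M    I    x }     1.MFB;;  2.MIx

   i.e. the stop moves to the end of the first bundle and the new cycle
   starts with a full bundle, which is what the shuffling below does.  */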
6291 if (sched_data.first_slot == 1
6292 && sched_data.stopbit[0]
6293 && ((sched_data.cur == 4
6294 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6295 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6296 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6297 || (sched_data.cur == 3
6298 && (sched_data.types[1] == TYPE_M
6299 || sched_data.types[1] == TYPE_A)
6300 && (sched_data.types[2] != TYPE_M
6301 && sched_data.types[2] != TYPE_I
6302 && sched_data.types[2] != TYPE_A))))
6305 int i, best;
6306 rtx stop = sched_data.insns[1];
6308 /* Search backward for the stop bit that must be there. */
6309 while (1)
6311 int insn_code;
6313 stop = PREV_INSN (stop);
6314 if (GET_CODE (stop) != INSN)
6315 abort ();
6316 insn_code = recog_memoized (stop);
6318 /* Ignore cycle displays and .pred.rel.mutex. */
6319 if (insn_code == CODE_FOR_cycle_display
6320 || insn_code == CODE_FOR_pred_rel_mutex)
6321 continue;
6323 if (insn_code == CODE_FOR_insn_group_barrier)
6324 break;
6325 abort ();
6328 /* Adjust the stop bit's slot selector. */
6329 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6330 abort ();
6331 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6333 sched_data.stopbit[0] = 0;
6334 sched_data.stopbit[2] = 1;
6336 sched_data.types[5] = sched_data.types[3];
6337 sched_data.types[4] = sched_data.types[2];
6338 sched_data.types[3] = sched_data.types[1];
6339 sched_data.insns[5] = sched_data.insns[3];
6340 sched_data.insns[4] = sched_data.insns[2];
6341 sched_data.insns[3] = sched_data.insns[1];
6342 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6343 sched_data.cur += 2;
6344 sched_data.first_slot = 3;
6345 for (i = 0; i < NR_PACKETS; i++)
6347 const struct ia64_packet *p = packets + i;
6348 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6350 sched_data.packet = p;
6351 break;
6354 rotate_one_bundle (sched_verbose ? dump : NULL);
6356 best = 6;
6357 for (i = 0; i < NR_PACKETS; i++)
6359 const struct ia64_packet *p = packets + i;
6360 int split = get_split (p, sched_data.first_slot);
6361 int next;
6363 /* Disallow multiway branches here. */
6364 if (p->t[1] == TYPE_B)
6365 continue;
6367 if (packet_matches_p (p, split, &next) && next < best)
6369 best = next;
6370 sched_data.packet = p;
6371 sched_data.split = split;
6374 if (best == 6)
6375 abort ();
6378 if (*pn_ready > 0)
6380 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6381 ready, pn_ready, 1,
6382 clock_var);
6383 if (more)
6384 return more;
6385 /* Did we schedule a stop? If so, finish this cycle. */
6386 if (sched_data.cur == sched_data.first_slot)
6387 return 0;
6390 if (sched_verbose)
6391 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6393 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6394 if (sched_verbose)
6395 dump_current_packet (dump);
6396 return 0;
6399 /* We are about to issue INSN. Return the number of insns left on the
6400 ready queue that can be issued this cycle. */
6402 static int
6403 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6404 FILE *dump;
6405 int sched_verbose;
6406 rtx insn;
6407 int can_issue_more ATTRIBUTE_UNUSED;
6409 enum attr_type t = ia64_safe_type (insn);
6411 last_issued = insn;
6413 if (sched_data.last_was_stop)
6415 int t = sched_data.first_slot;
6416 if (t == 0)
6417 t = 3;
6418 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6419 init_insn_group_barriers ();
6420 sched_data.last_was_stop = 0;
6423 if (t == TYPE_UNKNOWN)
6425 if (sched_verbose)
6426 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6427 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6428 || asm_noperands (PATTERN (insn)) >= 0)
6430 /* This must be some kind of asm. Clear the scheduling state. */
6431 rotate_two_bundles (sched_verbose ? dump : NULL);
6432 if (ia64_final_schedule)
6433 group_barrier_needed_p (insn);
6435 return 1;
6438 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6439 important state info. Don't delete this test. */
6440 if (ia64_final_schedule
6441 && group_barrier_needed_p (insn))
6442 abort ();
6444 sched_data.stopbit[sched_data.cur] = 0;
6445 sched_data.insns[sched_data.cur] = insn;
6446 sched_data.types[sched_data.cur] = t;
6448 sched_data.cur++;
6449 if (sched_verbose)
6450 fprintf (dump, "// Scheduling insn %d of type %s\n",
6451 INSN_UID (insn), type_names[t]);
6453 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6455 schedule_stop (sched_verbose ? dump : NULL);
6456 sched_data.last_was_stop = 1;
6459 return 1;
6462 /* Free data allocated by ia64_sched_init. */
6464 static void
6465 ia64_sched_finish (dump, sched_verbose)
6466 FILE *dump;
6467 int sched_verbose;
6469 if (sched_verbose)
6470 fprintf (dump, "// Finishing schedule.\n");
6471 rotate_two_bundles (NULL);
6472 free (sched_types);
6473 free (sched_ready);
6476 static rtx
6477 ia64_cycle_display (clock, last)
6478 int clock;
6479 rtx last;
6481 if (ia64_final_schedule)
6482 return emit_insn_after (gen_cycle_display (GEN_INT (clock)), last);
6483 else
6484 return last;
6487 /* Emit pseudo-ops for the assembler to describe predicate relations.
6488 At present this assumes that we only consider predicate pairs to
6489 be mutex, and that the assembler can deduce proper values from
6490 straight-line code. */
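/* As a sketch of the output (the exact spelling comes from the
   pred_rel_mutex pattern in ia64.md): at a code label where the
   predicate pair p6/p7 is live, a directive along the lines of

       .pred.rel.mutex p6, p7

   is emitted so the assembler treats the two predicates as mutually
   exclusive rather than as conflicting writers.  */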
6492 static void
6493 emit_predicate_relation_info ()
6495 int i;
6497 for (i = n_basic_blocks - 1; i >= 0; --i)
6499 basic_block bb = BASIC_BLOCK (i);
6500 int r;
6501 rtx head = bb->head;
6503 /* We only need such notes at code labels. */
6504 if (GET_CODE (head) != CODE_LABEL)
6505 continue;
6506 if (GET_CODE (NEXT_INSN (head)) == NOTE
6507 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6508 head = NEXT_INSN (head);
6510 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6511 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6513 rtx p = gen_rtx_REG (BImode, r);
6514 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6515 if (head == bb->end)
6516 bb->end = n;
6517 head = n;
6521 /* Look for conditional calls that do not return, and protect predicate
6522 relations around them. Otherwise the assembler will assume the call
6523 returns, and complain about uses of call-clobbered predicates after
6524 the call. */
6525 for (i = n_basic_blocks - 1; i >= 0; --i)
6527 basic_block bb = BASIC_BLOCK (i);
6528 rtx insn = bb->head;
6530 while (1)
6532 if (GET_CODE (insn) == CALL_INSN
6533 && GET_CODE (PATTERN (insn)) == COND_EXEC
6534 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6536 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6537 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6538 if (bb->head == insn)
6539 bb->head = b;
6540 if (bb->end == insn)
6541 bb->end = a;
6544 if (insn == bb->end)
6545 break;
6546 insn = NEXT_INSN (insn);
6551 /* Generate a NOP instruction of type T. We will never generate L type
6552 nops. */
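/* Minimal examples (mnemonics per the nop_* patterns in ia64.md):
   gen_nop_type (TYPE_M) yields the insn for "nop.m 0", and likewise
   nop.i / nop.b / nop.f for TYPE_I, TYPE_B and TYPE_F; TYPE_X uses the
   nop_x pattern.  */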
6554 static rtx
6555 gen_nop_type (t)
6556 enum attr_type t;
6558 switch (t)
6560 case TYPE_M:
6561 return gen_nop_m ();
6562 case TYPE_I:
6563 return gen_nop_i ();
6564 case TYPE_B:
6565 return gen_nop_b ();
6566 case TYPE_F:
6567 return gen_nop_f ();
6568 case TYPE_X:
6569 return gen_nop_x ();
6570 default:
6571 abort ();
6575 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6576 here than while scheduling. */
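/* For instance (sketch): if the last bundle selector chose an MII
   template but only the M slot received a real insn, this pass emits

       nop.i 0
       nop.i 0

   before the next bundle selector or label so the bundle is complete.  */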
6578 static void
6579 ia64_emit_nops ()
6581 rtx insn;
6582 const struct bundle *b = 0;
6583 int bundle_pos = 0;
6585 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6587 rtx pat;
6588 enum attr_type t;
6589 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6590 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6591 continue;
6592 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6593 || GET_CODE (insn) == CODE_LABEL)
6595 if (b)
6596 while (bundle_pos < 3)
6598 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6599 bundle_pos++;
6601 if (GET_CODE (insn) != CODE_LABEL)
6602 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6603 else
6604 b = 0;
6605 bundle_pos = 0;
6606 continue;
6608 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6610 int t = INTVAL (XVECEXP (pat, 0, 0));
6611 if (b)
6612 while (bundle_pos < t)
6614 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6615 bundle_pos++;
6617 continue;
6620 if (bundle_pos == 3)
6621 b = 0;
6623 if (b && INSN_P (insn))
6625 t = ia64_safe_type (insn);
6626 if (asm_noperands (PATTERN (insn)) >= 0
6627 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6629 while (bundle_pos < 3)
6631 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6632 bundle_pos++;
6634 continue;
6637 if (t == TYPE_UNKNOWN)
6638 continue;
6639 while (bundle_pos < 3)
6641 if (t == b->t[bundle_pos]
6642 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6643 || b->t[bundle_pos] == TYPE_I)))
6644 break;
6646 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6647 bundle_pos++;
6649 if (bundle_pos < 3)
6650 bundle_pos++;
6655 /* Perform machine dependent operations on the rtl chain INSNS. */
6657 void
6658 ia64_reorg (insns)
6659 rtx insns;
6661 /* If optimizing, we'll have split before scheduling. */
6662 if (optimize == 0)
6663 split_all_insns_noflow ();
6665 /* Make sure the CFG and global_live_at_start are correct
6666 for emit_predicate_relation_info. */
6667 find_basic_blocks (insns, max_reg_num (), NULL);
6668 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6670 if (ia64_flag_schedule_insns2)
6672 timevar_push (TV_SCHED2);
6673 ia64_final_schedule = 1;
6674 schedule_ebbs (rtl_dump_file);
6675 ia64_final_schedule = 0;
6676 timevar_pop (TV_SCHED2);
6678 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6679 place as they were during scheduling. */
6680 emit_insn_group_barriers (rtl_dump_file, insns);
6681 ia64_emit_nops ();
6683 else
6684 emit_all_insn_group_barriers (rtl_dump_file, insns);
6686 /* A call must not be the last instruction in a function, so that the
6687 return address is still within the function, so that unwinding works
6688 properly. Note that IA-64 differs from dwarf2 on this point. */
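/* Concretely (a sketch; see gen_break_f and gen_insn_group_barrier):
   when the last active insn is a call, the code below appends

       ;;            <- only if no stop bit was already present
       break.f 0
       ;;

   so the return address still falls inside the function body.  */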
6689 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6691 rtx insn;
6692 int saw_stop = 0;
6694 insn = get_last_insn ();
6695 if (! INSN_P (insn))
6696 insn = prev_active_insn (insn);
6697 if (GET_CODE (insn) == INSN
6698 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6699 && XINT (PATTERN (insn), 1) == 2)
6701 saw_stop = 1;
6702 insn = prev_active_insn (insn);
6704 if (GET_CODE (insn) == CALL_INSN)
6706 if (! saw_stop)
6707 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6708 emit_insn (gen_break_f ());
6709 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6713 fixup_errata ();
6714 emit_predicate_relation_info ();
6717 /* Return true if REGNO is used by the epilogue. */
int
6720 ia64_epilogue_uses (regno)
6721 int regno;
6723 switch (regno)
6725 case R_GR (1):
6726 /* When a function makes a call through a function descriptor, we
6727 will write a (potentially) new value to "gp". After returning
6728 from such a call, we need to make sure the function restores the
6729 original gp-value, even if the function itself does not use the
6730 gp anymore. */
6731 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
6733 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
6734 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
6735 /* For functions defined with the syscall_linkage attribute, all
6736 input registers are marked as live at all function exits. This
6737 prevents the register allocator from using the input registers,
6738 which in turn makes it possible to restart a system call after
6739 an interrupt without having to save/restore the input registers.
6740 This also prevents kernel data from leaking to application code. */
6741 return lookup_attribute ("syscall_linkage",
6742 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
6744 case R_BR (0):
6745 /* Conditional return patterns can't represent the use of `b0' as
6746 the return address, so we force the value live this way. */
6747 return 1;
6749 case AR_PFS_REGNUM:
6750 /* Likewise for ar.pfs, which is used by br.ret. */
6751 return 1;
6753 default:
6754 return 0;
6758 /* Table of valid machine attributes. */
6759 const struct attribute_spec ia64_attribute_table[] =
6761 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6762 { "syscall_linkage", 0, 0, false, true, true, NULL },
6763 { NULL, 0, 0, false, false, false, NULL }
6766 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6768 We add @ to the name if this goes in small data/bss. We can only put
6769 a variable in small data/bss if it is defined in this module or a module
6770 that we are statically linked with. We can't check the second condition,
6771 but TREE_STATIC gives us the first one. */
6773 /* ??? If we had IPA, we could check the second condition. We could support
6774 programmer added section attributes if the variable is not defined in this
6775 module. */
6777 /* ??? See the v850 port for a cleaner way to do this. */
6779 /* ??? We could also support own long data here. Generating movl/add/ld8
6780 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6781 code faster because there is one less load. This also includes incomplete
6782 types which can't go in sdata/sbss. */
6784 /* ??? See select_section. We must put short own readonly variables in
6785 sdata/sbss instead of the more natural rodata, because we can't perform
6786 the DECL_READONLY_SECTION test here. */
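/* Hedged example of the encoding done below: for a definition such as

       static int counter;        (size <= ia64_section_threshold)

   the SYMBOL_REF string is rewritten to "@counter".  The '@' flag is
   what later permits a gp-relative address, roughly

       addl r14 = @gprel(counter), gp

   instead of a movl of the full 64-bit address.  */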
6788 extern struct obstack * saveable_obstack;
6790 void
6791 ia64_encode_section_info (decl)
6792 tree decl;
6794 const char *symbol_str;
6796 if (TREE_CODE (decl) == FUNCTION_DECL)
6798 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6799 return;
6802 /* Careful not to prod global register variables. */
6803 if (TREE_CODE (decl) != VAR_DECL
6804 || GET_CODE (DECL_RTL (decl)) != MEM
6805 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6806 return;
6808 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6810 /* We assume that -fpic is used only to create a shared library (dso).
6811 With -fpic, no global data can ever be sdata.
6812 Without -fpic, global common uninitialized data can never be sdata, since
6813 it can unify with a real definition in a dso. */
6814 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6815 to access them. The linker may then be able to do linker relaxation to
6816 optimize references to them. Currently sdata implies use of gprel. */
6817 /* We need the DECL_EXTERNAL check for C++. static class data members get
6818 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6819 statically allocated, but the space is allocated somewhere else. Such
6820 decls can not be own data. */
6821 if (! TARGET_NO_SDATA
6822 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6823 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6824 && ! (TREE_PUBLIC (decl)
6825 && (flag_pic
6826 || (DECL_COMMON (decl)
6827 && (DECL_INITIAL (decl) == 0
6828 || DECL_INITIAL (decl) == error_mark_node))))
6829 /* Either the variable must be declared without a section attribute,
6830 or the section must be sdata or sbss. */
6831 && (DECL_SECTION_NAME (decl) == 0
6832 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6833 ".sdata")
6834 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6835 ".sbss")))
6837 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6839 /* If the variable has already been defined in the output file, then it
6840 is too late to put it in sdata if it wasn't put there in the first
6841 place. The test is here rather than above, because if it is already
6842 in sdata, then it can stay there. */
6844 if (TREE_ASM_WRITTEN (decl))
;
6847 /* If this is an incomplete type with size 0, then we can't put it in
6848 sdata because it might be too big when completed. */
6849 else if (size > 0
6850 && size <= (HOST_WIDE_INT) ia64_section_threshold
6851 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
6853 size_t len = strlen (symbol_str);
6854 char *newstr = alloca (len + 2);
6855 const char *string;
6857 *newstr = SDATA_NAME_FLAG_CHAR;
6858 memcpy (newstr + 1, symbol_str, len + 1);
6860 string = ggc_alloc_string (newstr, len + 1);
6861 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
6864 /* This decl is marked as being in small data/bss but it shouldn't
6865 be; one likely explanation for this is that the decl has been
6866 moved into a different section from the one it was in when
6867 ENCODE_SECTION_INFO was first called.  Remove the '@'.  */
6868 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6870 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6871 = ggc_strdup (symbol_str + 1);
6875 /* Output assembly directives for prologue regions. */
6877 /* The current basic block number. */
6879 static int block_num;
6881 /* True if we need a copy_state command at the start of the next block. */
6883 static int need_copy_state;
6885 /* The function emits unwind directives for the start of an epilogue. */
6887 static void
6888 process_epilogue ()
6890 /* If this isn't the last block of the function, then we need to label the
6891 current state, and copy it back in at the start of the next block. */
6893 if (block_num != n_basic_blocks - 1)
6895 fprintf (asm_out_file, "\t.label_state 1\n");
6896 need_copy_state = 1;
6899 fprintf (asm_out_file, "\t.restore sp\n");
6902 /* This function processes a SET pattern looking for specific patterns
6903 which result in emitting an assembly directive required for unwinding. */
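/* Some of the concrete SET -> directive mappings handled below (sketch;
   register numbers and offsets are illustrative only):

       ar.pfs saved into rNN              ->  .save ar.pfs, rNN
       sp = sp - 160    (prologue)        ->  .fframe 160
       sp = sp + N or sp = fp (epilogue)  ->  .restore sp
       rNN = b0                           ->  .save rp, rNN
       [sp + off] = b0                    ->  .savesp rp, off          */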
6905 static int
6906 process_set (asm_out_file, pat)
6907 FILE *asm_out_file;
6908 rtx pat;
6910 rtx src = SET_SRC (pat);
6911 rtx dest = SET_DEST (pat);
6912 int src_regno, dest_regno;
6914 /* Look for the ALLOC insn. */
6915 if (GET_CODE (src) == UNSPEC_VOLATILE
6916 && XINT (src, 1) == 0
6917 && GET_CODE (dest) == REG)
6919 dest_regno = REGNO (dest);
6921 /* If this isn't the final destination for ar.pfs, the alloc
6922 shouldn't have been marked frame related. */
6923 if (dest_regno != current_frame_info.reg_save_ar_pfs)
6924 abort ();
6926 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
6927 ia64_dbx_register_number (dest_regno));
6928 return 1;
6931 /* Look for SP = .... */
6932 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
6934 if (GET_CODE (src) == PLUS)
6936 rtx op0 = XEXP (src, 0);
6937 rtx op1 = XEXP (src, 1);
6938 if (op0 == dest && GET_CODE (op1) == CONST_INT)
6940 if (INTVAL (op1) < 0)
6942 fputs ("\t.fframe ", asm_out_file);
6943 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
6944 -INTVAL (op1));
6945 fputc ('\n', asm_out_file);
6947 else
6948 process_epilogue ();
6950 else
6951 abort ();
6953 else if (GET_CODE (src) == REG
6954 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
6955 process_epilogue ();
6956 else
6957 abort ();
6959 return 1;
6962 /* Register move we need to look at. */
6963 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
6965 src_regno = REGNO (src);
6966 dest_regno = REGNO (dest);
6968 switch (src_regno)
6970 case BR_REG (0):
6971 /* Saving return address pointer. */
6972 if (dest_regno != current_frame_info.reg_save_b0)
6973 abort ();
6974 fprintf (asm_out_file, "\t.save rp, r%d\n",
6975 ia64_dbx_register_number (dest_regno));
6976 return 1;
6978 case PR_REG (0):
6979 if (dest_regno != current_frame_info.reg_save_pr)
6980 abort ();
6981 fprintf (asm_out_file, "\t.save pr, r%d\n",
6982 ia64_dbx_register_number (dest_regno));
6983 return 1;
6985 case AR_UNAT_REGNUM:
6986 if (dest_regno != current_frame_info.reg_save_ar_unat)
6987 abort ();
6988 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
6989 ia64_dbx_register_number (dest_regno));
6990 return 1;
6992 case AR_LC_REGNUM:
6993 if (dest_regno != current_frame_info.reg_save_ar_lc)
6994 abort ();
6995 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
6996 ia64_dbx_register_number (dest_regno));
6997 return 1;
6999 case STACK_POINTER_REGNUM:
7000 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7001 || ! frame_pointer_needed)
7002 abort ();
7003 fprintf (asm_out_file, "\t.vframe r%d\n",
7004 ia64_dbx_register_number (dest_regno));
7005 return 1;
7007 default:
7008 /* Everything else should indicate being stored to memory. */
7009 abort ();
7013 /* Memory store we need to look at. */
7014 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7016 long off;
7017 rtx base;
7018 const char *saveop;
7020 if (GET_CODE (XEXP (dest, 0)) == REG)
7022 base = XEXP (dest, 0);
7023 off = 0;
7025 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7026 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7028 base = XEXP (XEXP (dest, 0), 0);
7029 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7031 else
7032 abort ();
7034 if (base == hard_frame_pointer_rtx)
7036 saveop = ".savepsp";
7037 off = - off;
7039 else if (base == stack_pointer_rtx)
7040 saveop = ".savesp";
7041 else
7042 abort ();
7044 src_regno = REGNO (src);
7045 switch (src_regno)
7047 case BR_REG (0):
7048 if (current_frame_info.reg_save_b0 != 0)
7049 abort ();
7050 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7051 return 1;
7053 case PR_REG (0):
7054 if (current_frame_info.reg_save_pr != 0)
7055 abort ();
7056 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7057 return 1;
7059 case AR_LC_REGNUM:
7060 if (current_frame_info.reg_save_ar_lc != 0)
7061 abort ();
7062 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7063 return 1;
7065 case AR_PFS_REGNUM:
7066 if (current_frame_info.reg_save_ar_pfs != 0)
7067 abort ();
7068 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7069 return 1;
7071 case AR_UNAT_REGNUM:
7072 if (current_frame_info.reg_save_ar_unat != 0)
7073 abort ();
7074 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7075 return 1;
7077 case GR_REG (4):
7078 case GR_REG (5):
7079 case GR_REG (6):
7080 case GR_REG (7):
7081 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7082 1 << (src_regno - GR_REG (4)));
7083 return 1;
7085 case BR_REG (1):
7086 case BR_REG (2):
7087 case BR_REG (3):
7088 case BR_REG (4):
7089 case BR_REG (5):
7090 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7091 1 << (src_regno - BR_REG (1)));
7092 return 1;
7094 case FR_REG (2):
7095 case FR_REG (3):
7096 case FR_REG (4):
7097 case FR_REG (5):
7098 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7099 1 << (src_regno - FR_REG (2)));
7100 return 1;
7102 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7103 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7104 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7105 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7106 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7107 1 << (src_regno - FR_REG (12)));
7108 return 1;
7110 default:
7111 return 0;
7115 return 0;
7119 /* This function looks at a single insn and emits any directives
7120 required to unwind this insn. */
7121 void
7122 process_for_unwind_directive (asm_out_file, insn)
7123 FILE *asm_out_file;
7124 rtx insn;
7126 if (flag_unwind_tables
7127 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7129 rtx pat;
7131 if (GET_CODE (insn) == NOTE
7132 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7134 block_num = NOTE_BASIC_BLOCK (insn)->index;
7136 /* Restore unwind state from immediately before the epilogue. */
7137 if (need_copy_state)
7139 fprintf (asm_out_file, "\t.body\n");
7140 fprintf (asm_out_file, "\t.copy_state 1\n");
7141 need_copy_state = 0;
7145 if (! RTX_FRAME_RELATED_P (insn))
7146 return;
7148 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7149 if (pat)
7150 pat = XEXP (pat, 0);
7151 else
7152 pat = PATTERN (insn);
7154 switch (GET_CODE (pat))
7156 case SET:
7157 process_set (asm_out_file, pat);
7158 break;
7160 case PARALLEL:
7162 int par_index;
7163 int limit = XVECLEN (pat, 0);
7164 for (par_index = 0; par_index < limit; par_index++)
7166 rtx x = XVECEXP (pat, 0, par_index);
7167 if (GET_CODE (x) == SET)
7168 process_set (asm_out_file, x);
7170 break;
7173 default:
7174 abort ();
7180 void
7181 ia64_init_builtins ()
7183 tree psi_type_node = build_pointer_type (integer_type_node);
7184 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7185 tree endlink = void_list_node;
7187 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7188 tree si_ftype_psi_si_si
7189 = build_function_type (integer_type_node,
7190 tree_cons (NULL_TREE, psi_type_node,
7191 tree_cons (NULL_TREE, integer_type_node,
7192 tree_cons (NULL_TREE,
7193 integer_type_node,
7194 endlink))));
7196 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7197 tree di_ftype_pdi_di_di
7198 = build_function_type (long_integer_type_node,
7199 tree_cons (NULL_TREE, pdi_type_node,
7200 tree_cons (NULL_TREE,
7201 long_integer_type_node,
7202 tree_cons (NULL_TREE,
7203 long_integer_type_node,
7204 endlink))));
7205 /* __sync_synchronize */
7206 tree void_ftype_void
7207 = build_function_type (void_type_node, endlink);
7209 /* __sync_lock_test_and_set_si */
7210 tree si_ftype_psi_si
7211 = build_function_type (integer_type_node,
7212 tree_cons (NULL_TREE, psi_type_node,
7213 tree_cons (NULL_TREE, integer_type_node, endlink)));
7215 /* __sync_lock_test_and_set_di */
7216 tree di_ftype_pdi_di
7217 = build_function_type (long_integer_type_node,
7218 tree_cons (NULL_TREE, pdi_type_node,
7219 tree_cons (NULL_TREE, long_integer_type_node,
7220 endlink)));
7222 /* __sync_lock_release_si */
7223 tree void_ftype_psi
7224 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
7225 endlink));
7227 /* __sync_lock_release_di */
7228 tree void_ftype_pdi
7229 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
7230 endlink));
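/* For reference, the C prototypes these type nodes describe (a sketch
   derived from the tree_cons chains above, not something GCC prints):

       int  __sync_val_compare_and_swap_si (int *, int, int);
       long __sync_val_compare_and_swap_di (long *, long, long);
       int  __sync_lock_test_and_set_si (int *, int);
       long __sync_lock_test_and_set_di (long *, long);
       void __sync_lock_release_si (int *);
       void __sync_lock_release_di (long *);
       void __sync_synchronize (void);  */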
7232 #define def_builtin(name, type, code) \
7233 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
7235 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7236 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7237 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7238 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7239 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7240 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7241 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7242 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7244 def_builtin ("__sync_synchronize", void_ftype_void,
7245 IA64_BUILTIN_SYNCHRONIZE);
7247 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7248 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7249 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7250 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7251 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7252 IA64_BUILTIN_LOCK_RELEASE_SI);
7253 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7254 IA64_BUILTIN_LOCK_RELEASE_DI);
7256 def_builtin ("__builtin_ia64_bsp",
7257 build_function_type (ptr_type_node, endlink),
7258 IA64_BUILTIN_BSP);
7260 def_builtin ("__builtin_ia64_flushrs",
7261 build_function_type (void_type_node, endlink),
7262 IA64_BUILTIN_FLUSHRS);
7264 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7265 IA64_BUILTIN_FETCH_AND_ADD_SI);
7266 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7267 IA64_BUILTIN_FETCH_AND_SUB_SI);
7268 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7269 IA64_BUILTIN_FETCH_AND_OR_SI);
7270 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7271 IA64_BUILTIN_FETCH_AND_AND_SI);
7272 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7273 IA64_BUILTIN_FETCH_AND_XOR_SI);
7274 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7275 IA64_BUILTIN_FETCH_AND_NAND_SI);
7277 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7278 IA64_BUILTIN_ADD_AND_FETCH_SI);
7279 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7280 IA64_BUILTIN_SUB_AND_FETCH_SI);
7281 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7282 IA64_BUILTIN_OR_AND_FETCH_SI);
7283 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7284 IA64_BUILTIN_AND_AND_FETCH_SI);
7285 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7286 IA64_BUILTIN_XOR_AND_FETCH_SI);
7287 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7288 IA64_BUILTIN_NAND_AND_FETCH_SI);
7290 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7291 IA64_BUILTIN_FETCH_AND_ADD_DI);
7292 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7293 IA64_BUILTIN_FETCH_AND_SUB_DI);
7294 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7295 IA64_BUILTIN_FETCH_AND_OR_DI);
7296 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7297 IA64_BUILTIN_FETCH_AND_AND_DI);
7298 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7299 IA64_BUILTIN_FETCH_AND_XOR_DI);
7300 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7301 IA64_BUILTIN_FETCH_AND_NAND_DI);
7303 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7304 IA64_BUILTIN_ADD_AND_FETCH_DI);
7305 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7306 IA64_BUILTIN_SUB_AND_FETCH_DI);
7307 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7308 IA64_BUILTIN_OR_AND_FETCH_DI);
7309 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7310 IA64_BUILTIN_AND_AND_FETCH_DI);
7311 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7312 IA64_BUILTIN_XOR_AND_FETCH_DI);
7313 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7314 IA64_BUILTIN_NAND_AND_FETCH_DI);
7316 #undef def_builtin
7319 /* Expand fetch_and_op intrinsics. The basic code sequence is:
mf
7322 tmp = [ptr];
7323 do {
7324 ret = tmp;
7325 ar.ccv = tmp;
7326 tmp <op>= value;
7327 cmpxchgsz.acq tmp = [ptr], tmp
7328 } while (tmp != ret)
*/
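/* Hedged usage sketch from user code (the variable name is illustrative):

       static int hits;
       int old = __sync_fetch_and_add_si (&hits, 1);    <- old value returned

   When the addend satisfies fetchadd_operand, the expander below emits a
   single fetchadd4.acq (fetchadd8.acq for the DImode variant) instead of
   the cmpxchg loop shown above.  */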
7331 static rtx
7332 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7333 optab binoptab;
7334 enum machine_mode mode;
7335 tree arglist;
7336 rtx target;
7338 rtx ret, label, tmp, ccv, insn, mem, value;
7339 tree arg0, arg1;
7341 arg0 = TREE_VALUE (arglist);
7342 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7343 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7344 value = expand_expr (arg1, NULL_RTX, mode, 0);
7346 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7347 MEM_VOLATILE_P (mem) = 1;
7349 if (target && register_operand (target, mode))
7350 ret = target;
7351 else
7352 ret = gen_reg_rtx (mode);
7354 emit_insn (gen_mf ());
7356 /* Special case for fetchadd instructions. */
7357 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7359 if (mode == SImode)
7360 insn = gen_fetchadd_acq_si (ret, mem, value);
7361 else
7362 insn = gen_fetchadd_acq_di (ret, mem, value);
7363 emit_insn (insn);
7364 return ret;
7367 tmp = gen_reg_rtx (mode);
7368 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7369 emit_move_insn (tmp, mem);
7371 label = gen_label_rtx ();
7372 emit_label (label);
7373 emit_move_insn (ret, tmp);
7374 emit_move_insn (ccv, tmp);
7376 /* Perform the specific operation. Special case NAND by noticing
7377 one_cmpl_optab instead. */
7378 if (binoptab == one_cmpl_optab)
7380 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7381 binoptab = and_optab;
7383 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7385 if (mode == SImode)
7386 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7387 else
7388 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7389 emit_insn (insn);
7391 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);
7393 return ret;
7396 /* Expand op_and_fetch intrinsics. The basic code sequence is:
mf
7399 tmp = [ptr];
7400 do {
7401 old = tmp;
7402 ar.ccv = tmp;
7403 ret = tmp + value;
7404 cmpxchgsz.acq tmp = [ptr], ret
7405 } while (tmp != old)
*/
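/* Usage sketch: unlike the fetch_and_op forms above, these return the
   updated value, e.g.

       int n = __sync_add_and_fetch_si (&hits, 1);    <- n is the new value

   (hits as in the illustrative sketch further above).  */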
7408 static rtx
7409 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7410 optab binoptab;
7411 enum machine_mode mode;
7412 tree arglist;
7413 rtx target;
7415 rtx old, label, tmp, ret, ccv, insn, mem, value;
7416 tree arg0, arg1;
7418 arg0 = TREE_VALUE (arglist);
7419 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7420 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7421 value = expand_expr (arg1, NULL_RTX, mode, 0);
7423 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7424 MEM_VOLATILE_P (mem) = 1;
7426 if (target && ! register_operand (target, mode))
7427 target = NULL_RTX;
7429 emit_insn (gen_mf ());
7430 tmp = gen_reg_rtx (mode);
7431 old = gen_reg_rtx (mode);
7432 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7434 emit_move_insn (tmp, mem);
7436 label = gen_label_rtx ();
7437 emit_label (label);
7438 emit_move_insn (old, tmp);
7439 emit_move_insn (ccv, tmp);
7441 /* Perform the specific operation. Special case NAND by noticing
7442 one_cmpl_optab instead. */
7443 if (binoptab == one_cmpl_optab)
7445 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7446 binoptab = and_optab;
7448 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7450 if (mode == SImode)
7451 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7452 else
7453 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7454 emit_insn (insn);
7456 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);
7458 return ret;
7461 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7463 ar.ccv = oldval
mf
7465 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7466 return ret
7468 For bool_ it's the same except return ret == oldval.
*/
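/* Hedged usage sketch, a trivial spin lock built on the bool_ form
   (lock_word is illustrative):

       static int lock_word;
       while (! __sync_bool_compare_and_swap_si (&lock_word, 0, 1))
         ;                       <- spin until we observe 0 and store 1
*/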
7471 static rtx
7472 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7473 enum machine_mode mode;
7474 int boolp;
7475 tree arglist;
7476 rtx target;
7478 tree arg0, arg1, arg2;
7479 rtx mem, old, new, ccv, tmp, insn;
7481 arg0 = TREE_VALUE (arglist);
7482 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7483 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7484 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7485 old = expand_expr (arg1, NULL_RTX, mode, 0);
7486 new = expand_expr (arg2, NULL_RTX, mode, 0);
7488 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7489 MEM_VOLATILE_P (mem) = 1;
7491 if (! register_operand (old, mode))
7492 old = copy_to_mode_reg (mode, old);
7493 if (! register_operand (new, mode))
7494 new = copy_to_mode_reg (mode, new);
7496 if (! boolp && target && register_operand (target, mode))
7497 tmp = target;
7498 else
7499 tmp = gen_reg_rtx (mode);
7501 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7502 emit_move_insn (ccv, old);
7503 emit_insn (gen_mf ());
7504 if (mode == SImode)
7505 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7506 else
7507 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7508 emit_insn (insn);
7510 if (boolp)
7512 if (! target)
7513 target = gen_reg_rtx (mode);
7514 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7516 else
7517 return tmp;
7520 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
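/* Usage sketch (paired with __sync_lock_release_si further below; the
   variable name is illustrative):

       static int busy;
       while (__sync_lock_test_and_set_si (&busy, 1))
         ;                       <- spin while the old value was nonzero
       ... critical section ...
       __sync_lock_release_si (&busy);
*/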
7522 static rtx
7523 ia64_expand_lock_test_and_set (mode, arglist, target)
7524 enum machine_mode mode;
7525 tree arglist;
7526 rtx target;
7528 tree arg0, arg1;
7529 rtx mem, new, ret, insn;
7531 arg0 = TREE_VALUE (arglist);
7532 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7533 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7534 new = expand_expr (arg1, NULL_RTX, mode, 0);
7536 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7537 MEM_VOLATILE_P (mem) = 1;
7538 if (! register_operand (new, mode))
7539 new = copy_to_mode_reg (mode, new);
7541 if (target && register_operand (target, mode))
7542 ret = target;
7543 else
7544 ret = gen_reg_rtx (mode);
7546 if (mode == SImode)
7547 insn = gen_xchgsi (ret, mem, new);
7548 else
7549 insn = gen_xchgdi (ret, mem, new);
7550 emit_insn (insn);
7552 return ret;
7555 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7557 static rtx
7558 ia64_expand_lock_release (mode, arglist, target)
7559 enum machine_mode mode;
7560 tree arglist;
7561 rtx target ATTRIBUTE_UNUSED;
7563 tree arg0;
7564 rtx mem;
7566 arg0 = TREE_VALUE (arglist);
7567 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7569 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7570 MEM_VOLATILE_P (mem) = 1;
7572 emit_move_insn (mem, const0_rtx);
7574 return const0_rtx;
rtx
7578 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7579 tree exp;
7580 rtx target;
7581 rtx subtarget ATTRIBUTE_UNUSED;
7582 enum machine_mode mode ATTRIBUTE_UNUSED;
7583 int ignore ATTRIBUTE_UNUSED;
7585 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7586 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7587 tree arglist = TREE_OPERAND (exp, 1);
7589 switch (fcode)
7591 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7592 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7593 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7594 case IA64_BUILTIN_LOCK_RELEASE_SI:
7595 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7596 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7597 case IA64_BUILTIN_FETCH_AND_OR_SI:
7598 case IA64_BUILTIN_FETCH_AND_AND_SI:
7599 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7600 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7601 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7602 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7603 case IA64_BUILTIN_OR_AND_FETCH_SI:
7604 case IA64_BUILTIN_AND_AND_FETCH_SI:
7605 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7606 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7607 mode = SImode;
7608 break;
7610 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7611 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7612 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7613 case IA64_BUILTIN_LOCK_RELEASE_DI:
7614 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7615 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7616 case IA64_BUILTIN_FETCH_AND_OR_DI:
7617 case IA64_BUILTIN_FETCH_AND_AND_DI:
7618 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7619 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7620 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7621 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7622 case IA64_BUILTIN_OR_AND_FETCH_DI:
7623 case IA64_BUILTIN_AND_AND_FETCH_DI:
7624 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7625 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7626 mode = DImode;
7627 break;
7629 default:
7630 break;
7633 switch (fcode)
7635 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7636 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7637 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7639 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7640 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7641 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
7643 case IA64_BUILTIN_SYNCHRONIZE:
7644 emit_insn (gen_mf ());
7645 return const0_rtx;
7647 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7648 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7649 return ia64_expand_lock_test_and_set (mode, arglist, target);
7651 case IA64_BUILTIN_LOCK_RELEASE_SI:
7652 case IA64_BUILTIN_LOCK_RELEASE_DI:
7653 return ia64_expand_lock_release (mode, arglist, target);
7655 case IA64_BUILTIN_BSP:
7656 if (! target || ! register_operand (target, DImode))
7657 target = gen_reg_rtx (DImode);
7658 emit_insn (gen_bsp_value (target));
7659 return target;
7661 case IA64_BUILTIN_FLUSHRS:
7662 emit_insn (gen_flushrs ());
7663 return const0_rtx;
7665 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7666 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7667 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7669 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7670 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7671 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7673 case IA64_BUILTIN_FETCH_AND_OR_SI:
7674 case IA64_BUILTIN_FETCH_AND_OR_DI:
7675 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
7677 case IA64_BUILTIN_FETCH_AND_AND_SI:
7678 case IA64_BUILTIN_FETCH_AND_AND_DI:
7679 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
7681 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7682 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7683 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
7685 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7686 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7687 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
7689 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7690 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7691 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
7693 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7694 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7695 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
7697 case IA64_BUILTIN_OR_AND_FETCH_SI:
7698 case IA64_BUILTIN_OR_AND_FETCH_DI:
7699 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
7701 case IA64_BUILTIN_AND_AND_FETCH_SI:
7702 case IA64_BUILTIN_AND_AND_FETCH_DI:
7703 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
7705 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7706 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7707 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
7709 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7710 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7711 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
7713 default:
7714 break;
7717 return NULL_RTX;