gcc/config/ia64/ia64.c
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "recog.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "obstack.h"
39 #include "except.h"
40 #include "function.h"
41 #include "ggc.h"
42 #include "basic-block.h"
43 #include "toplev.h"
44 #include "sched-int.h"
45 #include "timevar.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "tm_p.h"
50 /* This is used for communication between ASM_OUTPUT_LABEL and
51 ASM_OUTPUT_LABELREF. */
52 int ia64_asm_output_label = 0;
54 /* Define the information needed to generate branch and scc insns. This is
55 stored from the compare operation. */
56 struct rtx_def * ia64_compare_op0;
57 struct rtx_def * ia64_compare_op1;
59 /* Register names for ia64_expand_prologue. */
60 static const char * const ia64_reg_numbers[96] =
61 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
62 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
63 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
64 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
65 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
66 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
67 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
68 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
69 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
70 "r104","r105","r106","r107","r108","r109","r110","r111",
71 "r112","r113","r114","r115","r116","r117","r118","r119",
72 "r120","r121","r122","r123","r124","r125","r126","r127"};
74 /* ??? These strings could be shared with REGISTER_NAMES. */
75 static const char * const ia64_input_reg_names[8] =
76 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
78 /* ??? These strings could be shared with REGISTER_NAMES. */
79 static const char * const ia64_local_reg_names[80] =
80 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
81 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
82 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
83 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
84 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
85 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
86 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
87 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
88 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
89 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
91 /* ??? These strings could be shared with REGISTER_NAMES. */
92 static const char * const ia64_output_reg_names[8] =
93 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
95 /* String used with the -mfixed-range= option. */
96 const char *ia64_fixed_range_string;
98 /* Determines whether we run our final scheduling pass or not. We always
99 avoid the normal second scheduling pass. */
100 static int ia64_flag_schedule_insns2;
102 /* Variables which are this size or smaller are put in the sdata/sbss
103 sections. */
105 unsigned int ia64_section_threshold;
107 static int find_gr_spill PARAMS ((int));
108 static int next_scratch_gr_reg PARAMS ((void));
109 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
110 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
111 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
112 static void finish_spill_pointers PARAMS ((void));
113 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
114 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
115 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
116 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
117 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
118 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
120 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
121 static void fix_range PARAMS ((const char *));
122 static void ia64_add_gc_roots PARAMS ((void));
123 static void ia64_init_machine_status PARAMS ((struct function *));
124 static void ia64_mark_machine_status PARAMS ((struct function *));
125 static void ia64_free_machine_status PARAMS ((struct function *));
126 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
127 static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
128 static void emit_predicate_relation_info PARAMS ((void));
129 static void process_epilogue PARAMS ((void));
130 static int process_set PARAMS ((FILE *, rtx));
132 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
133 tree, rtx));
134 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
135 tree, rtx));
136 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
137 tree, rtx));
138 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
139 tree, rtx));
140 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
141 static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
142 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
143 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
144 static void ia64_output_function_end_prologue PARAMS ((FILE *));
146 static int ia64_issue_rate PARAMS ((void));
147 static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
148 static void ia64_sched_init PARAMS ((FILE *, int, int));
149 static void ia64_sched_finish PARAMS ((FILE *, int));
150 static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
151 int *, int, int));
152 static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
153 static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
154 static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
157 /* Table of valid machine attributes. */
158 static const struct attribute_spec ia64_attribute_table[] =
160 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
161 { "syscall_linkage", 0, 0, false, true, true, NULL },
162 { NULL, 0, 0, false, false, false, NULL }
165 /* Initialize the GCC target structure. */
166 #undef TARGET_ATTRIBUTE_TABLE
167 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
169 #undef TARGET_INIT_BUILTINS
170 #define TARGET_INIT_BUILTINS ia64_init_builtins
172 #undef TARGET_EXPAND_BUILTIN
173 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
175 #undef TARGET_ASM_BYTE_OP
176 #define TARGET_ASM_BYTE_OP "\tdata1\t"
177 #undef TARGET_ASM_ALIGNED_HI_OP
178 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
179 #undef TARGET_ASM_ALIGNED_SI_OP
180 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
181 #undef TARGET_ASM_ALIGNED_DI_OP
182 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
183 #undef TARGET_ASM_UNALIGNED_HI_OP
184 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
185 #undef TARGET_ASM_UNALIGNED_SI_OP
186 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
187 #undef TARGET_ASM_UNALIGNED_DI_OP
188 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
189 #undef TARGET_ASM_INTEGER
190 #define TARGET_ASM_INTEGER ia64_assemble_integer
192 #undef TARGET_ASM_FUNCTION_PROLOGUE
193 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
194 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
195 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
196 #undef TARGET_ASM_FUNCTION_EPILOGUE
197 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
199 #undef TARGET_SCHED_ADJUST_COST
200 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
201 #undef TARGET_SCHED_ISSUE_RATE
202 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
203 #undef TARGET_SCHED_VARIABLE_ISSUE
204 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
205 #undef TARGET_SCHED_INIT
206 #define TARGET_SCHED_INIT ia64_sched_init
207 #undef TARGET_SCHED_FINISH
208 #define TARGET_SCHED_FINISH ia64_sched_finish
209 #undef TARGET_SCHED_REORDER
210 #define TARGET_SCHED_REORDER ia64_sched_reorder
211 #undef TARGET_SCHED_REORDER2
212 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
214 struct gcc_target targetm = TARGET_INITIALIZER;
216 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
219 call_operand (op, mode)
220 rtx op;
221 enum machine_mode mode;
223 if (mode != GET_MODE (op))
224 return 0;
226 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
227 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
230 /* Return 1 if OP refers to a symbol in the sdata section. */
233 sdata_symbolic_operand (op, mode)
234 rtx op;
235 enum machine_mode mode ATTRIBUTE_UNUSED;
237 switch (GET_CODE (op))
239 case CONST:
240 if (GET_CODE (XEXP (op, 0)) != PLUS
241 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
242 break;
243 op = XEXP (XEXP (op, 0), 0);
244 /* FALLTHRU */
246 case SYMBOL_REF:
247 if (CONSTANT_POOL_ADDRESS_P (op))
248 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
249 else
250 return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;
252 default:
253 break;
256 return 0;
259 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
262 got_symbolic_operand (op, mode)
263 rtx op;
264 enum machine_mode mode ATTRIBUTE_UNUSED;
266 switch (GET_CODE (op))
268 case CONST:
269 op = XEXP (op, 0);
270 if (GET_CODE (op) != PLUS)
271 return 0;
272 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
273 return 0;
274 op = XEXP (op, 1);
275 if (GET_CODE (op) != CONST_INT)
276 return 0;
278 return 1;
280 /* Ok if we're not using GOT entries at all. */
281 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
282 return 1;
284 /* "Ok" while emitting rtl, since otherwise we won't be provided
285 with the entire offset during emission, which makes it very
286 hard to split the offset into high and low parts. */
287 if (rtx_equal_function_value_matters)
288 return 1;
290 /* Force the low 14 bits of the constant to zero so that we do not
291 use up so many GOT entries. */
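      /* For instance, an offset such as 0x4000 passes this test, while
         0x2004 does not, since its low 14 bits are nonzero.  */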
292 return (INTVAL (op) & 0x3fff) == 0;
294 case SYMBOL_REF:
295 case LABEL_REF:
296 return 1;
298 default:
299 break;
301 return 0;
304 /* Return 1 if OP refers to a symbol. */
307 symbolic_operand (op, mode)
308 rtx op;
309 enum machine_mode mode ATTRIBUTE_UNUSED;
311 switch (GET_CODE (op))
313 case CONST:
314 case SYMBOL_REF:
315 case LABEL_REF:
316 return 1;
318 default:
319 break;
321 return 0;
324 /* Return 1 if OP refers to a function. */
327 function_operand (op, mode)
328 rtx op;
329 enum machine_mode mode ATTRIBUTE_UNUSED;
331 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
332 return 1;
333 else
334 return 0;
337 /* Return 1 if OP is setjmp or a similar function. */
339 /* ??? This is an unsatisfying solution. Should rethink. */
342 setjmp_operand (op, mode)
343 rtx op;
344 enum machine_mode mode ATTRIBUTE_UNUSED;
346 const char *name;
347 int retval = 0;
349 if (GET_CODE (op) != SYMBOL_REF)
350 return 0;
352 name = XSTR (op, 0);
354 /* The following code is borrowed from special_function_p in calls.c. */
356 /* Disregard prefix _, __ or __x. */
357 if (name[0] == '_')
359 if (name[1] == '_' && name[2] == 'x')
360 name += 3;
361 else if (name[1] == '_')
362 name += 2;
363 else
364 name += 1;
367 if (name[0] == 's')
369 retval
370 = ((name[1] == 'e'
371 && (! strcmp (name, "setjmp")
372 || ! strcmp (name, "setjmp_syscall")))
373 || (name[1] == 'i'
374 && ! strcmp (name, "sigsetjmp"))
375 || (name[1] == 'a'
376 && ! strcmp (name, "savectx")));
378 else if ((name[0] == 'q' && name[1] == 's'
379 && ! strcmp (name, "qsetjmp"))
380 || (name[0] == 'v' && name[1] == 'f'
381 && ! strcmp (name, "vfork")))
382 retval = 1;
384 return retval;
387 /* Return 1 if OP is a general operand, but when pic exclude symbolic
388 operands. */
390 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
391 from PREDICATE_CODES. */
394 move_operand (op, mode)
395 rtx op;
396 enum machine_mode mode;
398 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
399 return 0;
401 return general_operand (op, mode);
404 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
407 gr_register_operand (op, mode)
408 rtx op;
409 enum machine_mode mode;
411 if (! register_operand (op, mode))
412 return 0;
413 if (GET_CODE (op) == SUBREG)
414 op = SUBREG_REG (op);
415 if (GET_CODE (op) == REG)
417 unsigned int regno = REGNO (op);
418 if (regno < FIRST_PSEUDO_REGISTER)
419 return GENERAL_REGNO_P (regno);
421 return 1;
424 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
427 fr_register_operand (op, mode)
428 rtx op;
429 enum machine_mode mode;
431 if (! register_operand (op, mode))
432 return 0;
433 if (GET_CODE (op) == SUBREG)
434 op = SUBREG_REG (op);
435 if (GET_CODE (op) == REG)
437 unsigned int regno = REGNO (op);
438 if (regno < FIRST_PSEUDO_REGISTER)
439 return FR_REGNO_P (regno);
441 return 1;
444 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
447 grfr_register_operand (op, mode)
448 rtx op;
449 enum machine_mode mode;
451 if (! register_operand (op, mode))
452 return 0;
453 if (GET_CODE (op) == SUBREG)
454 op = SUBREG_REG (op);
455 if (GET_CODE (op) == REG)
457 unsigned int regno = REGNO (op);
458 if (regno < FIRST_PSEUDO_REGISTER)
459 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
461 return 1;
464 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
467 gr_nonimmediate_operand (op, mode)
468 rtx op;
469 enum machine_mode mode;
471 if (! nonimmediate_operand (op, mode))
472 return 0;
473 if (GET_CODE (op) == SUBREG)
474 op = SUBREG_REG (op);
475 if (GET_CODE (op) == REG)
477 unsigned int regno = REGNO (op);
478 if (regno < FIRST_PSEUDO_REGISTER)
479 return GENERAL_REGNO_P (regno);
481 return 1;
 484 /* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg. */
487 fr_nonimmediate_operand (op, mode)
488 rtx op;
489 enum machine_mode mode;
491 if (! nonimmediate_operand (op, mode))
492 return 0;
493 if (GET_CODE (op) == SUBREG)
494 op = SUBREG_REG (op);
495 if (GET_CODE (op) == REG)
497 unsigned int regno = REGNO (op);
498 if (regno < FIRST_PSEUDO_REGISTER)
499 return FR_REGNO_P (regno);
501 return 1;
504 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
507 grfr_nonimmediate_operand (op, mode)
508 rtx op;
509 enum machine_mode mode;
511 if (! nonimmediate_operand (op, mode))
512 return 0;
513 if (GET_CODE (op) == SUBREG)
514 op = SUBREG_REG (op);
515 if (GET_CODE (op) == REG)
517 unsigned int regno = REGNO (op);
518 if (regno < FIRST_PSEUDO_REGISTER)
519 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
521 return 1;
524 /* Return 1 if OP is a GR register operand, or zero. */
527 gr_reg_or_0_operand (op, mode)
528 rtx op;
529 enum machine_mode mode;
531 return (op == const0_rtx || gr_register_operand (op, mode));
534 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
537 gr_reg_or_5bit_operand (op, mode)
538 rtx op;
539 enum machine_mode mode;
541 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
542 || GET_CODE (op) == CONSTANT_P_RTX
543 || gr_register_operand (op, mode));
546 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
549 gr_reg_or_6bit_operand (op, mode)
550 rtx op;
551 enum machine_mode mode;
553 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
554 || GET_CODE (op) == CONSTANT_P_RTX
555 || gr_register_operand (op, mode));
558 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
561 gr_reg_or_8bit_operand (op, mode)
562 rtx op;
563 enum machine_mode mode;
565 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
566 || GET_CODE (op) == CONSTANT_P_RTX
567 || gr_register_operand (op, mode));
570 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
573 grfr_reg_or_8bit_operand (op, mode)
574 rtx op;
575 enum machine_mode mode;
577 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
578 || GET_CODE (op) == CONSTANT_P_RTX
579 || grfr_register_operand (op, mode));
582 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
583 operand. */
586 gr_reg_or_8bit_adjusted_operand (op, mode)
587 rtx op;
588 enum machine_mode mode;
590 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
591 || GET_CODE (op) == CONSTANT_P_RTX
592 || gr_register_operand (op, mode));
595 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
596 immediate and an 8 bit adjusted immediate operand. This is necessary
597 because when we emit a compare, we don't know what the condition will be,
 598    so we need an immediate acceptable to both, i.e. the intersection of the
        immediates accepted by GT and LT. */
601 gr_reg_or_8bit_and_adjusted_operand (op, mode)
602 rtx op;
603 enum machine_mode mode;
605 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
606 && CONST_OK_FOR_L (INTVAL (op)))
607 || GET_CODE (op) == CONSTANT_P_RTX
608 || gr_register_operand (op, mode));
611 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
614 gr_reg_or_14bit_operand (op, mode)
615 rtx op;
616 enum machine_mode mode;
618 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
619 || GET_CODE (op) == CONSTANT_P_RTX
620 || gr_register_operand (op, mode));
623 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
626 gr_reg_or_22bit_operand (op, mode)
627 rtx op;
628 enum machine_mode mode;
630 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
631 || GET_CODE (op) == CONSTANT_P_RTX
632 || gr_register_operand (op, mode));
635 /* Return 1 if OP is a 6 bit immediate operand. */
638 shift_count_operand (op, mode)
639 rtx op;
640 enum machine_mode mode ATTRIBUTE_UNUSED;
642 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
643 || GET_CODE (op) == CONSTANT_P_RTX);
646 /* Return 1 if OP is a 5 bit immediate operand. */
649 shift_32bit_count_operand (op, mode)
650 rtx op;
651 enum machine_mode mode ATTRIBUTE_UNUSED;
653 return ((GET_CODE (op) == CONST_INT
654 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
655 || GET_CODE (op) == CONSTANT_P_RTX);
658 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
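/* These are the scale factors that a shladd instruction can apply,
   corresponding to left shifts of the first operand by 1 through 4 bits.  */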
661 shladd_operand (op, mode)
662 rtx op;
663 enum machine_mode mode ATTRIBUTE_UNUSED;
665 return (GET_CODE (op) == CONST_INT
666 && (INTVAL (op) == 2 || INTVAL (op) == 4
667 || INTVAL (op) == 8 || INTVAL (op) == 16));
670 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
673 fetchadd_operand (op, mode)
674 rtx op;
675 enum machine_mode mode ATTRIBUTE_UNUSED;
677 return (GET_CODE (op) == CONST_INT
678 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
679 INTVAL (op) == -4 || INTVAL (op) == -1 ||
680 INTVAL (op) == 1 || INTVAL (op) == 4 ||
681 INTVAL (op) == 8 || INTVAL (op) == 16));
684 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
687 fr_reg_or_fp01_operand (op, mode)
688 rtx op;
689 enum machine_mode mode;
691 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
692 || fr_register_operand (op, mode));
695 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
696 POST_MODIFY with a REG as displacement. */
699 destination_operand (op, mode)
700 rtx op;
701 enum machine_mode mode;
703 if (! nonimmediate_operand (op, mode))
704 return 0;
705 if (GET_CODE (op) == MEM
706 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
707 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
708 return 0;
709 return 1;
712 /* Like memory_operand, but don't allow post-increments. */
715 not_postinc_memory_operand (op, mode)
716 rtx op;
717 enum machine_mode mode;
719 return (memory_operand (op, mode)
720 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
 723 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
724 signed immediate operand. */
727 normal_comparison_operator (op, mode)
728 register rtx op;
729 enum machine_mode mode;
731 enum rtx_code code = GET_CODE (op);
732 return ((mode == VOIDmode || GET_MODE (op) == mode)
733 && (code == EQ || code == NE
734 || code == GT || code == LE || code == GTU || code == LEU));
737 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
738 signed immediate operand. */
741 adjusted_comparison_operator (op, mode)
742 register rtx op;
743 enum machine_mode mode;
745 enum rtx_code code = GET_CODE (op);
746 return ((mode == VOIDmode || GET_MODE (op) == mode)
747 && (code == LT || code == GE || code == LTU || code == GEU));
750 /* Return 1 if this is a signed inequality operator. */
753 signed_inequality_operator (op, mode)
754 register rtx op;
755 enum machine_mode mode;
757 enum rtx_code code = GET_CODE (op);
758 return ((mode == VOIDmode || GET_MODE (op) == mode)
759 && (code == GE || code == GT
760 || code == LE || code == LT));
763 /* Return 1 if this operator is valid for predication. */
766 predicate_operator (op, mode)
767 register rtx op;
768 enum machine_mode mode;
770 enum rtx_code code = GET_CODE (op);
771 return ((GET_MODE (op) == mode || mode == VOIDmode)
772 && (code == EQ || code == NE));
775 /* Return 1 if this operator can be used in a conditional operation. */
778 condop_operator (op, mode)
779 register rtx op;
780 enum machine_mode mode;
782 enum rtx_code code = GET_CODE (op);
783 return ((GET_MODE (op) == mode || mode == VOIDmode)
784 && (code == PLUS || code == MINUS || code == AND
785 || code == IOR || code == XOR));
788 /* Return 1 if this is the ar.lc register. */
791 ar_lc_reg_operand (op, mode)
792 register rtx op;
793 enum machine_mode mode;
795 return (GET_MODE (op) == DImode
796 && (mode == DImode || mode == VOIDmode)
797 && GET_CODE (op) == REG
798 && REGNO (op) == AR_LC_REGNUM);
801 /* Return 1 if this is the ar.ccv register. */
804 ar_ccv_reg_operand (op, mode)
805 register rtx op;
806 enum machine_mode mode;
808 return ((GET_MODE (op) == mode || mode == VOIDmode)
809 && GET_CODE (op) == REG
810 && REGNO (op) == AR_CCV_REGNUM);
813 /* Return 1 if this is the ar.pfs register. */
816 ar_pfs_reg_operand (op, mode)
817 register rtx op;
818 enum machine_mode mode;
820 return ((GET_MODE (op) == mode || mode == VOIDmode)
821 && GET_CODE (op) == REG
822 && REGNO (op) == AR_PFS_REGNUM);
825 /* Like general_operand, but don't allow (mem (addressof)). */
828 general_tfmode_operand (op, mode)
829 rtx op;
830 enum machine_mode mode;
832 if (! general_operand (op, mode))
833 return 0;
834 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
835 return 0;
836 return 1;
839 /* Similarly. */
842 destination_tfmode_operand (op, mode)
843 rtx op;
844 enum machine_mode mode;
846 if (! destination_operand (op, mode))
847 return 0;
848 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
849 return 0;
850 return 1;
853 /* Similarly. */
856 tfreg_or_fp01_operand (op, mode)
857 rtx op;
858 enum machine_mode mode;
860 if (GET_CODE (op) == SUBREG)
861 return 0;
862 return fr_reg_or_fp01_operand (op, mode);
865 /* Return 1 if OP is valid as a base register in a reg + offset address. */
868 basereg_operand (op, mode)
869 rtx op;
870 enum machine_mode mode;
872 /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
873 checks from pa.c basereg_operand as well? Seems to be OK without them
874 in test runs. */
876 return (register_operand (op, mode) &&
877 REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
880 /* Return 1 if the operands of a move are ok. */
883 ia64_move_ok (dst, src)
884 rtx dst, src;
886 /* If we're under init_recog_no_volatile, we'll not be able to use
887 memory_operand. So check the code directly and don't worry about
888 the validity of the underlying address, which should have been
889 checked elsewhere anyway. */
890 if (GET_CODE (dst) != MEM)
891 return 1;
892 if (GET_CODE (src) == MEM)
893 return 0;
894 if (register_operand (src, VOIDmode))
895 return 1;
 897   /* Otherwise, this must be a constant, and it must be either 0, 0.0, or 1.0. */
898 if (INTEGRAL_MODE_P (GET_MODE (dst)))
899 return src == const0_rtx;
900 else
901 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
 904 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
905 Return the length of the field, or <= 0 on failure. */
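/* For example, a mask of 0x0ff0 with a shift count of 4 describes an 8-bit
   field: the shift leaves 0xff, and exact_log2 (0xff + 1) is 8.  */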
908 ia64_depz_field_mask (rop, rshift)
909 rtx rop, rshift;
911 unsigned HOST_WIDE_INT op = INTVAL (rop);
912 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
914 /* Get rid of the zero bits we're shifting in. */
915 op >>= shift;
917 /* We must now have a solid block of 1's at bit 0. */
918 return exact_log2 (op + 1);
921 /* Expand a symbolic constant load. */
922 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
924 void
925 ia64_expand_load_address (dest, src, scratch)
926 rtx dest, src, scratch;
928 rtx temp;
930 /* The destination could be a MEM during initial rtl generation,
931 which isn't a valid destination for the PIC load address patterns. */
932 if (! register_operand (dest, DImode))
933 temp = gen_reg_rtx (DImode);
934 else
935 temp = dest;
937 if (TARGET_AUTO_PIC)
938 emit_insn (gen_load_gprel64 (temp, src));
939 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
940 emit_insn (gen_load_fptr (temp, src));
941 else if (sdata_symbolic_operand (src, DImode))
942 emit_insn (gen_load_gprel (temp, src));
943 else if (GET_CODE (src) == CONST
944 && GET_CODE (XEXP (src, 0)) == PLUS
945 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
946 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
948 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
949 rtx sym = XEXP (XEXP (src, 0), 0);
950 HOST_WIDE_INT ofs, hi, lo;
952 /* Split the offset into a sign extended 14-bit low part
953 and a complementary high part. */
954 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
955 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
956 hi = ofs - lo;
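      /* For example, ofs == 0x2345 splits into lo == -0x1cbb and hi == 0x4000;
         hi has its low 14 bits clear and hi + lo reconstructs ofs.  */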
958 if (! scratch)
959 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
961 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
962 scratch));
963 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
965 else
967 rtx insn;
968 if (! scratch)
969 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
971 insn = emit_insn (gen_load_symptr (temp, src, scratch));
972 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
975 if (temp != dest)
976 emit_move_insn (dest, temp);
980 ia64_gp_save_reg (setjmp_p)
981 int setjmp_p;
983 rtx save = cfun->machine->ia64_gp_save;
985 if (save != NULL)
987 /* We can't save GP in a pseudo if we are calling setjmp, because
988 pseudos won't be restored by longjmp. For now, we save it in r4. */
989 /* ??? It would be more efficient to save this directly into a stack
990 slot. Unfortunately, the stack slot address gets cse'd across
991 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
992 place. */
994 /* ??? Get the barf bag, Virginia. We've got to replace this thing
995 in place, since this rtx is used in exception handling receivers.
996 Moreover, we must get this rtx out of regno_reg_rtx or reload
997 will do the wrong thing. */
998 unsigned int old_regno = REGNO (save);
999 if (setjmp_p && old_regno != GR_REG (4))
1001 REGNO (save) = GR_REG (4);
1002 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
1005 else
1007 if (setjmp_p)
1008 save = gen_rtx_REG (DImode, GR_REG (4));
1009 else if (! optimize)
1010 save = gen_rtx_REG (DImode, LOC_REG (0));
1011 else
1012 save = gen_reg_rtx (DImode);
1013 cfun->machine->ia64_gp_save = save;
1016 return save;
1019 /* Split a post-reload TImode reference into two DImode components. */
1022 ia64_split_timode (out, in, scratch)
1023 rtx out[2];
1024 rtx in, scratch;
1026 switch (GET_CODE (in))
1028 case REG:
1029 out[0] = gen_rtx_REG (DImode, REGNO (in));
1030 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1031 return NULL_RTX;
1033 case MEM:
1035 rtx base = XEXP (in, 0);
1037 switch (GET_CODE (base))
1039 case REG:
1040 out[0] = adjust_address (in, DImode, 0);
1041 break;
1042 case POST_MODIFY:
1043 base = XEXP (base, 0);
1044 out[0] = adjust_address (in, DImode, 0);
1045 break;
1047 /* Since we're changing the mode, we need to change to POST_MODIFY
1048 as well to preserve the size of the increment. Either that or
1049 do the update in two steps, but we've already got this scratch
1050 register handy so let's use it. */
1051 case POST_INC:
1052 base = XEXP (base, 0);
1053 out[0]
1054 = change_address (in, DImode,
1055 gen_rtx_POST_MODIFY
1056 (Pmode, base, plus_constant (base, 16)));
1057 break;
1058 case POST_DEC:
1059 base = XEXP (base, 0);
1060 out[0]
1061 = change_address (in, DImode,
1062 gen_rtx_POST_MODIFY
1063 (Pmode, base, plus_constant (base, -16)));
1064 break;
1065 default:
1066 abort ();
1069 if (scratch == NULL_RTX)
1070 abort ();
1071 out[1] = change_address (in, DImode, scratch);
1072 return gen_adddi3 (scratch, base, GEN_INT (8));
1075 case CONST_INT:
1076 case CONST_DOUBLE:
1077 split_double (in, &out[0], &out[1]);
1078 return NULL_RTX;
1080 default:
1081 abort ();
1085 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1086 through memory plus an extra GR scratch register. Except that you can
1087 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1088 SECONDARY_RELOAD_CLASS, but not both.
1090 We got into problems in the first place by allowing a construct like
1091 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1092 This solution attempts to prevent this situation from occurring. When
1093 we see something like the above, we spill the inner register to memory. */
1096 spill_tfmode_operand (in, force)
1097 rtx in;
1098 int force;
1100 if (GET_CODE (in) == SUBREG
1101 && GET_MODE (SUBREG_REG (in)) == TImode
1102 && GET_CODE (SUBREG_REG (in)) == REG)
1104 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
1105 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1107 else if (force && GET_CODE (in) == REG)
1109 rtx mem = gen_mem_addressof (in, NULL_TREE);
1110 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1112 else if (GET_CODE (in) == MEM
1113 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1114 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1115 else
1116 return in;
1119 /* Emit comparison instruction if necessary, returning the expression
1120 that holds the compare result in the proper mode. */
1123 ia64_expand_compare (code, mode)
1124 enum rtx_code code;
1125 enum machine_mode mode;
1127 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1128 rtx cmp;
1130 /* If we have a BImode input, then we already have a compare result, and
1131 do not need to emit another comparison. */
1132 if (GET_MODE (op0) == BImode)
1134 if ((code == NE || code == EQ) && op1 == const0_rtx)
1135 cmp = op0;
1136 else
1137 abort ();
1139 else
1141 cmp = gen_reg_rtx (BImode);
1142 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1143 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1144 code = NE;
1147 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1150 /* Emit the appropriate sequence for a call. */
1152 void
1153 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1154 rtx retval;
1155 rtx addr;
1156 rtx nextarg;
1157 int sibcall_p;
1159 rtx insn, b0, pfs, gp_save, narg_rtx, dest;
1160 bool indirect_p;
1161 int narg;
1163 addr = XEXP (addr, 0);
1164 b0 = gen_rtx_REG (DImode, R_BR (0));
1165 pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);
1167 if (! nextarg)
1168 narg = 0;
1169 else if (IN_REGNO_P (REGNO (nextarg)))
1170 narg = REGNO (nextarg) - IN_REG (0);
1171 else
1172 narg = REGNO (nextarg) - OUT_REG (0);
1173 narg_rtx = GEN_INT (narg);
1175 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1177 if (sibcall_p)
1178 insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
1179 else if (! retval)
1180 insn = gen_call_nopic (addr, narg_rtx, b0);
1181 else
1182 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1183 emit_call_insn (insn);
1184 return;
1187 indirect_p = ! symbolic_operand (addr, VOIDmode);
1189 if (sibcall_p || (TARGET_CONST_GP && !indirect_p))
1190 gp_save = NULL_RTX;
1191 else
1192 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1194 if (gp_save)
1195 emit_move_insn (gp_save, pic_offset_table_rtx);
1197 /* If this is an indirect call, then we have the address of a descriptor. */
1198 if (indirect_p)
1200 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1201 emit_move_insn (pic_offset_table_rtx,
1202 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1204 else
1205 dest = addr;
1207 if (sibcall_p)
1208 insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
1209 else if (! retval)
1210 insn = gen_call_pic (dest, narg_rtx, b0);
1211 else
1212 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1213 emit_call_insn (insn);
1215 if (gp_save)
1216 emit_move_insn (pic_offset_table_rtx, gp_save);
1219 /* Begin the assembly file. */
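/* emit_safe_across_calls lists the predicate registers that are not
   call-used, emitting a directive of the form
   ".pred.safe_across_calls p1-p5,p16-p63" (the ranges shown here are only
   illustrative; the actual ranges depend on call_used_regs).  */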
1221 void
1222 emit_safe_across_calls (f)
1223 FILE *f;
1225 unsigned int rs, re;
1226 int out_state;
1228 rs = 1;
1229 out_state = 0;
1230 while (1)
1232 while (rs < 64 && call_used_regs[PR_REG (rs)])
1233 rs++;
1234 if (rs >= 64)
1235 break;
1236 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1237 continue;
1238 if (out_state == 0)
1240 fputs ("\t.pred.safe_across_calls ", f);
1241 out_state = 1;
1243 else
1244 fputc (',', f);
1245 if (re == rs + 1)
1246 fprintf (f, "p%u", rs);
1247 else
1248 fprintf (f, "p%u-p%u", rs, re - 1);
1249 rs = re + 1;
1251 if (out_state)
1252 fputc ('\n', f);
1256 /* Structure to be filled in by ia64_compute_frame_size with register
1257 save masks and offsets for the current function. */
1259 struct ia64_frame_info
1261 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1262 the caller's scratch area. */
1263 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1264 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1265 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
1266 HARD_REG_SET mask; /* mask of saved registers. */
1267 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1268 registers or long-term scratches. */
1269 int n_spilled; /* number of spilled registers. */
1270 int reg_fp; /* register for fp. */
1271 int reg_save_b0; /* save register for b0. */
1272 int reg_save_pr; /* save register for prs. */
1273 int reg_save_ar_pfs; /* save register for ar.pfs. */
1274 int reg_save_ar_unat; /* save register for ar.unat. */
1275 int reg_save_ar_lc; /* save register for ar.lc. */
1276 int n_input_regs; /* number of input registers used. */
1277 int n_local_regs; /* number of local registers used. */
1278 int n_output_regs; /* number of output registers used. */
1279 int n_rotate_regs; /* number of rotating registers used. */
1281 char need_regstk; /* true if a .regstk directive needed. */
1282 char initialized; /* true if the data is finalized. */
1285 /* Current frame information calculated by ia64_compute_frame_size. */
1286 static struct ia64_frame_info current_frame_info;
1288 /* Helper function for ia64_compute_frame_size: find an appropriate general
1289    register to spill some special register to.  The GR0 to GR31 bits already
1290    allocated by this routine are tracked in current_frame_info.gr_used_mask.
1291 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1293 static int
1294 find_gr_spill (try_locals)
1295 int try_locals;
1297 int regno;
1299 /* If this is a leaf function, first try an otherwise unused
1300 call-clobbered register. */
1301 if (current_function_is_leaf)
1303 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1304 if (! regs_ever_live[regno]
1305 && call_used_regs[regno]
1306 && ! fixed_regs[regno]
1307 && ! global_regs[regno]
1308 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1310 current_frame_info.gr_used_mask |= 1 << regno;
1311 return regno;
1315 if (try_locals)
1317 regno = current_frame_info.n_local_regs;
1318 /* If there is a frame pointer, then we can't use loc79, because
1319 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1320 reg_name switching code in ia64_expand_prologue. */
1321 if (regno < (80 - frame_pointer_needed))
1323 current_frame_info.n_local_regs = regno + 1;
1324 return LOC_REG (0) + regno;
1328 /* Failed to find a general register to spill to. Must use stack. */
1329 return 0;
1332 /* In order to make for nice schedules, we try to allocate every temporary
1333 to a different register. We must of course stay away from call-saved,
1334 fixed, and global registers. We must also stay away from registers
1335 allocated in current_frame_info.gr_used_mask, since those include regs
1336 used all through the prologue.
1338 Any register allocated here must be used immediately. The idea is to
1339 aid scheduling, not to solve data flow problems. */
1341 static int last_scratch_gr_reg;
1343 static int
1344 next_scratch_gr_reg ()
1346 int i, regno;
1348 for (i = 0; i < 32; ++i)
1350 regno = (last_scratch_gr_reg + i + 1) & 31;
1351 if (call_used_regs[regno]
1352 && ! fixed_regs[regno]
1353 && ! global_regs[regno]
1354 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1356 last_scratch_gr_reg = regno;
1357 return regno;
1361 /* There must be _something_ available. */
1362 abort ();
1365 /* Helper function for ia64_compute_frame_size, called through
1366 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1368 static void
1369 mark_reg_gr_used_mask (reg, data)
1370 rtx reg;
1371 void *data ATTRIBUTE_UNUSED;
1373 unsigned int regno = REGNO (reg);
1374 if (regno < 32)
1376 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1377 for (i = 0; i < n; ++i)
1378 current_frame_info.gr_used_mask |= 1 << (regno + i);
1382 /* Compute register save masks, offsets, and related frame information for
1383    the current function, recording the results in current_frame_info.
1384    SIZE is the number of bytes of space needed for local variables.  */
1386 static void
1387 ia64_compute_frame_size (size)
1388 HOST_WIDE_INT size;
1390 HOST_WIDE_INT total_size;
1391 HOST_WIDE_INT spill_size = 0;
1392 HOST_WIDE_INT extra_spill_size = 0;
1393 HOST_WIDE_INT pretend_args_size;
1394 HARD_REG_SET mask;
1395 int n_spilled = 0;
1396 int spilled_gr_p = 0;
1397 int spilled_fr_p = 0;
1398 unsigned int regno;
1399 int i;
1401 if (current_frame_info.initialized)
1402 return;
1404 memset (&current_frame_info, 0, sizeof current_frame_info);
1405 CLEAR_HARD_REG_SET (mask);
1407 /* Don't allocate scratches to the return register. */
1408 diddle_return_value (mark_reg_gr_used_mask, NULL);
1410 /* Don't allocate scratches to the EH scratch registers. */
1411 if (cfun->machine->ia64_eh_epilogue_sp)
1412 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1413 if (cfun->machine->ia64_eh_epilogue_bsp)
1414 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1416 /* Find the size of the register stack frame. We have only 80 local
1417 registers, because we reserve 8 for the inputs and 8 for the
1418 outputs. */
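/* That is, of the 96 stacked registers r32-r127, 8 are reserved for the
   inputs and 8 for the outputs, leaving 80 for locals.  */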
1420 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1421 since we'll be adjusting that down later. */
1422 regno = LOC_REG (78) + ! frame_pointer_needed;
1423 for (; regno >= LOC_REG (0); regno--)
1424 if (regs_ever_live[regno])
1425 break;
1426 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1428 /* For functions marked with the syscall_linkage attribute, we must mark
1429 all eight input registers as in use, so that locals aren't visible to
1430 the caller. */
1432 if (cfun->machine->n_varargs > 0
1433 || lookup_attribute ("syscall_linkage",
1434 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1435 current_frame_info.n_input_regs = 8;
1436 else
1438 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1439 if (regs_ever_live[regno])
1440 break;
1441 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1444 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1445 if (regs_ever_live[regno])
1446 break;
1447 i = regno - OUT_REG (0) + 1;
1449 /* When -p profiling, we need one output register for the mcount argument.
1450    Likewise for -a profiling for the bb_init_func argument.  For -ax
1451 profiling, we need two output registers for the two bb_init_trace_func
1452 arguments. */
1453 if (current_function_profile)
1454 i = MAX (i, 1);
1455 current_frame_info.n_output_regs = i;
1457 /* ??? No rotating register support yet. */
1458 current_frame_info.n_rotate_regs = 0;
1460 /* Discover which registers need spilling, and how much room that
1461 will take. Begin with floating point and general registers,
1462 which will always wind up on the stack. */
1464 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1465 if (regs_ever_live[regno] && ! call_used_regs[regno])
1467 SET_HARD_REG_BIT (mask, regno);
1468 spill_size += 16;
1469 n_spilled += 1;
1470 spilled_fr_p = 1;
1473 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1474 if (regs_ever_live[regno] && ! call_used_regs[regno])
1476 SET_HARD_REG_BIT (mask, regno);
1477 spill_size += 8;
1478 n_spilled += 1;
1479 spilled_gr_p = 1;
1482 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1483 if (regs_ever_live[regno] && ! call_used_regs[regno])
1485 SET_HARD_REG_BIT (mask, regno);
1486 spill_size += 8;
1487 n_spilled += 1;
1490 /* Now come all special registers that might get saved in other
1491 general registers. */
1493 if (frame_pointer_needed)
1495 current_frame_info.reg_fp = find_gr_spill (1);
1496 /* If we did not get a register, then we take LOC79. This is guaranteed
1497 to be free, even if regs_ever_live is already set, because this is
1498 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1499 as we don't count loc79 above. */
1500 if (current_frame_info.reg_fp == 0)
1502 current_frame_info.reg_fp = LOC_REG (79);
1503 current_frame_info.n_local_regs++;
1507 if (! current_function_is_leaf)
1509 /* Emit a save of BR0 if we call other functions. Do this even
1510 if this function doesn't return, as EH depends on this to be
1511 able to unwind the stack. */
1512 SET_HARD_REG_BIT (mask, BR_REG (0));
1514 current_frame_info.reg_save_b0 = find_gr_spill (1);
1515 if (current_frame_info.reg_save_b0 == 0)
1517 spill_size += 8;
1518 n_spilled += 1;
1521 /* Similarly for ar.pfs. */
1522 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1523 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1524 if (current_frame_info.reg_save_ar_pfs == 0)
1526 extra_spill_size += 8;
1527 n_spilled += 1;
1530 else
1532 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1534 SET_HARD_REG_BIT (mask, BR_REG (0));
1535 spill_size += 8;
1536 n_spilled += 1;
1540 /* Unwind descriptor hackery: things are most efficient if we allocate
1541 consecutive GR save registers for RP, PFS, FP in that order. However,
1542 it is absolutely critical that FP get the only hard register that's
1543 guaranteed to be free, so we allocated it first. If all three did
1544 happen to be allocated hard regs, and are consecutive, rearrange them
1545 into the preferred order now. */
1546 if (current_frame_info.reg_fp != 0
1547 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1548 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1550 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1551 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1552 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1555 /* See if we need to store the predicate register block. */
1556 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1557 if (regs_ever_live[regno] && ! call_used_regs[regno])
1558 break;
1559 if (regno <= PR_REG (63))
1561 SET_HARD_REG_BIT (mask, PR_REG (0));
1562 current_frame_info.reg_save_pr = find_gr_spill (1);
1563 if (current_frame_info.reg_save_pr == 0)
1565 extra_spill_size += 8;
1566 n_spilled += 1;
1569 /* ??? Mark them all as used so that register renaming and such
1570 are free to use them. */
1571 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1572 regs_ever_live[regno] = 1;
1575 /* If we're forced to use st8.spill, we're forced to save and restore
1576 ar.unat as well. */
1577 if (spilled_gr_p || cfun->machine->n_varargs)
1579 regs_ever_live[AR_UNAT_REGNUM] = 1;
1580 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1581 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1582 if (current_frame_info.reg_save_ar_unat == 0)
1584 extra_spill_size += 8;
1585 n_spilled += 1;
1589 if (regs_ever_live[AR_LC_REGNUM])
1591 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1592 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1593 if (current_frame_info.reg_save_ar_lc == 0)
1595 extra_spill_size += 8;
1596 n_spilled += 1;
1600 /* If we have an odd number of words of pretend arguments written to
1601 the stack, then the FR save area will be unaligned. We round the
1602 size of this area up to keep things 16 byte aligned. */
1603 if (spilled_fr_p)
1604 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1605 else
1606 pretend_args_size = current_function_pretend_args_size;
1608 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1609 + current_function_outgoing_args_size);
1610 total_size = IA64_STACK_ALIGN (total_size);
1612 /* We always use the 16-byte scratch area provided by the caller, but
1613 if we are a leaf function, there's no one to which we need to provide
1614 a scratch area. */
1615 if (current_function_is_leaf)
1616 total_size = MAX (0, total_size - 16);
1618 current_frame_info.total_size = total_size;
1619 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1620 current_frame_info.spill_size = spill_size;
1621 current_frame_info.extra_spill_size = extra_spill_size;
1622 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1623 current_frame_info.n_spilled = n_spilled;
1624 current_frame_info.initialized = reload_completed;
1627 /* Compute the initial difference between the specified pair of registers. */
1629 HOST_WIDE_INT
1630 ia64_initial_elimination_offset (from, to)
1631 int from, to;
1633 HOST_WIDE_INT offset;
1635 ia64_compute_frame_size (get_frame_size ());
1636 switch (from)
1638 case FRAME_POINTER_REGNUM:
1639 if (to == HARD_FRAME_POINTER_REGNUM)
1641 if (current_function_is_leaf)
1642 offset = -current_frame_info.total_size;
1643 else
1644 offset = -(current_frame_info.total_size
1645 - current_function_outgoing_args_size - 16);
1647 else if (to == STACK_POINTER_REGNUM)
1649 if (current_function_is_leaf)
1650 offset = 0;
1651 else
1652 offset = 16 + current_function_outgoing_args_size;
1654 else
1655 abort ();
1656 break;
1658 case ARG_POINTER_REGNUM:
1659 /* Arguments start above the 16 byte save area, unless stdarg
1660 in which case we store through the 16 byte save area. */
1661 if (to == HARD_FRAME_POINTER_REGNUM)
1662 offset = 16 - current_function_pretend_args_size;
1663 else if (to == STACK_POINTER_REGNUM)
1664 offset = (current_frame_info.total_size
1665 + 16 - current_function_pretend_args_size);
1666 else
1667 abort ();
1668 break;
1670 case RETURN_ADDRESS_POINTER_REGNUM:
1671 offset = 0;
1672 break;
1674 default:
1675 abort ();
1678 return offset;
1681 /* If there are more than a trivial number of register spills, we use
1682 two interleaved iterators so that we can get two memory references
1683 per insn group.
1685 In order to simplify things in the prologue and epilogue expanders,
1686 we use helper functions to fix up the memory references after the
1687 fact with the appropriate offsets to a POST_MODIFY memory mode.
1688 The following data structure tracks the state of the two iterators
1689 while insns are being emitted. */
1691 struct spill_fill_data
1693 rtx init_after; /* point at which to emit initializations */
1694 rtx init_reg[2]; /* initial base register */
1695 rtx iter_reg[2]; /* the iterator registers */
1696 rtx *prev_addr[2]; /* address of last memory use */
1697 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
1698 HOST_WIDE_INT prev_off[2]; /* last offset */
1699 int n_iter; /* number of iterators in use */
1700 int next_iter; /* next iterator to use */
1701 unsigned int save_gr_used_mask;
1704 static struct spill_fill_data spill_fill_data;
1706 static void
1707 setup_spill_pointers (n_spills, init_reg, cfa_off)
1708 int n_spills;
1709 rtx init_reg;
1710 HOST_WIDE_INT cfa_off;
1712 int i;
1714 spill_fill_data.init_after = get_last_insn ();
1715 spill_fill_data.init_reg[0] = init_reg;
1716 spill_fill_data.init_reg[1] = init_reg;
1717 spill_fill_data.prev_addr[0] = NULL;
1718 spill_fill_data.prev_addr[1] = NULL;
1719 spill_fill_data.prev_insn[0] = NULL;
1720 spill_fill_data.prev_insn[1] = NULL;
1721 spill_fill_data.prev_off[0] = cfa_off;
1722 spill_fill_data.prev_off[1] = cfa_off;
1723 spill_fill_data.next_iter = 0;
1724 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1726 spill_fill_data.n_iter = 1 + (n_spills > 2);
1727 for (i = 0; i < spill_fill_data.n_iter; ++i)
1729 int regno = next_scratch_gr_reg ();
1730 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1731 current_frame_info.gr_used_mask |= 1 << regno;
1735 static void
1736 finish_spill_pointers ()
1738 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1741 static rtx
1742 spill_restore_mem (reg, cfa_off)
1743 rtx reg;
1744 HOST_WIDE_INT cfa_off;
1746 int iter = spill_fill_data.next_iter;
1747 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1748 rtx disp_rtx = GEN_INT (disp);
1749 rtx mem;
1751 if (spill_fill_data.prev_addr[iter])
1753 if (CONST_OK_FOR_N (disp))
1755 *spill_fill_data.prev_addr[iter]
1756 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1757 gen_rtx_PLUS (DImode,
1758 spill_fill_data.iter_reg[iter],
1759 disp_rtx));
1760 REG_NOTES (spill_fill_data.prev_insn[iter])
1761 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
1762 REG_NOTES (spill_fill_data.prev_insn[iter]));
1764 else
1766 /* ??? Could use register post_modify for loads. */
1767 if (! CONST_OK_FOR_I (disp))
1769 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1770 emit_move_insn (tmp, disp_rtx);
1771 disp_rtx = tmp;
1773 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1774 spill_fill_data.iter_reg[iter], disp_rtx));
1777 /* Micro-optimization: if we've created a frame pointer, it's at
1778 CFA 0, which may allow the real iterator to be initialized lower,
1779 slightly increasing parallelism. Also, if there are few saves
1780 it may eliminate the iterator entirely. */
1781 else if (disp == 0
1782 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1783 && frame_pointer_needed)
1785 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1786 set_mem_alias_set (mem, get_varargs_alias_set ());
1787 return mem;
1789 else
1791 rtx seq, insn;
1793 if (disp == 0)
1794 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1795 spill_fill_data.init_reg[iter]);
1796 else
1798 start_sequence ();
1800 if (! CONST_OK_FOR_I (disp))
1802 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1803 emit_move_insn (tmp, disp_rtx);
1804 disp_rtx = tmp;
1807 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1808 spill_fill_data.init_reg[iter],
1809 disp_rtx));
1811 seq = gen_sequence ();
1812 end_sequence ();
1815 /* Careful for being the first insn in a sequence. */
1816 if (spill_fill_data.init_after)
1817 insn = emit_insn_after (seq, spill_fill_data.init_after);
1818 else
1820 rtx first = get_insns ();
1821 if (first)
1822 insn = emit_insn_before (seq, first);
1823 else
1824 insn = emit_insn (seq);
1826 spill_fill_data.init_after = insn;
1828 /* If DISP is 0, we may or may not have a further adjustment
1829 afterward. If we do, then the load/store insn may be modified
1830 to be a post-modify. If we don't, then this copy may be
1831 eliminated by copyprop_hardreg_forward, which makes this
1832 insn garbage, which runs afoul of the sanity check in
1833 propagate_one_insn. So mark this insn as legal to delete. */
1834 if (disp == 0)
1835 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1836 REG_NOTES (insn));
1839 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1841 /* ??? Not all of the spills are for varargs, but some of them are.
1842 The rest of the spills belong in an alias set of their own. But
1843 it doesn't actually hurt to include them here. */
1844 set_mem_alias_set (mem, get_varargs_alias_set ());
1846 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1847 spill_fill_data.prev_off[iter] = cfa_off;
1849 if (++iter >= spill_fill_data.n_iter)
1850 iter = 0;
1851 spill_fill_data.next_iter = iter;
1853 return mem;
1856 static void
1857 do_spill (move_fn, reg, cfa_off, frame_reg)
1858 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1859 rtx reg, frame_reg;
1860 HOST_WIDE_INT cfa_off;
1862 int iter = spill_fill_data.next_iter;
1863 rtx mem, insn;
1865 mem = spill_restore_mem (reg, cfa_off);
1866 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1867 spill_fill_data.prev_insn[iter] = insn;
1869 if (frame_reg)
1871 rtx base;
1872 HOST_WIDE_INT off;
1874 RTX_FRAME_RELATED_P (insn) = 1;
1876 /* Don't even pretend that the unwind code can intuit its way
1877 through a pair of interleaved post_modify iterators. Just
1878 provide the correct answer. */
1880 if (frame_pointer_needed)
1882 base = hard_frame_pointer_rtx;
1883 off = - cfa_off;
1885 else
1887 base = stack_pointer_rtx;
1888 off = current_frame_info.total_size - cfa_off;
1891 REG_NOTES (insn)
1892 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1893 gen_rtx_SET (VOIDmode,
1894 gen_rtx_MEM (GET_MODE (reg),
1895 plus_constant (base, off)),
1896 frame_reg),
1897 REG_NOTES (insn));
1901 static void
1902 do_restore (move_fn, reg, cfa_off)
1903 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1904 rtx reg;
1905 HOST_WIDE_INT cfa_off;
1907 int iter = spill_fill_data.next_iter;
1908 rtx insn;
1910 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1911 GEN_INT (cfa_off)));
1912 spill_fill_data.prev_insn[iter] = insn;
1915 /* Wrapper functions that discard the CONST_INT spill offset.  These
1916    exist so that we can give gr_spill/gr_fill the offset they need and
1917    use a consistent function interface.  */
1919 static rtx
1920 gen_movdi_x (dest, src, offset)
1921 rtx dest, src;
1922 rtx offset ATTRIBUTE_UNUSED;
1924 return gen_movdi (dest, src);
1927 static rtx
1928 gen_fr_spill_x (dest, src, offset)
1929 rtx dest, src;
1930 rtx offset ATTRIBUTE_UNUSED;
1932 return gen_fr_spill (dest, src);
1935 static rtx
1936 gen_fr_restore_x (dest, src, offset)
1937 rtx dest, src;
1938 rtx offset ATTRIBUTE_UNUSED;
1940 return gen_fr_restore (dest, src);
1943 /* Called after register allocation to add any instructions needed for the
1944 prologue. Using a prologue insn is favored compared to putting all of the
1945 instructions in output_function_prologue(), since it allows the scheduler
1946 to intermix instructions with the saves of the caller saved registers. In
1947 some cases, it might be necessary to emit a barrier instruction as the last
1948 insn to prevent such scheduling.
1950 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1951 so that the debug info generation code can handle them properly.
1953    The register save area is laid out like so:
1954 cfa+16
1955 [ varargs spill area ]
1956 [ fr register spill area ]
1957 [ br register spill area ]
1958 [ ar register spill area ]
1959 [ pr register spill area ]
1960 [ gr register spill area ] */
1962 /* ??? Get inefficient code when the frame size is larger than can fit in an
1963 adds instruction. */
1965 void
1966 ia64_expand_prologue ()
1968 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1969 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1970 rtx reg, alt_reg;
1972 ia64_compute_frame_size (get_frame_size ());
1973 last_scratch_gr_reg = 15;
1975 /* If there is no epilogue, then we don't need some prologue insns.
1976 We need to avoid emitting the dead prologue insns, because flow
1977 will complain about them. */
1978 if (optimize)
1980 edge e;
1982 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1983 if ((e->flags & EDGE_FAKE) == 0
1984 && (e->flags & EDGE_FALLTHRU) != 0)
1985 break;
1986 epilogue_p = (e != NULL);
1988 else
1989 epilogue_p = 1;
1991 /* Set the local, input, and output register names. We need to do this
1992 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1993 half. If we use in/loc/out register names, then we get assembler errors
1994 in crtn.S because there is no alloc insn or regstk directive in there. */
1995 if (! TARGET_REG_NAMES)
1997 int inputs = current_frame_info.n_input_regs;
1998 int locals = current_frame_info.n_local_regs;
1999 int outputs = current_frame_info.n_output_regs;
2001 for (i = 0; i < inputs; i++)
2002 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2003 for (i = 0; i < locals; i++)
2004 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2005 for (i = 0; i < outputs; i++)
2006 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2009 /* Set the frame pointer register name. The regnum is logically loc79,
2010 but of course we'll not have allocated that many locals. Rather than
2011 worrying about renumbering the existing rtxs, we adjust the name. */
2012 /* ??? This code means that we can never use one local register when
2013 there is a frame pointer. loc79 gets wasted in this case, as it is
2014 renamed to a register that will never be used. See also the try_locals
2015 code in find_gr_spill. */
2016 if (current_frame_info.reg_fp)
2018 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2019 reg_names[HARD_FRAME_POINTER_REGNUM]
2020 = reg_names[current_frame_info.reg_fp];
2021 reg_names[current_frame_info.reg_fp] = tmp;
2024 /* Fix up the return address placeholder. */
2025 /* ??? We can fail if __builtin_return_address is used, and we didn't
2026 allocate a register in which to save b0. I can't think of a way to
2027 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2028 then be sure that I got the right one. Further, reload doesn't seem
2029 to care if an eliminable register isn't used, and "eliminates" it
2030 anyway. */
2031 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2032 && current_frame_info.reg_save_b0 != 0)
2033 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2035 /* We don't need an alloc instruction if we've used no outputs or locals. */
2036 if (current_frame_info.n_local_regs == 0
2037 && current_frame_info.n_output_regs == 0
2038 && current_frame_info.n_input_regs <= current_function_args_info.int_regs)
2040 /* If there is no alloc, but there are input registers used, then we
2041 need a .regstk directive. */
2042 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2043 ar_pfs_save_reg = NULL_RTX;
2045 else
2047 current_frame_info.need_regstk = 0;
2049 if (current_frame_info.reg_save_ar_pfs)
2050 regno = current_frame_info.reg_save_ar_pfs;
2051 else
2052 regno = next_scratch_gr_reg ();
2053 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2055 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2056 GEN_INT (current_frame_info.n_input_regs),
2057 GEN_INT (current_frame_info.n_local_regs),
2058 GEN_INT (current_frame_info.n_output_regs),
2059 GEN_INT (current_frame_info.n_rotate_regs)));
2060 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2063 /* Set up frame pointer, stack pointer, and spill iterators. */
2065 n_varargs = cfun->machine->n_varargs;
2066 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2067 stack_pointer_rtx, 0);
2069 if (frame_pointer_needed)
2071 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2072 RTX_FRAME_RELATED_P (insn) = 1;
2075 if (current_frame_info.total_size != 0)
2077 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2078 rtx offset;
2080 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2081 offset = frame_size_rtx;
2082 else
2084 regno = next_scratch_gr_reg ();
2085 offset = gen_rtx_REG (DImode, regno);
2086 emit_move_insn (offset, frame_size_rtx);
2089 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2090 stack_pointer_rtx, offset));
2092 if (! frame_pointer_needed)
2094 RTX_FRAME_RELATED_P (insn) = 1;
2095 if (GET_CODE (offset) != CONST_INT)
2097 REG_NOTES (insn)
2098 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2099 gen_rtx_SET (VOIDmode,
2100 stack_pointer_rtx,
2101 gen_rtx_PLUS (DImode,
2102 stack_pointer_rtx,
2103 frame_size_rtx)),
2104 REG_NOTES (insn));
2108 /* ??? At this point we must generate a magic insn that appears to
2109 modify the stack pointer, the frame pointer, and all spill
2110 iterators. This would allow the most scheduling freedom. For
2111 now, just hard stop. */
2112 emit_insn (gen_blockage ());
2115 /* Must copy out ar.unat before doing any integer spills. */
2116 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2118 if (current_frame_info.reg_save_ar_unat)
2119 ar_unat_save_reg
2120 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2121 else
2123 alt_regno = next_scratch_gr_reg ();
2124 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2125 current_frame_info.gr_used_mask |= 1 << alt_regno;
2128 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2129 insn = emit_move_insn (ar_unat_save_reg, reg);
2130 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2132 /* Even if we're not going to generate an epilogue, we still
2133 need to save the register so that EH works. */
2134 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2135 emit_insn (gen_prologue_use (ar_unat_save_reg));
2137 else
2138 ar_unat_save_reg = NULL_RTX;
2140 /* Spill all varargs registers. Do this before spilling any GR registers,
2141 since we want the UNAT bits for the GR registers to override the UNAT
2142 bits from varargs, which we don't care about. */
2144 cfa_off = -16;
2145 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2147 reg = gen_rtx_REG (DImode, regno);
2148 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2151 /* Locate the bottom of the register save area. */
2152 cfa_off = (current_frame_info.spill_cfa_off
2153 + current_frame_info.spill_size
2154 + current_frame_info.extra_spill_size);
2156 /* Save the predicate register block either in a register or in memory. */
2157 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2159 reg = gen_rtx_REG (DImode, PR_REG (0));
2160 if (current_frame_info.reg_save_pr != 0)
2162 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2163 insn = emit_move_insn (alt_reg, reg);
2165 /* ??? Denote pr spill/fill by a DImode move that modifies all
2166 64 hard registers. */
2167 RTX_FRAME_RELATED_P (insn) = 1;
2168 REG_NOTES (insn)
2169 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2170 gen_rtx_SET (VOIDmode, alt_reg, reg),
2171 REG_NOTES (insn));
2173 /* Even if we're not going to generate an epilogue, we still
2174 need to save the register so that EH works. */
2175 if (! epilogue_p)
2176 emit_insn (gen_prologue_use (alt_reg));
2178 else
2180 alt_regno = next_scratch_gr_reg ();
2181 alt_reg = gen_rtx_REG (DImode, alt_regno);
2182 insn = emit_move_insn (alt_reg, reg);
2183 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2184 cfa_off -= 8;
2188 /* Handle AR regs in numerical order. All of them get special handling. */
2189 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2190 && current_frame_info.reg_save_ar_unat == 0)
2192 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2193 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2194 cfa_off -= 8;
2197 /* The alloc insn already copied ar.pfs into a general register. The
2198 only thing we have to do now is copy that register to a stack slot
2199 if we'd not allocated a local register for the job. */
2200 if (current_frame_info.reg_save_ar_pfs == 0
2201 && ! current_function_is_leaf)
2203 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2204 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2205 cfa_off -= 8;
2208 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2210 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2211 if (current_frame_info.reg_save_ar_lc != 0)
2213 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2214 insn = emit_move_insn (alt_reg, reg);
2215 RTX_FRAME_RELATED_P (insn) = 1;
2217 /* Even if we're not going to generate an epilogue, we still
2218 need to save the register so that EH works. */
2219 if (! epilogue_p)
2220 emit_insn (gen_prologue_use (alt_reg));
2222 else
2224 alt_regno = next_scratch_gr_reg ();
2225 alt_reg = gen_rtx_REG (DImode, alt_regno);
2226 emit_move_insn (alt_reg, reg);
2227 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2228 cfa_off -= 8;
2232 /* We should now be at the base of the gr/br/fr spill area. */
2233 if (cfa_off != (current_frame_info.spill_cfa_off
2234 + current_frame_info.spill_size))
2235 abort ();
2237 /* Spill all general registers. */
2238 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2239 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2241 reg = gen_rtx_REG (DImode, regno);
2242 do_spill (gen_gr_spill, reg, cfa_off, reg);
2243 cfa_off -= 8;
2246 /* Handle BR0 specially -- it may be getting stored permanently in
2247 some GR register. */
2248 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2250 reg = gen_rtx_REG (DImode, BR_REG (0));
2251 if (current_frame_info.reg_save_b0 != 0)
2253 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2254 insn = emit_move_insn (alt_reg, reg);
2255 RTX_FRAME_RELATED_P (insn) = 1;
2257 /* Even if we're not going to generate an epilogue, we still
2258 need to save the register so that EH works. */
2259 if (! epilogue_p)
2260 emit_insn (gen_prologue_use (alt_reg));
2262 else
2264 alt_regno = next_scratch_gr_reg ();
2265 alt_reg = gen_rtx_REG (DImode, alt_regno);
2266 emit_move_insn (alt_reg, reg);
2267 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2268 cfa_off -= 8;
2272 /* Spill the rest of the BR registers. */
2273 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2274 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2276 alt_regno = next_scratch_gr_reg ();
2277 alt_reg = gen_rtx_REG (DImode, alt_regno);
2278 reg = gen_rtx_REG (DImode, regno);
2279 emit_move_insn (alt_reg, reg);
2280 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2281 cfa_off -= 8;
2284 /* Align the frame and spill all FR registers. */
2285 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2286 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2288 if (cfa_off & 15)
2289 abort ();
2290 reg = gen_rtx_REG (TFmode, regno);
2291 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2292 cfa_off -= 16;
2295 if (cfa_off != current_frame_info.spill_cfa_off)
2296 abort ();
2298 finish_spill_pointers ();
2301 /* Called after register allocation to add any instructions needed for the
2302 epilogue. Using an epilogue insn is favored compared to putting all of the
2303    instructions in output_function_epilogue(), since it allows the scheduler
2304 to intermix instructions with the saves of the caller saved registers. In
2305 some cases, it might be necessary to emit a barrier instruction as the last
2306 insn to prevent such scheduling. */
2308 void
2309 ia64_expand_epilogue (sibcall_p)
2310 int sibcall_p;
2312 rtx insn, reg, alt_reg, ar_unat_save_reg;
2313 int regno, alt_regno, cfa_off;
2315 ia64_compute_frame_size (get_frame_size ());
2317 /* If there is a frame pointer, then we use it instead of the stack
2318 pointer, so that the stack pointer does not need to be valid when
2319 the epilogue starts. See EXIT_IGNORE_STACK. */
2320 if (frame_pointer_needed)
2321 setup_spill_pointers (current_frame_info.n_spilled,
2322 hard_frame_pointer_rtx, 0);
2323 else
2324 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2325 current_frame_info.total_size);
2327 if (current_frame_info.total_size != 0)
2329 /* ??? At this point we must generate a magic insn that appears to
2330 modify the spill iterators and the frame pointer. This would
2331 allow the most scheduling freedom. For now, just hard stop. */
2332 emit_insn (gen_blockage ());
2335 /* Locate the bottom of the register save area. */
2336 cfa_off = (current_frame_info.spill_cfa_off
2337 + current_frame_info.spill_size
2338 + current_frame_info.extra_spill_size);
2340 /* Restore the predicate registers. */
2341 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2343 if (current_frame_info.reg_save_pr != 0)
2344 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2345 else
2347 alt_regno = next_scratch_gr_reg ();
2348 alt_reg = gen_rtx_REG (DImode, alt_regno);
2349 do_restore (gen_movdi_x, alt_reg, cfa_off);
2350 cfa_off -= 8;
2352 reg = gen_rtx_REG (DImode, PR_REG (0));
2353 emit_move_insn (reg, alt_reg);
2356 /* Restore the application registers. */
2358 /* Load the saved unat from the stack, but do not restore it until
2359 after the GRs have been restored. */
2360 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2362 if (current_frame_info.reg_save_ar_unat != 0)
2363 ar_unat_save_reg
2364 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2365 else
2367 alt_regno = next_scratch_gr_reg ();
2368 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2369 current_frame_info.gr_used_mask |= 1 << alt_regno;
2370 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2371 cfa_off -= 8;
2374 else
2375 ar_unat_save_reg = NULL_RTX;
2377 if (current_frame_info.reg_save_ar_pfs != 0)
2379 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2380 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2381 emit_move_insn (reg, alt_reg);
2383 else if (! current_function_is_leaf)
2385 alt_regno = next_scratch_gr_reg ();
2386 alt_reg = gen_rtx_REG (DImode, alt_regno);
2387 do_restore (gen_movdi_x, alt_reg, cfa_off);
2388 cfa_off -= 8;
2389 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2390 emit_move_insn (reg, alt_reg);
2393 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2395 if (current_frame_info.reg_save_ar_lc != 0)
2396 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2397 else
2399 alt_regno = next_scratch_gr_reg ();
2400 alt_reg = gen_rtx_REG (DImode, alt_regno);
2401 do_restore (gen_movdi_x, alt_reg, cfa_off);
2402 cfa_off -= 8;
2404 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2405 emit_move_insn (reg, alt_reg);
2408 /* We should now be at the base of the gr/br/fr spill area. */
2409 if (cfa_off != (current_frame_info.spill_cfa_off
2410 + current_frame_info.spill_size))
2411 abort ();
2413 /* Restore all general registers. */
2414 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2415 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2417 reg = gen_rtx_REG (DImode, regno);
2418 do_restore (gen_gr_restore, reg, cfa_off);
2419 cfa_off -= 8;
2422 /* Restore the branch registers. Handle B0 specially, as it may
2423 have gotten stored in some GR register. */
2424 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2426 if (current_frame_info.reg_save_b0 != 0)
2427 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2428 else
2430 alt_regno = next_scratch_gr_reg ();
2431 alt_reg = gen_rtx_REG (DImode, alt_regno);
2432 do_restore (gen_movdi_x, alt_reg, cfa_off);
2433 cfa_off -= 8;
2435 reg = gen_rtx_REG (DImode, BR_REG (0));
2436 emit_move_insn (reg, alt_reg);
2439 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2440 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2442 alt_regno = next_scratch_gr_reg ();
2443 alt_reg = gen_rtx_REG (DImode, alt_regno);
2444 do_restore (gen_movdi_x, alt_reg, cfa_off);
2445 cfa_off -= 8;
2446 reg = gen_rtx_REG (DImode, regno);
2447 emit_move_insn (reg, alt_reg);
2450 /* Restore floating point registers. */
2451 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2452 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2454 if (cfa_off & 15)
2455 abort ();
2456 reg = gen_rtx_REG (TFmode, regno);
2457 do_restore (gen_fr_restore_x, reg, cfa_off);
2458 cfa_off -= 16;
2461 /* Restore ar.unat for real. */
2462 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2464 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2465 emit_move_insn (reg, ar_unat_save_reg);
2468 if (cfa_off != current_frame_info.spill_cfa_off)
2469 abort ();
2471 finish_spill_pointers ();
2473 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2475 /* ??? At this point we must generate a magic insn that appears to
2476 modify the spill iterators, the stack pointer, and the frame
2477 pointer. This would allow the most scheduling freedom. For now,
2478 just hard stop. */
2479 emit_insn (gen_blockage ());
2482 if (cfun->machine->ia64_eh_epilogue_sp)
2483 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2484 else if (frame_pointer_needed)
2486 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2487 RTX_FRAME_RELATED_P (insn) = 1;
2489 else if (current_frame_info.total_size)
2491 rtx offset, frame_size_rtx;
2493 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2494 if (CONST_OK_FOR_I (current_frame_info.total_size))
2495 offset = frame_size_rtx;
2496 else
2498 regno = next_scratch_gr_reg ();
2499 offset = gen_rtx_REG (DImode, regno);
2500 emit_move_insn (offset, frame_size_rtx);
2503 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2504 offset));
2506 RTX_FRAME_RELATED_P (insn) = 1;
2507 if (GET_CODE (offset) != CONST_INT)
2509 REG_NOTES (insn)
2510 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2511 gen_rtx_SET (VOIDmode,
2512 stack_pointer_rtx,
2513 gen_rtx_PLUS (DImode,
2514 stack_pointer_rtx,
2515 frame_size_rtx)),
2516 REG_NOTES (insn));
2520 if (cfun->machine->ia64_eh_epilogue_bsp)
2521 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2523 if (! sibcall_p)
2524 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2525 else
2527 int fp = GR_REG (2);
2528       /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
2529 	 first available call-clobbered register.  If there was a frame pointer
2530 	 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2531 	 so we have to make sure we're using the string "r2" when emitting
2532 	 the register name for the assembler.  */
2533 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2534 fp = HARD_FRAME_POINTER_REGNUM;
2536 /* We must emit an alloc to force the input registers to become output
2537 registers. Otherwise, if the callee tries to pass its parameters
2538 through to another call without an intervening alloc, then these
2539 values get lost. */
2540 /* ??? We don't need to preserve all input registers. We only need to
2541 preserve those input registers used as arguments to the sibling call.
2542 It is unclear how to compute that number here. */
2543 if (current_frame_info.n_input_regs != 0)
2544 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2545 GEN_INT (0), GEN_INT (0),
2546 GEN_INT (current_frame_info.n_input_regs),
2547 GEN_INT (0)));
2551 /* Return 1 if br.ret can do all the work required to return from a
2552 function. */
2555 ia64_direct_return ()
2557 if (reload_completed && ! frame_pointer_needed)
2559 ia64_compute_frame_size (get_frame_size ());
2561 return (current_frame_info.total_size == 0
2562 && current_frame_info.n_spilled == 0
2563 && current_frame_info.reg_save_b0 == 0
2564 && current_frame_info.reg_save_pr == 0
2565 && current_frame_info.reg_save_ar_pfs == 0
2566 && current_frame_info.reg_save_ar_unat == 0
2567 && current_frame_info.reg_save_ar_lc == 0);
2569 return 0;
2573 ia64_hard_regno_rename_ok (from, to)
2574 int from;
2575 int to;
2577 /* Don't clobber any of the registers we reserved for the prologue. */
2578 if (to == current_frame_info.reg_fp
2579 || to == current_frame_info.reg_save_b0
2580 || to == current_frame_info.reg_save_pr
2581 || to == current_frame_info.reg_save_ar_pfs
2582 || to == current_frame_info.reg_save_ar_unat
2583 || to == current_frame_info.reg_save_ar_lc)
2584 return 0;
2586 if (from == current_frame_info.reg_fp
2587 || from == current_frame_info.reg_save_b0
2588 || from == current_frame_info.reg_save_pr
2589 || from == current_frame_info.reg_save_ar_pfs
2590 || from == current_frame_info.reg_save_ar_unat
2591 || from == current_frame_info.reg_save_ar_lc)
2592 return 0;
2594 /* Don't use output registers outside the register frame. */
2595 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2596 return 0;
2598 /* Retain even/oddness on predicate register pairs. */
2599 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2600 return (from & 1) == (to & 1);
2602 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2603 if (from == GR_REG (4) && current_function_calls_setjmp)
2604 return 0;
2606 return 1;
2609 /* Target hook for assembling integer objects. Handle word-sized
2610 aligned objects and detect the cases when @fptr is needed. */
2612 static bool
2613 ia64_assemble_integer (x, size, aligned_p)
2614 rtx x;
2615 unsigned int size;
2616 int aligned_p;
2618 if (size == UNITS_PER_WORD && aligned_p
2619 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2620 && GET_CODE (x) == SYMBOL_REF
2621 && SYMBOL_REF_FLAG (x))
2623 fputs ("\tdata8\t@fptr(", asm_out_file);
2624 output_addr_const (asm_out_file, x);
2625 fputs (")\n", asm_out_file);
2626 return true;
2628 return default_assemble_integer (x, size, aligned_p);
2631 /* Emit the function prologue. */
2633 static void
2634 ia64_output_function_prologue (file, size)
2635 FILE *file;
2636 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2638 int mask, grsave, grsave_prev;
2640 if (current_frame_info.need_regstk)
2641 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2642 current_frame_info.n_input_regs,
2643 current_frame_info.n_local_regs,
2644 current_frame_info.n_output_regs,
2645 current_frame_info.n_rotate_regs);
2647 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2648 return;
2650 /* Emit the .prologue directive. */
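/* Editorial note, inferred from the assignments below: the MASK bits in
   the ".prologue mask, grsave" directive are 8 = b0 save register,
   4 = ar.pfs save register, 2 = frame pointer (reg_fp) save register and
   1 = predicate save register; GRSAVE is the first register of the
   consecutive group, so a later bit is only set when its save register
   immediately follows the previous one.  */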
2652 mask = 0;
2653 grsave = grsave_prev = 0;
2654 if (current_frame_info.reg_save_b0 != 0)
2656 mask |= 8;
2657 grsave = grsave_prev = current_frame_info.reg_save_b0;
2659 if (current_frame_info.reg_save_ar_pfs != 0
2660 && (grsave_prev == 0
2661 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2663 mask |= 4;
2664 if (grsave_prev == 0)
2665 grsave = current_frame_info.reg_save_ar_pfs;
2666 grsave_prev = current_frame_info.reg_save_ar_pfs;
2668 if (current_frame_info.reg_fp != 0
2669 && (grsave_prev == 0
2670 || current_frame_info.reg_fp == grsave_prev + 1))
2672 mask |= 2;
2673 if (grsave_prev == 0)
2674 grsave = HARD_FRAME_POINTER_REGNUM;
2675 grsave_prev = current_frame_info.reg_fp;
2677 if (current_frame_info.reg_save_pr != 0
2678 && (grsave_prev == 0
2679 || current_frame_info.reg_save_pr == grsave_prev + 1))
2681 mask |= 1;
2682 if (grsave_prev == 0)
2683 grsave = current_frame_info.reg_save_pr;
2686 if (mask)
2687 fprintf (file, "\t.prologue %d, %d\n", mask,
2688 ia64_dbx_register_number (grsave));
2689 else
2690 fputs ("\t.prologue\n", file);
2692 /* Emit a .spill directive, if necessary, to relocate the base of
2693 the register spill area. */
2694 if (current_frame_info.spill_cfa_off != -16)
2695 fprintf (file, "\t.spill %ld\n",
2696 (long) (current_frame_info.spill_cfa_off
2697 + current_frame_info.spill_size));
2700 /* Emit the .body directive at the scheduled end of the prologue. */
2702 static void
2703 ia64_output_function_end_prologue (file)
2704 FILE *file;
2706 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2707 return;
2709 fputs ("\t.body\n", file);
2712 /* Emit the function epilogue. */
2714 static void
2715 ia64_output_function_epilogue (file, size)
2716 FILE *file ATTRIBUTE_UNUSED;
2717 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2719 int i;
2721 /* Reset from the function's potential modifications. */
2722 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2724 if (current_frame_info.reg_fp)
2726 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2727 reg_names[HARD_FRAME_POINTER_REGNUM]
2728 = reg_names[current_frame_info.reg_fp];
2729 reg_names[current_frame_info.reg_fp] = tmp;
2731 if (! TARGET_REG_NAMES)
2733 for (i = 0; i < current_frame_info.n_input_regs; i++)
2734 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2735 for (i = 0; i < current_frame_info.n_local_regs; i++)
2736 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2737 for (i = 0; i < current_frame_info.n_output_regs; i++)
2738 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2741 current_frame_info.initialized = 0;
2745 ia64_dbx_register_number (regno)
2746 int regno;
2748 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2749 from its home at loc79 to something inside the register frame. We
2750 must perform the same renumbering here for the debug info. */
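/* Worked example (editorial, from the formulas below): with
   current_frame_info.n_input_regs == 2, in1 maps to 33 and loc0 maps to
   34; any regno outside the in/loc/out ranges is returned unchanged.  */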
2751 if (current_frame_info.reg_fp)
2753 if (regno == HARD_FRAME_POINTER_REGNUM)
2754 regno = current_frame_info.reg_fp;
2755 else if (regno == current_frame_info.reg_fp)
2756 regno = HARD_FRAME_POINTER_REGNUM;
2759 if (IN_REGNO_P (regno))
2760 return 32 + regno - IN_REG (0);
2761 else if (LOC_REGNO_P (regno))
2762 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2763 else if (OUT_REGNO_P (regno))
2764 return (32 + current_frame_info.n_input_regs
2765 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2766 else
2767 return regno;
2770 void
2771 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2772 rtx addr, fnaddr, static_chain;
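/* Editorial summary of the layout built below (four 8-byte words,
   gathered from the stores that follow):
       word 0:  __ia64_trampoline   (entry point of the fake descriptor)
       word 1:  ADDR + 16           (gp of the fake descriptor)
       word 2:  FNADDR              (the target function descriptor)
       word 3:  STATIC_CHAIN                                           */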
2774 rtx addr_reg, eight = GEN_INT (8);
2776 /* Load up our iterator. */
2777 addr_reg = gen_reg_rtx (Pmode);
2778 emit_move_insn (addr_reg, addr);
2780 /* The first two words are the fake descriptor:
2781 __ia64_trampoline, ADDR+16. */
2782 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2783 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2784 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2786 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2787 copy_to_reg (plus_constant (addr, 16)));
2788 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2790 /* The third word is the target descriptor. */
2791 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2792 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2794 /* The fourth word is the static chain. */
2795 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2798 /* Do any needed setup for a variadic function. CUM has not been updated
2799 for the last named argument which has type TYPE and mode MODE.
2801 We generate the actual spill instructions during prologue generation. */
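/* Worked example (editorial; assumes MAX_ARGUMENT_SLOTS == 8 and
   UNITS_PER_WORD == 8): for a stdarg function whose named arguments leave
   cum.words at 2 after the advance below, n == 6, *pretend_size == 48,
   and six varargs GRs are spilled by the prologue.  */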
2803 void
2804 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2805 CUMULATIVE_ARGS cum;
2806 int int_mode;
2807 tree type;
2808 int * pretend_size;
2809 int second_time ATTRIBUTE_UNUSED;
2811 /* If this is a stdarg function, then skip the current argument. */
2812 if (! current_function_varargs)
2813 ia64_function_arg_advance (&cum, int_mode, type, 1);
2815 if (cum.words < MAX_ARGUMENT_SLOTS)
2817 int n = MAX_ARGUMENT_SLOTS - cum.words;
2818 *pretend_size = n * UNITS_PER_WORD;
2819 cfun->machine->n_varargs = n;
2823 /* Check whether TYPE is a homogeneous floating point aggregate. If
2824 it is, return the mode of the floating point type that appears
2825    in all leaves.  If it is not, return VOIDmode.
2827    An aggregate is a homogeneous floating point aggregate if all
2828    fields/elements in it have the same floating point type (e.g.,
2829 SFmode). 128-bit quad-precision floats are excluded. */
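/* Illustrative examples (editorial, following the rules above):
       struct a { float x, y, z; };        HFA, element mode SFmode
       struct b { double d[4]; };          HFA, element mode DFmode
       struct c { float x; double y; };    not an HFA (mixed leaf modes)
       struct d { int i; float f; };       not an HFA (non-FP leaf)     */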
2831 static enum machine_mode
2832 hfa_element_mode (type, nested)
2833 tree type;
2834 int nested;
2836 enum machine_mode element_mode = VOIDmode;
2837 enum machine_mode mode;
2838 enum tree_code code = TREE_CODE (type);
2839 int know_element_mode = 0;
2840 tree t;
2842 switch (code)
2844 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2845 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2846 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2847 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2848 case FUNCTION_TYPE:
2849 return VOIDmode;
2851 /* Fortran complex types are supposed to be HFAs, so we need to handle
2852 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2853 types though. */
2854 case COMPLEX_TYPE:
2855 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2856 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2857 * BITS_PER_UNIT, MODE_FLOAT, 0);
2858 else
2859 return VOIDmode;
2861 case REAL_TYPE:
2862 /* ??? Should exclude 128-bit long double here. */
2863 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2864 mode if this is contained within an aggregate. */
2865 if (nested)
2866 return TYPE_MODE (type);
2867 else
2868 return VOIDmode;
2870 case ARRAY_TYPE:
2871 return hfa_element_mode (TREE_TYPE (type), 1);
2873 case RECORD_TYPE:
2874 case UNION_TYPE:
2875 case QUAL_UNION_TYPE:
2876 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2878 if (TREE_CODE (t) != FIELD_DECL)
2879 continue;
2881 mode = hfa_element_mode (TREE_TYPE (t), 1);
2882 if (know_element_mode)
2884 if (mode != element_mode)
2885 return VOIDmode;
2887 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2888 return VOIDmode;
2889 else
2891 know_element_mode = 1;
2892 element_mode = mode;
2895 return element_mode;
2897 default:
2898 /* If we reach here, we probably have some front-end specific type
2899 that the backend doesn't know about. This can happen via the
2900 aggregate_value_p call in init_function_start. All we can do is
2901 ignore unknown tree types. */
2902 return VOIDmode;
2905 return VOIDmode;
2908 /* Return rtx for register where argument is passed, or zero if it is passed
2909 on the stack. */
2911 /* ??? 128-bit quad-precision floats are always passed in general
2912 registers. */
2915 ia64_function_arg (cum, mode, type, named, incoming)
2916 CUMULATIVE_ARGS *cum;
2917 enum machine_mode mode;
2918 tree type;
2919 int named;
2920 int incoming;
2922 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2923 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2924 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2925 / UNITS_PER_WORD);
2926 int offset = 0;
2927 enum machine_mode hfa_mode = VOIDmode;
2929 /* Integer and float arguments larger than 8 bytes start at the next even
2930 boundary. Aggregates larger than 8 bytes start at the next even boundary
2931 if the aggregate has 16 byte alignment. Net effect is that types with
2932 alignment greater than 8 start at the next even boundary. */
2933 /* ??? The ABI does not specify how to handle aggregates with alignment from
2934 9 to 15 bytes, or greater than 16. We handle them all as if they had
2935 16 byte alignment. Such aggregates can occur only if gcc extensions are
2936 used. */
2937 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2938 : (words > 1))
2939 && (cum->words & 1))
2940 offset = 1;
2942 /* If all argument slots are used, then it must go on the stack. */
2943 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2944 return 0;
2946 /* Check for and handle homogeneous FP aggregates. */
2947 if (type)
2948 hfa_mode = hfa_element_mode (type, 0);
2950 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2951 and unprototyped hfas are passed specially. */
2952 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2954 rtx loc[16];
2955 int i = 0;
2956 int fp_regs = cum->fp_regs;
2957 int int_regs = cum->words + offset;
2958 int hfa_size = GET_MODE_SIZE (hfa_mode);
2959 int byte_size;
2960 int args_byte_size;
2962 /* If prototyped, pass it in FR regs then GR regs.
2963 If not prototyped, pass it in both FR and GR regs.
2965 If this is an SFmode aggregate, then it is possible to run out of
2966 FR regs while GR regs are still left. In that case, we pass the
2967 remaining part in the GR regs. */
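/* Worked example (editorial): a named, prototyped `struct { float x, y, z; }'
   starting at slot 0 has hfa_mode == SFmode, byte_size == 12 and
   hfa_size == 4, so the loop below fills three FR argument registers at
   offsets 0, 4 and 8, no GR part is needed, and the result is a
   three-element PARALLEL.  */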
2969 /* Fill the FP regs. We do this always. We stop if we reach the end
2970 of the argument, the last FP register, or the last argument slot. */
2972 byte_size = ((mode == BLKmode)
2973 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2974 args_byte_size = int_regs * UNITS_PER_WORD;
2975 offset = 0;
2976 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2977 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2979 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2980 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2981 + fp_regs)),
2982 GEN_INT (offset));
2983 offset += hfa_size;
2984 args_byte_size += hfa_size;
2985 fp_regs++;
2988 /* If no prototype, then the whole thing must go in GR regs. */
2989 if (! cum->prototype)
2990 offset = 0;
2991 /* If this is an SFmode aggregate, then we might have some left over
2992 that needs to go in GR regs. */
2993 else if (byte_size != offset)
2994 int_regs += offset / UNITS_PER_WORD;
2996 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2998 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3000 enum machine_mode gr_mode = DImode;
3002 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3003 then this goes in a GR reg left adjusted/little endian, right
3004 adjusted/big endian. */
3005 /* ??? Currently this is handled wrong, because 4-byte hunks are
3006 always right adjusted/little endian. */
3007 if (offset & 0x4)
3008 gr_mode = SImode;
3009 /* If we have an even 4 byte hunk because the aggregate is a
3010 multiple of 4 bytes in size, then this goes in a GR reg right
3011 adjusted/little endian. */
3012 else if (byte_size - offset == 4)
3013 gr_mode = SImode;
3014 /* Complex floats need to have float mode. */
3015 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3016 gr_mode = hfa_mode;
3018 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3019 gen_rtx_REG (gr_mode, (basereg
3020 + int_regs)),
3021 GEN_INT (offset));
3022 offset += GET_MODE_SIZE (gr_mode);
3023 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3024 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3027 /* If we ended up using just one location, just return that one loc. */
3028 if (i == 1)
3029 return XEXP (loc[0], 0);
3030 else
3031 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3034 /* Integral and aggregates go in general registers. If we have run out of
3035 FR registers, then FP values must also go in general registers. This can
3036 happen when we have a SFmode HFA. */
3037 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3038 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3039 return gen_rtx_REG (mode, basereg + cum->words + offset);
3041 /* If there is a prototype, then FP values go in a FR register when
3042    named, and in a GR register when unnamed.  */
3043 else if (cum->prototype)
3045 if (! named)
3046 return gen_rtx_REG (mode, basereg + cum->words + offset);
3047 else
3048 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3050 /* If there is no prototype, then FP values go in both FR and GR
3051 registers. */
3052 else
3054 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3055 gen_rtx_REG (mode, (FR_ARG_FIRST
3056 + cum->fp_regs)),
3057 const0_rtx);
3058 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3059 gen_rtx_REG (mode,
3060 (basereg + cum->words
3061 + offset)),
3062 const0_rtx);
3064 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3068 /* Return number of words, at the beginning of the argument, that must be
3069    put in registers.  0 if the argument is entirely in registers or entirely
3070 in memory. */
3073 ia64_function_arg_partial_nregs (cum, mode, type, named)
3074 CUMULATIVE_ARGS *cum;
3075 enum machine_mode mode;
3076 tree type;
3077 int named ATTRIBUTE_UNUSED;
3079 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3080 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3081 / UNITS_PER_WORD);
3082 int offset = 0;
3084 /* Arguments with alignment larger than 8 bytes start at the next even
3085 boundary. */
3086 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3087 : (words > 1))
3088 && (cum->words & 1))
3089 offset = 1;
3091 /* If all argument slots are used, then it must go on the stack. */
3092 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3093 return 0;
3095 /* It doesn't matter whether the argument goes in FR or GR regs. If
3096 it fits within the 8 argument slots, then it goes entirely in
3097 registers. If it extends past the last argument slot, then the rest
3098 goes on the stack. */
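/* Worked example (editorial; assumes MAX_ARGUMENT_SLOTS == 8): a three-word
   aggregate arriving with cum->words == 6 and no alignment padding has two
   words in registers and one on the stack, so the function returns
   8 - 6 - 0 == 2.  */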
3100 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3101 return 0;
3103 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3106 /* Update CUM to point after this argument. This is patterned after
3107 ia64_function_arg. */
3109 void
3110 ia64_function_arg_advance (cum, mode, type, named)
3111 CUMULATIVE_ARGS *cum;
3112 enum machine_mode mode;
3113 tree type;
3114 int named;
3116 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3117 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3118 / UNITS_PER_WORD);
3119 int offset = 0;
3120 enum machine_mode hfa_mode = VOIDmode;
3122 /* If all arg slots are already full, then there is nothing to do. */
3123 if (cum->words >= MAX_ARGUMENT_SLOTS)
3124 return;
3126 /* Arguments with alignment larger than 8 bytes start at the next even
3127 boundary. */
3128 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3129 : (words > 1))
3130 && (cum->words & 1))
3131 offset = 1;
3133 cum->words += words + offset;
3135 /* Check for and handle homogeneous FP aggregates. */
3136 if (type)
3137 hfa_mode = hfa_element_mode (type, 0);
3139 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3140 and unprototyped hfas are passed specially. */
3141 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3143 int fp_regs = cum->fp_regs;
3144 /* This is the original value of cum->words + offset. */
3145 int int_regs = cum->words - words;
3146 int hfa_size = GET_MODE_SIZE (hfa_mode);
3147 int byte_size;
3148 int args_byte_size;
3150 /* If prototyped, pass it in FR regs then GR regs.
3151 If not prototyped, pass it in both FR and GR regs.
3153 If this is an SFmode aggregate, then it is possible to run out of
3154 FR regs while GR regs are still left. In that case, we pass the
3155 remaining part in the GR regs. */
3157 /* Fill the FP regs. We do this always. We stop if we reach the end
3158 of the argument, the last FP register, or the last argument slot. */
3160 byte_size = ((mode == BLKmode)
3161 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3162 args_byte_size = int_regs * UNITS_PER_WORD;
3163 offset = 0;
3164 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3165 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3167 offset += hfa_size;
3168 args_byte_size += hfa_size;
3169 fp_regs++;
3172 cum->fp_regs = fp_regs;
3175 /* Integral and aggregates go in general registers. If we have run out of
3176 FR registers, then FP values must also go in general registers. This can
3177 happen when we have a SFmode HFA. */
3178 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3179 cum->int_regs = cum->words;
3181 /* If there is a prototype, then FP values go in a FR register when
3182    named, and in a GR register when unnamed.  */
3183 else if (cum->prototype)
3185 if (! named)
3186 cum->int_regs = cum->words;
3187 else
3188 /* ??? Complex types should not reach here. */
3189 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3191 /* If there is no prototype, then FP values go in both FR and GR
3192 registers. */
3193 else
3195 /* ??? Complex types should not reach here. */
3196 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3197 cum->int_regs = cum->words;
3201 /* Variable sized types are passed by reference. */
3202 /* ??? At present this is a GCC extension to the IA-64 ABI. */
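/* Editorial example (an assumption, not taken from the ABI): a parameter
   whose type has a size that is not a compile-time constant -- e.g. a
   variable-sized record as can arise from non-C front ends or GNU
   extensions -- has a TYPE_SIZE that is not an INTEGER_CST and is
   therefore passed by reference.  */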
3205 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3206 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3207 enum machine_mode mode ATTRIBUTE_UNUSED;
3208 tree type;
3209 int named ATTRIBUTE_UNUSED;
3211 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3214 /* Implement va_start. */
3216 void
3217 ia64_va_start (stdarg_p, valist, nextarg)
3218 int stdarg_p;
3219 tree valist;
3220 rtx nextarg;
3222 int arg_words;
3223 int ofs;
3225 arg_words = current_function_args_info.words;
3227 if (stdarg_p)
3228 ofs = 0;
3229 else
3230 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3232 nextarg = plus_constant (nextarg, ofs);
3233 std_expand_builtin_va_start (1, valist, nextarg);
3236 /* Implement va_arg. */
3239 ia64_va_arg (valist, type)
3240 tree valist, type;
3242 tree t;
3244 /* Variable sized types are passed by reference. */
3245 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3247 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3248 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3251 /* Arguments with alignment larger than 8 bytes start at the next even
3252 boundary. */
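/* Editorial note (assuming UNITS_PER_WORD == 8): when the test below
   fires, the three build() statements compute valist = (valist + 15) & -16,
   i.e. they round the argument pointer up to the next 16-byte boundary
   before the normal va_arg expansion runs.  */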
3253 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3255 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3256 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3257 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3258 build_int_2 (-2 * UNITS_PER_WORD, -1));
3259 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3260 TREE_SIDE_EFFECTS (t) = 1;
3261 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3264 return std_expand_builtin_va_arg (valist, type);
3267 /* Return 1 if the function return value is returned in memory.  Return 0 if it is
3268 in a register. */
3271 ia64_return_in_memory (valtype)
3272 tree valtype;
3274 enum machine_mode mode;
3275 enum machine_mode hfa_mode;
3276 HOST_WIDE_INT byte_size;
3278 mode = TYPE_MODE (valtype);
3279 byte_size = GET_MODE_SIZE (mode);
3280 if (mode == BLKmode)
3282 byte_size = int_size_in_bytes (valtype);
3283 if (byte_size < 0)
3284 return 1;
3287 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
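/* Worked example (editorial; assumes MAX_ARGUMENT_SLOTS == 8): an HFA of
   ten floats has byte_size == 40 and hfa_size == 4, so 40 / 4 > 8 and it
   is returned in memory, while an HFA of eight doubles (64 bytes, eight
   elements) still comes back in FP registers.  */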
3289 hfa_mode = hfa_element_mode (valtype, 0);
3290 if (hfa_mode != VOIDmode)
3292 int hfa_size = GET_MODE_SIZE (hfa_mode);
3294 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3295 return 1;
3296 else
3297 return 0;
3299 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3300 return 1;
3301 else
3302 return 0;
3305 /* Return rtx for register that holds the function return value. */
3308 ia64_function_value (valtype, func)
3309 tree valtype;
3310 tree func ATTRIBUTE_UNUSED;
3312 enum machine_mode mode;
3313 enum machine_mode hfa_mode;
3315 mode = TYPE_MODE (valtype);
3316 hfa_mode = hfa_element_mode (valtype, 0);
3318 if (hfa_mode != VOIDmode)
3320 rtx loc[8];
3321 int i;
3322 int hfa_size;
3323 int byte_size;
3324 int offset;
3326 hfa_size = GET_MODE_SIZE (hfa_mode);
3327 byte_size = ((mode == BLKmode)
3328 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3329 offset = 0;
3330 for (i = 0; offset < byte_size; i++)
3332 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3333 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3334 GEN_INT (offset));
3335 offset += hfa_size;
3338 if (i == 1)
3339 return XEXP (loc[0], 0);
3340 else
3341 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3343 else if (FLOAT_TYPE_P (valtype) &&
3344 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3345 return gen_rtx_REG (mode, FR_ARG_FIRST);
3346 else
3347 return gen_rtx_REG (mode, GR_RET_FIRST);
3350 /* Print a memory address as an operand to reference that memory location. */
3352 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3353 also call this from ia64_print_operand for memory addresses. */
3355 void
3356 ia64_print_operand_address (stream, address)
3357 FILE * stream ATTRIBUTE_UNUSED;
3358 rtx address ATTRIBUTE_UNUSED;
3362 /* Print an operand to an assembler instruction.
3363 C Swap and print a comparison operator.
3364 D Print an FP comparison operator.
3365 E Print 32 - constant, for SImode shifts as extract.
3366 e Print 64 - constant, for DImode rotates.
3367 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3368 a floating point register emitted normally.
3369 I Invert a predicate register by adding 1.
3370 J Select the proper predicate register for a condition.
3371 j Select the inverse predicate register for a condition.
3372 O Append .acq for volatile load.
3373 P Postincrement of a MEM.
3374 Q Append .rel for volatile store.
3375 S Shift amount for shladd instruction.
3376 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3377 for Intel assembler.
3378 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3379 for Intel assembler.
3380 r Print register name, or constant 0 as r0. HP compatibility for
3381 Linux kernel. */
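/* A few editorial examples of the codes above, as handled by the switch
   below: %E on the constant 10 prints 22 (32 - 10); %r on a register
   prints its name and on a zero constant prints "r0"; %O and %Q print
   ".acq" / ".rel" only when the MEM operand is volatile.  */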
3382 void
3383 ia64_print_operand (file, x, code)
3384 FILE * file;
3385 rtx x;
3386 int code;
3388 const char *str;
3390 switch (code)
3392 case 0:
3393 /* Handled below. */
3394 break;
3396 case 'C':
3398 enum rtx_code c = swap_condition (GET_CODE (x));
3399 fputs (GET_RTX_NAME (c), file);
3400 return;
3403 case 'D':
3404 switch (GET_CODE (x))
3406 case NE:
3407 str = "neq";
3408 break;
3409 case UNORDERED:
3410 str = "unord";
3411 break;
3412 case ORDERED:
3413 str = "ord";
3414 break;
3415 default:
3416 str = GET_RTX_NAME (GET_CODE (x));
3417 break;
3419 fputs (str, file);
3420 return;
3422 case 'E':
3423 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3424 return;
3426 case 'e':
3427 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3428 return;
3430 case 'F':
3431 if (x == CONST0_RTX (GET_MODE (x)))
3432 str = reg_names [FR_REG (0)];
3433 else if (x == CONST1_RTX (GET_MODE (x)))
3434 str = reg_names [FR_REG (1)];
3435 else if (GET_CODE (x) == REG)
3436 str = reg_names [REGNO (x)];
3437 else
3438 abort ();
3439 fputs (str, file);
3440 return;
3442 case 'I':
3443 fputs (reg_names [REGNO (x) + 1], file);
3444 return;
3446 case 'J':
3447 case 'j':
3449 unsigned int regno = REGNO (XEXP (x, 0));
3450 if (GET_CODE (x) == EQ)
3451 regno += 1;
3452 if (code == 'j')
3453 regno ^= 1;
3454 fputs (reg_names [regno], file);
3456 return;
3458 case 'O':
3459 if (MEM_VOLATILE_P (x))
3460 fputs(".acq", file);
3461 return;
3463 case 'P':
3465 HOST_WIDE_INT value;
3467 switch (GET_CODE (XEXP (x, 0)))
3469 default:
3470 return;
3472 case POST_MODIFY:
3473 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3474 if (GET_CODE (x) == CONST_INT)
3475 value = INTVAL (x);
3476 else if (GET_CODE (x) == REG)
3478 fprintf (file, ", %s", reg_names[REGNO (x)]);
3479 return;
3481 else
3482 abort ();
3483 break;
3485 case POST_INC:
3486 value = GET_MODE_SIZE (GET_MODE (x));
3487 break;
3489 case POST_DEC:
3490 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3491 break;
3494 putc (',', file);
3495 putc (' ', file);
3496 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3497 return;
3500 case 'Q':
3501 if (MEM_VOLATILE_P (x))
3502 fputs(".rel", file);
3503 return;
3505 case 'S':
3506 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3507 return;
3509 case 'T':
3510 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3512 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3513 return;
3515 break;
3517 case 'U':
3518 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3520 const char *prefix = "0x";
3521 if (INTVAL (x) & 0x80000000)
3523 fprintf (file, "0xffffffff");
3524 prefix = "";
3526 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3527 return;
3529 break;
3531 case 'r':
3532 /* If this operand is the constant zero, write it as register zero.
3533 Any register, zero, or CONST_INT value is OK here. */
3534 if (GET_CODE (x) == REG)
3535 fputs (reg_names[REGNO (x)], file);
3536 else if (x == CONST0_RTX (GET_MODE (x)))
3537 fputs ("r0", file);
3538 else if (GET_CODE (x) == CONST_INT)
3539 output_addr_const (file, x);
3540 else
3541 output_operand_lossage ("invalid %%r value");
3542 return;
3544 case '+':
3546 const char *which;
3548 /* For conditional branches, returns or calls, substitute
3549 sptk, dptk, dpnt, or spnt for %s. */
3550 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3551 if (x)
3553 int pred_val = INTVAL (XEXP (x, 0));
3555 	  /* Guess top and bottom 2% statically predicted, per the thresholds below.  */
3556 if (pred_val < REG_BR_PROB_BASE / 50)
3557 which = ".spnt";
3558 else if (pred_val < REG_BR_PROB_BASE / 2)
3559 which = ".dpnt";
3560 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3561 which = ".dptk";
3562 else
3563 which = ".sptk";
3565 else if (GET_CODE (current_output_insn) == CALL_INSN)
3566 which = ".sptk";
3567 else
3568 which = ".dptk";
3570 fputs (which, file);
3571 return;
3574 case ',':
3575 x = current_insn_predicate;
3576 if (x)
3578 unsigned int regno = REGNO (XEXP (x, 0));
3579 if (GET_CODE (x) == EQ)
3580 regno += 1;
3581 fprintf (file, "(%s) ", reg_names [regno]);
3583 return;
3585 default:
3586 output_operand_lossage ("ia64_print_operand: unknown code");
3587 return;
3590 switch (GET_CODE (x))
3592 /* This happens for the spill/restore instructions. */
3593 case POST_INC:
3594 case POST_DEC:
3595 case POST_MODIFY:
3596 x = XEXP (x, 0);
3597 /* ... fall through ... */
3599 case REG:
3600 fputs (reg_names [REGNO (x)], file);
3601 break;
3603 case MEM:
3605 rtx addr = XEXP (x, 0);
3606 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3607 addr = XEXP (addr, 0);
3608 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3609 break;
3612 default:
3613 output_addr_const (file, x);
3614 break;
3617 return;
3620 /* Calculate the cost of moving data from a register in class FROM to
3621 one in class TO, using MODE. */
3624 ia64_register_move_cost (mode, from, to)
3625 enum machine_mode mode;
3626 enum reg_class from, to;
3628 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3629 if (to == ADDL_REGS)
3630 to = GR_REGS;
3631 if (from == ADDL_REGS)
3632 from = GR_REGS;
3634 /* All costs are symmetric, so reduce cases by putting the
3635 lower number class as the destination. */
3636 if (from < to)
3638 enum reg_class tmp = to;
3639 to = from, from = tmp;
3642 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3643 so that we get secondary memory reloads. Between FR_REGS,
3644 we have to make this at least as expensive as MEMORY_MOVE_COST
3645 to avoid spectacularly poor register class preferencing. */
3646 if (mode == TFmode)
3648 if (to != GR_REGS || from != GR_REGS)
3649 return MEMORY_MOVE_COST (mode, to, 0);
3650 else
3651 return 3;
3654 switch (to)
3656 case PR_REGS:
3657 /* Moving between PR registers takes two insns. */
3658 if (from == PR_REGS)
3659 return 3;
3660 /* Moving between PR and anything but GR is impossible. */
3661 if (from != GR_REGS)
3662 return MEMORY_MOVE_COST (mode, to, 0);
3663 break;
3665 case BR_REGS:
3666 /* Moving between BR and anything but GR is impossible. */
3667 if (from != GR_REGS && from != GR_AND_BR_REGS)
3668 return MEMORY_MOVE_COST (mode, to, 0);
3669 break;
3671 case AR_I_REGS:
3672 case AR_M_REGS:
3673 /* Moving between AR and anything but GR is impossible. */
3674 if (from != GR_REGS)
3675 return MEMORY_MOVE_COST (mode, to, 0);
3676 break;
3678 case GR_REGS:
3679 case FR_REGS:
3680 case GR_AND_FR_REGS:
3681 case GR_AND_BR_REGS:
3682 case ALL_REGS:
3683 break;
3685 default:
3686 abort ();
3689 return 2;
3692 /* This function returns the register class required for a secondary
3693 register when copying between one of the registers in CLASS, and X,
3694 using MODE. A return value of NO_REGS means that no secondary register
3695 is required. */
3697 enum reg_class
3698 ia64_secondary_reload_class (class, mode, x)
3699 enum reg_class class;
3700 enum machine_mode mode ATTRIBUTE_UNUSED;
3701 rtx x;
3703 int regno = -1;
3705 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3706 regno = true_regnum (x);
3708 switch (class)
3710 case BR_REGS:
3711 case AR_M_REGS:
3712 case AR_I_REGS:
3713 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3714 interaction. We end up with two pseudos with overlapping lifetimes
3715 both of which are equiv to the same constant, and both which need
3716 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3717 changes depending on the path length, which means the qty_first_reg
3718 check in make_regs_eqv can give different answers at different times.
3719 At some point I'll probably need a reload_indi pattern to handle
3720 this.
3722 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3723 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3724 non-general registers for good measure. */
3725 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3726 return GR_REGS;
3728 /* This is needed if a pseudo used as a call_operand gets spilled to a
3729 stack slot. */
3730 if (GET_CODE (x) == MEM)
3731 return GR_REGS;
3732 break;
3734 case FR_REGS:
3735       /* Need to go through general registers to get to other class regs.  */
3736 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3737 return GR_REGS;
3739 /* This can happen when a paradoxical subreg is an operand to the
3740 muldi3 pattern. */
3741 /* ??? This shouldn't be necessary after instruction scheduling is
3742 enabled, because paradoxical subregs are not accepted by
3743 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3744 stop the paradoxical subreg stupidity in the *_operand functions
3745 in recog.c. */
3746 if (GET_CODE (x) == MEM
3747 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3748 || GET_MODE (x) == QImode))
3749 return GR_REGS;
3751 /* This can happen because of the ior/and/etc patterns that accept FP
3752 registers as operands. If the third operand is a constant, then it
3753 needs to be reloaded into a FP register. */
3754 if (GET_CODE (x) == CONST_INT)
3755 return GR_REGS;
3757 /* This can happen because of register elimination in a muldi3 insn.
3758 E.g. `26107 * (unsigned long)&u'. */
3759 if (GET_CODE (x) == PLUS)
3760 return GR_REGS;
3761 break;
3763 case PR_REGS:
3764 /* ??? This happens if we cse/gcse a BImode value across a call,
3765 and the function has a nonlocal goto. This is because global
3766 does not allocate call crossing pseudos to hard registers when
3767 current_function_has_nonlocal_goto is true. This is relatively
3768 common for C++ programs that use exceptions. To reproduce,
3769 return NO_REGS and compile libstdc++. */
3770 if (GET_CODE (x) == MEM)
3771 return GR_REGS;
3773 /* This can happen when we take a BImode subreg of a DImode value,
3774 and that DImode value winds up in some non-GR register. */
3775 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3776 return GR_REGS;
3777 break;
3779 case GR_REGS:
3780 /* Since we have no offsettable memory addresses, we need a temporary
3781 to hold the address of the second word. */
3782 if (mode == TImode)
3783 return GR_REGS;
3784 break;
3786 default:
3787 break;
3790 return NO_REGS;
3794 /* Emit text to declare externally defined variables and functions, because
3795 the Intel assembler does not support undefined externals. */
3797 void
3798 ia64_asm_output_external (file, decl, name)
3799 FILE *file;
3800 tree decl;
3801 const char *name;
3803 int save_referenced;
3805 /* GNU as does not need anything here. */
3806 if (TARGET_GNU_AS)
3807 return;
3809 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3810 the linker when we do this, so we need to be careful not to do this for
3811 builtin functions which have no library equivalent. Unfortunately, we
3812 can't tell here whether or not a function will actually be called by
3813 expand_expr, so we pull in library functions even if we may not need
3814 them later. */
3815 if (! strcmp (name, "__builtin_next_arg")
3816 || ! strcmp (name, "alloca")
3817 || ! strcmp (name, "__builtin_constant_p")
3818 || ! strcmp (name, "__builtin_args_info"))
3819 return;
3821 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3822 restore it. */
3823 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3824 if (TREE_CODE (decl) == FUNCTION_DECL)
3826 fprintf (file, "%s", TYPE_ASM_OP);
3827 assemble_name (file, name);
3828 putc (',', file);
3829 fprintf (file, TYPE_OPERAND_FMT, "function");
3830 putc ('\n', file);
3832 ASM_GLOBALIZE_LABEL (file, name);
3833 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3836 /* Parse the -mfixed-range= option string. */
3838 static void
3839 fix_range (const_str)
3840 const char *const_str;
3842 int i, first, last;
3843 char *str, *dash, *comma;
3845   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3846 REG2 are either register names or register numbers. The effect
3847 of this option is to mark the registers in the range from REG1 to
3848 REG2 as ``fixed'' so they won't be used by the compiler. This is
3849 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
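  /* As an illustration (a hypothetical invocation, not taken from real
     build flags): -mfixed-range=f32-f127,r5-r6 would mark all of f32
     through f127 plus r5 and r6 as fixed and call-used below.  */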
3851 i = strlen (const_str);
3852 str = (char *) alloca (i + 1);
3853 memcpy (str, const_str, i + 1);
3855 while (1)
3857 dash = strchr (str, '-');
3858 if (!dash)
3860 warning ("value of -mfixed-range must have form REG1-REG2");
3861 return;
3863 *dash = '\0';
3865 comma = strchr (dash + 1, ',');
3866 if (comma)
3867 *comma = '\0';
3869 first = decode_reg_name (str);
3870 if (first < 0)
3872 warning ("unknown register name: %s", str);
3873 return;
3876 last = decode_reg_name (dash + 1);
3877 if (last < 0)
3879 warning ("unknown register name: %s", dash + 1);
3880 return;
3883 *dash = '-';
3885 if (first > last)
3887 warning ("%s-%s is an empty range", str, dash + 1);
3888 return;
3891 for (i = first; i <= last; ++i)
3892 fixed_regs[i] = call_used_regs[i] = 1;
3894 if (!comma)
3895 break;
3897 *comma = ',';
3898 str = comma + 1;
3902 /* Called to register all of our global variables with the garbage
3903 collector. */
3905 static void
3906 ia64_add_gc_roots ()
3908 ggc_add_rtx_root (&ia64_compare_op0, 1);
3909 ggc_add_rtx_root (&ia64_compare_op1, 1);
3912 static void
3913 ia64_init_machine_status (p)
3914 struct function *p;
3916 p->machine =
3917 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3920 static void
3921 ia64_mark_machine_status (p)
3922 struct function *p;
3924 struct machine_function *machine = p->machine;
3926 if (machine)
3928 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3929 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3930 ggc_mark_rtx (machine->ia64_gp_save);
3934 static void
3935 ia64_free_machine_status (p)
3936 struct function *p;
3938 free (p->machine);
3939 p->machine = NULL;
3942 /* Handle TARGET_OPTIONS switches. */
3944 void
3945 ia64_override_options ()
3947 if (TARGET_AUTO_PIC)
3948 target_flags |= MASK_CONST_GP;
3950 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3952 warning ("cannot optimize division for both latency and throughput");
3953 target_flags &= ~MASK_INLINE_DIV_THR;
3956 if (ia64_fixed_range_string)
3957 fix_range (ia64_fixed_range_string);
3959 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3960 flag_schedule_insns_after_reload = 0;
3962 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3964 init_machine_status = ia64_init_machine_status;
3965 mark_machine_status = ia64_mark_machine_status;
3966 free_machine_status = ia64_free_machine_status;
3968 ia64_add_gc_roots ();
3971 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3972 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3973 static enum attr_type ia64_safe_type PARAMS((rtx));
3975 static enum attr_itanium_requires_unit0
3976 ia64_safe_itanium_requires_unit0 (insn)
3977 rtx insn;
3979 if (recog_memoized (insn) >= 0)
3980 return get_attr_itanium_requires_unit0 (insn);
3981 else
3982 return ITANIUM_REQUIRES_UNIT0_NO;
3985 static enum attr_itanium_class
3986 ia64_safe_itanium_class (insn)
3987 rtx insn;
3989 if (recog_memoized (insn) >= 0)
3990 return get_attr_itanium_class (insn);
3991 else
3992 return ITANIUM_CLASS_UNKNOWN;
3995 static enum attr_type
3996 ia64_safe_type (insn)
3997 rtx insn;
3999 if (recog_memoized (insn) >= 0)
4000 return get_attr_type (insn);
4001 else
4002 return TYPE_UNKNOWN;
4005 /* The following collection of routines emit instruction group stop bits as
4006 necessary to avoid dependencies. */
4008 /* Need to track some additional registers as far as serialization is
4009 concerned so we can properly handle br.call and br.ret. We could
4010 make these registers visible to gcc, but since these registers are
4011 never explicitly used in gcc generated code, it seems wasteful to
4012 do so (plus it would make the call and return patterns needlessly
4013 complex). */
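/* For illustration: REG_RP is the return pointer b0, which br.call writes
   and br.ret reads, and REG_AR_CFM stands in for the current frame marker,
   which is modified by alloc as well as by br.call and br.ret (see the
   CALL, RETURN and alloc cases in rtx_needs_barrier below).  */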
4014 #define REG_GP (GR_REG (1))
4015 #define REG_RP (BR_REG (0))
4016 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4017 /* This is used for volatile asms which may require a stop bit immediately
4018 before and after them. */
4019 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4020 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4021 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4023 /* For each register, we keep track of how it has been written in the
4024 current instruction group.
4026 If a register is written unconditionally (no qualifying predicate),
4027 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4029 If a register is written if its qualifying predicate P is true, we
4030 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4031 may be written again by the complement of P (P^1) and when this happens,
4032 WRITE_COUNT gets set to 2.
4034 The result of this is that whenever an insn attempts to write a register
4035 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4037 If a predicate register is written by a floating-point insn, we set
4038 WRITTEN_BY_FP to true.
4040 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4041 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
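/* A worked example (register numbers are illustrative only): an
   unconditional "mov r14 = r15" sets WRITE_COUNT for r14 straight to 2.
   A predicated "(p6) mov r14 = r15" sets WRITE_COUNT to 1 with FIRST_PRED
   recording p6; a following "(p7) mov r14 = r16" under the complementary
   predicate raises WRITE_COUNT to 2.  Any later write of r14 in the same
   instruction group then requires a stop bit first.  */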
4043 struct reg_write_state
4045 unsigned int write_count : 2;
4046 unsigned int first_pred : 16;
4047 unsigned int written_by_fp : 1;
4048 unsigned int written_by_and : 1;
4049 unsigned int written_by_or : 1;
4052 /* Cumulative info for the current instruction group. */
4053 struct reg_write_state rws_sum[NUM_REGS];
4054 /* Info for the current instruction. This gets copied to rws_sum after a
4055 stop bit is emitted. */
4056 struct reg_write_state rws_insn[NUM_REGS];
4058 /* Indicates whether this is the first instruction after a stop bit,
4059 in which case we don't need another stop bit. Without this, we hit
4060 the abort in ia64_variable_issue when scheduling an alloc. */
4061 static int first_instruction;
4063 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4064 RTL for one instruction. */
4065 struct reg_flags
4067 unsigned int is_write : 1; /* Is register being written? */
4068 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4069 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4070 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4071 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4072 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4075 static void rws_update PARAMS ((struct reg_write_state *, int,
4076 struct reg_flags, int));
4077 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4078 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4079 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4080 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4081 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4082 static void init_insn_group_barriers PARAMS ((void));
4083 static int group_barrier_needed_p PARAMS ((rtx));
4084 static int safe_group_barrier_needed_p PARAMS ((rtx));
4086 /* Update *RWS for REGNO, which is being written by the current instruction,
4087 with predicate PRED, and associated register flags in FLAGS. */
4089 static void
4090 rws_update (rws, regno, flags, pred)
4091 struct reg_write_state *rws;
4092 int regno;
4093 struct reg_flags flags;
4094 int pred;
4096 if (pred)
4097 rws[regno].write_count++;
4098 else
4099 rws[regno].write_count = 2;
4100 rws[regno].written_by_fp |= flags.is_fp;
4101 /* ??? Not tracking and/or across differing predicates. */
4102 rws[regno].written_by_and = flags.is_and;
4103 rws[regno].written_by_or = flags.is_or;
4104 rws[regno].first_pred = pred;
4107 /* Handle an access to register REGNO of type FLAGS using predicate register
4108 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4109 a dependency with an earlier instruction in the same group. */
4111 static int
4112 rws_access_regno (regno, flags, pred)
4113 int regno;
4114 struct reg_flags flags;
4115 int pred;
4117 int need_barrier = 0;
4119 if (regno >= NUM_REGS)
4120 abort ();
4122 if (! PR_REGNO_P (regno))
4123 flags.is_and = flags.is_or = 0;
4125 if (flags.is_write)
4127 int write_count;
4129 /* One insn writes same reg multiple times? */
4130 if (rws_insn[regno].write_count > 0)
4131 abort ();
4133 /* Update info for current instruction. */
4134 rws_update (rws_insn, regno, flags, pred);
4135 write_count = rws_sum[regno].write_count;
4137 switch (write_count)
4139 case 0:
4140 /* The register has not been written yet. */
4141 rws_update (rws_sum, regno, flags, pred);
4142 break;
4144 case 1:
4145 /* The register has been written via a predicate. If this is
4146 not a complementary predicate, then we need a barrier. */
4147 /* ??? This assumes that P and P+1 are always complementary
4148 predicates for P even. */
4149 if (flags.is_and && rws_sum[regno].written_by_and)
4151 else if (flags.is_or && rws_sum[regno].written_by_or)
4153 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4154 need_barrier = 1;
4155 rws_update (rws_sum, regno, flags, pred);
4156 break;
4158 case 2:
4159 /* The register has been unconditionally written already. We
4160 need a barrier. */
4161 if (flags.is_and && rws_sum[regno].written_by_and)
4163 else if (flags.is_or && rws_sum[regno].written_by_or)
4165 else
4166 need_barrier = 1;
4167 rws_sum[regno].written_by_and = flags.is_and;
4168 rws_sum[regno].written_by_or = flags.is_or;
4169 break;
4171 default:
4172 abort ();
4175 else
4177 if (flags.is_branch)
4179 	  /* Branches have several RAW exceptions that allow us to avoid
4180 barriers. */
4182 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4183 /* RAW dependencies on branch regs are permissible as long
4184 as the writer is a non-branch instruction. Since we
4185 never generate code that uses a branch register written
4186 by a branch instruction, handling this case is
4187 easy. */
4188 return 0;
4190 if (REGNO_REG_CLASS (regno) == PR_REGS
4191 && ! rws_sum[regno].written_by_fp)
4192 /* The predicates of a branch are available within the
4193 same insn group as long as the predicate was written by
4194 something other than a floating-point instruction. */
4195 return 0;
4198 if (flags.is_and && rws_sum[regno].written_by_and)
4199 return 0;
4200 if (flags.is_or && rws_sum[regno].written_by_or)
4201 return 0;
4203 switch (rws_sum[regno].write_count)
4205 case 0:
4206 /* The register has not been written yet. */
4207 break;
4209 case 1:
4210 /* The register has been written via a predicate. If this is
4211 not a complementary predicate, then we need a barrier. */
4212 /* ??? This assumes that P and P+1 are always complementary
4213 predicates for P even. */
4214 if ((rws_sum[regno].first_pred ^ 1) != pred)
4215 need_barrier = 1;
4216 break;
4218 case 2:
4219 /* The register has been unconditionally written already. We
4220 need a barrier. */
4221 need_barrier = 1;
4222 break;
4224 default:
4225 abort ();
4229 return need_barrier;
4232 static int
4233 rws_access_reg (reg, flags, pred)
4234 rtx reg;
4235 struct reg_flags flags;
4236 int pred;
4238 int regno = REGNO (reg);
4239 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4241 if (n == 1)
4242 return rws_access_regno (regno, flags, pred);
4243 else
4245 int need_barrier = 0;
4246 while (--n >= 0)
4247 need_barrier |= rws_access_regno (regno + n, flags, pred);
4248 return need_barrier;
4252 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4253 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4255 static void
4256 update_set_flags (x, pflags, ppred, pcond)
4257 rtx x;
4258 struct reg_flags *pflags;
4259 int *ppred;
4260 rtx *pcond;
4262 rtx src = SET_SRC (x);
4264 *pcond = 0;
4266 switch (GET_CODE (src))
4268 case CALL:
4269 return;
4271 case IF_THEN_ELSE:
4272 if (SET_DEST (x) == pc_rtx)
4273 /* X is a conditional branch. */
4274 return;
4275 else
4277 int is_complemented = 0;
4279 /* X is a conditional move. */
4280 rtx cond = XEXP (src, 0);
4281 if (GET_CODE (cond) == EQ)
4282 is_complemented = 1;
4283 cond = XEXP (cond, 0);
4284 if (GET_CODE (cond) != REG
4285 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4286 abort ();
4287 *pcond = cond;
4288 if (XEXP (src, 1) == SET_DEST (x)
4289 || XEXP (src, 2) == SET_DEST (x))
4291 /* X is a conditional move that conditionally writes the
4292 destination. */
4294 /* We need another complement in this case. */
4295 if (XEXP (src, 1) == SET_DEST (x))
4296 is_complemented = ! is_complemented;
4298 *ppred = REGNO (cond);
4299 if (is_complemented)
4300 ++*ppred;
4303 /* ??? If this is a conditional write to the dest, then this
4304 instruction does not actually read one source. This probably
4305 doesn't matter, because that source is also the dest. */
4306 /* ??? Multiple writes to predicate registers are allowed
4307 if they are all AND type compares, or if they are all OR
4308 type compares. We do not generate such instructions
4309 currently. */
4311 /* ... fall through ... */
4313 default:
4314 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4315 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4316 /* Set pflags->is_fp to 1 so that we know we're dealing
4317 with a floating point comparison when processing the
4318 destination of the SET. */
4319 pflags->is_fp = 1;
4321 /* Discover if this is a parallel comparison. We only handle
4322 and.orcm and or.andcm at present, since we must retain a
4323 strict inverse on the predicate pair. */
4324 else if (GET_CODE (src) == AND)
4325 pflags->is_and = 1;
4326 else if (GET_CODE (src) == IOR)
4327 pflags->is_or = 1;
4329 break;
4333 /* Subroutine of rtx_needs_barrier; this function determines whether the
4334 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4335 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4336 for this insn. */
4338 static int
4339 set_src_needs_barrier (x, flags, pred, cond)
4340 rtx x;
4341 struct reg_flags flags;
4342 int pred;
4343 rtx cond;
4345 int need_barrier = 0;
4346 rtx dst;
4347 rtx src = SET_SRC (x);
4349 if (GET_CODE (src) == CALL)
4350 /* We don't need to worry about the result registers that
4351        get written by a subroutine call.  */
4352 return rtx_needs_barrier (src, flags, pred);
4353 else if (SET_DEST (x) == pc_rtx)
4355 /* X is a conditional branch. */
4356 /* ??? This seems redundant, as the caller sets this bit for
4357 all JUMP_INSNs. */
4358 flags.is_branch = 1;
4359 return rtx_needs_barrier (src, flags, pred);
4362 need_barrier = rtx_needs_barrier (src, flags, pred);
4364 /* This instruction unconditionally uses a predicate register. */
4365 if (cond)
4366 need_barrier |= rws_access_reg (cond, flags, 0);
4368 dst = SET_DEST (x);
4369 if (GET_CODE (dst) == ZERO_EXTRACT)
4371 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4372 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4373 dst = XEXP (dst, 0);
4375 return need_barrier;
4378 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4379    Return 1 if this access creates a dependency with an earlier instruction
4380 in the same group. */
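/* For example, if an earlier insn in the current group has already written
   a register unconditionally and X reads or writes that register again, the
   return value is 1, and the caller (see group_barrier_needed_p) then
   arranges for a stop bit before the current insn.  */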
4382 static int
4383 rtx_needs_barrier (x, flags, pred)
4384 rtx x;
4385 struct reg_flags flags;
4386 int pred;
4388 int i, j;
4389 int is_complemented = 0;
4390 int need_barrier = 0;
4391 const char *format_ptr;
4392 struct reg_flags new_flags;
4393 rtx cond = 0;
4395 if (! x)
4396 return 0;
4398 new_flags = flags;
4400 switch (GET_CODE (x))
4402 case SET:
4403 update_set_flags (x, &new_flags, &pred, &cond);
4404 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4405 if (GET_CODE (SET_SRC (x)) != CALL)
4407 new_flags.is_write = 1;
4408 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4410 break;
4412 case CALL:
4413 new_flags.is_write = 0;
4414 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4416 /* Avoid multiple register writes, in case this is a pattern with
4417 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4418 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4420 new_flags.is_write = 1;
4421 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4422 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4423 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4425 break;
4427 case COND_EXEC:
4428 /* X is a predicated instruction. */
4430 cond = COND_EXEC_TEST (x);
4431 if (pred)
4432 abort ();
4433 need_barrier = rtx_needs_barrier (cond, flags, 0);
4435 if (GET_CODE (cond) == EQ)
4436 is_complemented = 1;
4437 cond = XEXP (cond, 0);
4438 if (GET_CODE (cond) != REG
4439 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4440 abort ();
4441 pred = REGNO (cond);
4442 if (is_complemented)
4443 ++pred;
4445 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4446 return need_barrier;
4448 case CLOBBER:
4449 case USE:
4450 /* Clobber & use are for earlier compiler-phases only. */
4451 break;
4453 case ASM_OPERANDS:
4454 case ASM_INPUT:
4455 /* We always emit stop bits for traditional asms. We emit stop bits
4456 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4457 if (GET_CODE (x) != ASM_OPERANDS
4458 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4460 /* Avoid writing the register multiple times if we have multiple
4461 asm outputs. This avoids an abort in rws_access_reg. */
4462 if (! rws_insn[REG_VOLATILE].write_count)
4464 new_flags.is_write = 1;
4465 rws_access_regno (REG_VOLATILE, new_flags, pred);
4467 return 1;
4470 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4471 	 We cannot just fall through here since then we would be confused
4472 	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4473 	 traditional asms, unlike their normal usage.  */
4475 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4476 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4477 need_barrier = 1;
4478 break;
4480 case PARALLEL:
4481 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4483 rtx pat = XVECEXP (x, 0, i);
4484 if (GET_CODE (pat) == SET)
4486 update_set_flags (pat, &new_flags, &pred, &cond);
4487 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4489 else if (GET_CODE (pat) == USE
4490 || GET_CODE (pat) == CALL
4491 || GET_CODE (pat) == ASM_OPERANDS)
4492 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4493 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4494 abort ();
4496 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4498 rtx pat = XVECEXP (x, 0, i);
4499 if (GET_CODE (pat) == SET)
4501 if (GET_CODE (SET_SRC (pat)) != CALL)
4503 new_flags.is_write = 1;
4504 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4505 pred);
4508 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4509 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4511 break;
4513 case SUBREG:
4514 x = SUBREG_REG (x);
4515 /* FALLTHRU */
4516 case REG:
4517 if (REGNO (x) == AR_UNAT_REGNUM)
4519 for (i = 0; i < 64; ++i)
4520 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4522 else
4523 need_barrier = rws_access_reg (x, flags, pred);
4524 break;
4526 case MEM:
4527 /* Find the regs used in memory address computation. */
4528 new_flags.is_write = 0;
4529 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4530 break;
4532 case CONST_INT: case CONST_DOUBLE:
4533 case SYMBOL_REF: case LABEL_REF: case CONST:
4534 break;
4536 /* Operators with side-effects. */
4537 case POST_INC: case POST_DEC:
4538 if (GET_CODE (XEXP (x, 0)) != REG)
4539 abort ();
4541 new_flags.is_write = 0;
4542 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4543 new_flags.is_write = 1;
4544 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4545 break;
4547 case POST_MODIFY:
4548 if (GET_CODE (XEXP (x, 0)) != REG)
4549 abort ();
4551 new_flags.is_write = 0;
4552 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4553 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4554 new_flags.is_write = 1;
4555 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4556 break;
4558 /* Handle common unary and binary ops for efficiency. */
4559 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4560 case MOD: case UDIV: case UMOD: case AND: case IOR:
4561 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4562 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4563 case NE: case EQ: case GE: case GT: case LE:
4564 case LT: case GEU: case GTU: case LEU: case LTU:
4565 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4566 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4567 break;
4569 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4570 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4571 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4572 case SQRT: case FFS:
4573 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4574 break;
4576 case UNSPEC:
4577 switch (XINT (x, 1))
4579 case 1: /* st8.spill */
4580 case 2: /* ld8.fill */
4582 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4583 HOST_WIDE_INT bit = (offset >> 3) & 63;
4585 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4586 new_flags.is_write = (XINT (x, 1) == 1);
4587 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4588 new_flags, pred);
4589 break;
4592 case 3: /* stf.spill */
4593 case 4: /* ldf.spill */
4594 case 8: /* popcnt */
4595 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4596 break;
4598 case 7: /* pred_rel_mutex */
4599 case 9: /* pic call */
4600 case 12: /* mf */
4601 case 19: /* fetchadd_acq */
4602 case 20: /* mov = ar.bsp */
4603 case 21: /* flushrs */
4604 case 22: /* bundle selector */
4605 break;
4607 case 24: /* addp4 */
4608 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4609 break;
4611 case 5: /* recip_approx */
4612 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4613 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4614 break;
4616 case 13: /* cmpxchg_acq */
4617 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4618 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4619 break;
4621 default:
4622 abort ();
4624 break;
4626 case UNSPEC_VOLATILE:
4627 switch (XINT (x, 1))
4629 case 0: /* alloc */
4630 /* Alloc must always be the first instruction of a group.
4631 We force this by always returning true. */
4632 /* ??? We might get better scheduling if we explicitly check for
4633 input/local/output register dependencies, and modify the
4634 scheduler so that alloc is always reordered to the start of
4635 the current group. We could then eliminate all of the
4636 first_instruction code. */
4637 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4639 new_flags.is_write = 1;
4640 rws_access_regno (REG_AR_CFM, new_flags, pred);
4641 return 1;
4643 case 1: /* blockage */
4644 case 2: /* insn group barrier */
4645 return 0;
4647 case 5: /* set_bsp */
4648 need_barrier = 1;
4649 break;
4651 case 7: /* pred.rel.mutex */
4652 case 8: /* safe_across_calls all */
4653 case 9: /* safe_across_calls normal */
4654 return 0;
4656 default:
4657 abort ();
4659 break;
4661 case RETURN:
4662 new_flags.is_write = 0;
4663 need_barrier = rws_access_regno (REG_RP, flags, pred);
4664 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4666 new_flags.is_write = 1;
4667 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4668 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4669 break;
4671 default:
4672 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4673 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4674 switch (format_ptr[i])
4676 case '0': /* unused field */
4677 case 'i': /* integer */
4678 case 'n': /* note */
4679 case 'w': /* wide integer */
4680 case 's': /* pointer to string */
4681 case 'S': /* optional pointer to string */
4682 break;
4684 case 'e':
4685 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4686 need_barrier = 1;
4687 break;
4689 case 'E':
4690 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4691 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4692 need_barrier = 1;
4693 break;
4695 default:
4696 abort ();
4698 break;
4700 return need_barrier;
4703 /* Clear out the state for group_barrier_needed_p at the start of a
4704 sequence of insns. */
4706 static void
4707 init_insn_group_barriers ()
4709 memset (rws_sum, 0, sizeof (rws_sum));
4710 first_instruction = 1;
4713 /* Given the current state, recorded by previous calls to this function,
4714 determine whether a group barrier (a stop bit) is necessary before INSN.
4715 Return nonzero if so. */
4717 static int
4718 group_barrier_needed_p (insn)
4719 rtx insn;
4721 rtx pat;
4722 int need_barrier = 0;
4723 struct reg_flags flags;
4725 memset (&flags, 0, sizeof (flags));
4726 switch (GET_CODE (insn))
4728 case NOTE:
4729 break;
4731 case BARRIER:
4732 /* A barrier doesn't imply an instruction group boundary. */
4733 break;
4735 case CODE_LABEL:
4736 memset (rws_insn, 0, sizeof (rws_insn));
4737 return 1;
4739 case CALL_INSN:
4740 flags.is_branch = 1;
4741 flags.is_sibcall = SIBLING_CALL_P (insn);
4742 memset (rws_insn, 0, sizeof (rws_insn));
4744 /* Don't bundle a call following another call. */
4745 if ((pat = prev_active_insn (insn))
4746 && GET_CODE (pat) == CALL_INSN)
4748 need_barrier = 1;
4749 break;
4752 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4753 break;
4755 case JUMP_INSN:
4756 flags.is_branch = 1;
4758 /* Don't bundle a jump following a call. */
4759 if ((pat = prev_active_insn (insn))
4760 && GET_CODE (pat) == CALL_INSN)
4762 need_barrier = 1;
4763 break;
4765 /* FALLTHRU */
4767 case INSN:
4768 if (GET_CODE (PATTERN (insn)) == USE
4769 || GET_CODE (PATTERN (insn)) == CLOBBER)
4770 /* Don't care about USE and CLOBBER "insns"---those are used to
4771 indicate to the optimizer that it shouldn't get rid of
4772 certain operations. */
4773 break;
4775 pat = PATTERN (insn);
4777 /* Ug. Hack hacks hacked elsewhere. */
4778 switch (recog_memoized (insn))
4780 /* We play dependency tricks with the epilogue in order
4781 to get proper schedules. Undo this for dv analysis. */
4782 case CODE_FOR_epilogue_deallocate_stack:
4783 case CODE_FOR_prologue_allocate_stack:
4784 pat = XVECEXP (pat, 0, 0);
4785 break;
4787 /* The pattern we use for br.cloop confuses the code above.
4788 The second element of the vector is representative. */
4789 case CODE_FOR_doloop_end_internal:
4790 pat = XVECEXP (pat, 0, 1);
4791 break;
4793 /* Doesn't generate code. */
4794 case CODE_FOR_pred_rel_mutex:
4795 case CODE_FOR_prologue_use:
4796 return 0;
4798 default:
4799 break;
4802 memset (rws_insn, 0, sizeof (rws_insn));
4803 need_barrier = rtx_needs_barrier (pat, flags, 0);
4805 /* Check to see if the previous instruction was a volatile
4806 asm. */
4807 if (! need_barrier)
4808 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4809 break;
4811 default:
4812 abort ();
4815 if (first_instruction)
4817 need_barrier = 0;
4818 first_instruction = 0;
4821 return need_barrier;
4824 /* Like group_barrier_needed_p, but do not clobber the current state. */
4826 static int
4827 safe_group_barrier_needed_p (insn)
4828 rtx insn;
4830 struct reg_write_state rws_saved[NUM_REGS];
4831 int saved_first_instruction;
4832 int t;
4834 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4835 saved_first_instruction = first_instruction;
4837 t = group_barrier_needed_p (insn);
4839 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4840 first_instruction = saved_first_instruction;
4842 return t;
4845 /* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
4846    as necessary to eliminate dependencies.  This function assumes that
4847 a final instruction scheduling pass has been run which has already
4848 inserted most of the necessary stop bits. This function only inserts
4849 new ones at basic block boundaries, since these are invisible to the
4850 scheduler. */
4852 static void
4853 emit_insn_group_barriers (dump, insns)
4854 FILE *dump;
4855 rtx insns;
4857 rtx insn;
4858 rtx last_label = 0;
4859 int insns_since_last_label = 0;
4861 init_insn_group_barriers ();
4863 for (insn = insns; insn; insn = NEXT_INSN (insn))
4865 if (GET_CODE (insn) == CODE_LABEL)
4867 if (insns_since_last_label)
4868 last_label = insn;
4869 insns_since_last_label = 0;
4871 else if (GET_CODE (insn) == NOTE
4872 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4874 if (insns_since_last_label)
4875 last_label = insn;
4876 insns_since_last_label = 0;
4878 else if (GET_CODE (insn) == INSN
4879 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4880 && XINT (PATTERN (insn), 1) == 2)
4882 init_insn_group_barriers ();
4883 last_label = 0;
4885 else if (INSN_P (insn))
4887 insns_since_last_label = 1;
4889 if (group_barrier_needed_p (insn))
4891 if (last_label)
4893 if (dump)
4894 fprintf (dump, "Emitting stop before label %d\n",
4895 INSN_UID (last_label));
4896 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4897 insn = last_label;
4899 init_insn_group_barriers ();
4900 last_label = 0;
4907 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4908 This function has to emit all necessary group barriers. */
4910 static void
4911 emit_all_insn_group_barriers (dump, insns)
4912 FILE *dump ATTRIBUTE_UNUSED;
4913 rtx insns;
4915 rtx insn;
4917 init_insn_group_barriers ();
4919 for (insn = insns; insn; insn = NEXT_INSN (insn))
4921 if (GET_CODE (insn) == BARRIER)
4923 rtx last = prev_active_insn (insn);
4925 if (! last)
4926 continue;
4927 if (GET_CODE (last) == JUMP_INSN
4928 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
4929 last = prev_active_insn (last);
4930 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
4931 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
4933 init_insn_group_barriers ();
4935 else if (INSN_P (insn))
4937 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
4938 init_insn_group_barriers ();
4939 else if (group_barrier_needed_p (insn))
4941 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4942 init_insn_group_barriers ();
4943 group_barrier_needed_p (insn);
4949 static int errata_find_address_regs PARAMS ((rtx *, void *));
4950 static void errata_emit_nops PARAMS ((rtx));
4951 static void fixup_errata PARAMS ((void));
4953 /* This structure is used to track some details about the previous insn
4954    groups so we can determine if it may be necessary to insert NOPs to
4955    work around hardware errata.  */
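/* Roughly, the pattern handled below: a predicate written by a floating
   point instruction in one group, used in the next group to conditionally
   set a general register, which is then used as a memory address in the
   group after that.  When that sequence is detected, a stop bit, a nop,
   and another stop bit are emitted before the offending memory access.  */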
4956 static struct group
4958 HARD_REG_SET p_reg_set;
4959 HARD_REG_SET gr_reg_conditionally_set;
4960 } last_group[2];
4962 /* Index into the last_group array. */
4963 static int group_idx;
4965 /* Called through for_each_rtx; determines if a hard register that was
4966 conditionally set in the previous group is used as an address register.
4967 It ensures that for_each_rtx returns 1 in that case. */
4968 static int
4969 errata_find_address_regs (xp, data)
4970 rtx *xp;
4971 void *data ATTRIBUTE_UNUSED;
4973 rtx x = *xp;
4974 if (GET_CODE (x) != MEM)
4975 return 0;
4976 x = XEXP (x, 0);
4977 if (GET_CODE (x) == POST_MODIFY)
4978 x = XEXP (x, 0);
4979 if (GET_CODE (x) == REG)
4981 struct group *prev_group = last_group + (group_idx ^ 1);
4982 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4983 REGNO (x)))
4984 return 1;
4985 return -1;
4987 return 0;
4990 /* Called for each insn; this function keeps track of the state in
4991 last_group and emits additional NOPs if necessary to work around
4992 an Itanium A/B step erratum. */
4993 static void
4994 errata_emit_nops (insn)
4995 rtx insn;
4997 struct group *this_group = last_group + group_idx;
4998 struct group *prev_group = last_group + (group_idx ^ 1);
4999 rtx pat = PATTERN (insn);
5000 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5001 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5002 enum attr_type type;
5003 rtx set = real_pat;
5005 if (GET_CODE (real_pat) == USE
5006 || GET_CODE (real_pat) == CLOBBER
5007 || GET_CODE (real_pat) == ASM_INPUT
5008 || GET_CODE (real_pat) == ADDR_VEC
5009 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5010 || asm_noperands (PATTERN (insn)) >= 0)
5011 return;
5013 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5014 parts of it. */
5016 if (GET_CODE (set) == PARALLEL)
5018 int i;
5019 set = XVECEXP (real_pat, 0, 0);
5020 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5021 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5022 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5024 set = 0;
5025 break;
5029 if (set && GET_CODE (set) != SET)
5030 set = 0;
5032 type = get_attr_type (insn);
5034 if (type == TYPE_F
5035 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5036 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5038 if ((type == TYPE_M || type == TYPE_A) && cond && set
5039 && REG_P (SET_DEST (set))
5040 && GET_CODE (SET_SRC (set)) != PLUS
5041 && GET_CODE (SET_SRC (set)) != MINUS
5042 && (GET_CODE (SET_SRC (set)) != ASHIFT
5043 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5044 && (GET_CODE (SET_SRC (set)) != MEM
5045 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5046 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5048 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5049 || ! REG_P (XEXP (cond, 0)))
5050 abort ();
5052 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5053 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5055 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5057 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5058 emit_insn_before (gen_nop (), insn);
5059 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5060 group_idx = 0;
5061 memset (last_group, 0, sizeof last_group);
5065 /* Emit extra nops if they are required to work around hardware errata. */
5067 static void
5068 fixup_errata ()
5070 rtx insn;
5072 if (! TARGET_B_STEP)
5073 return;
5075 group_idx = 0;
5076 memset (last_group, 0, sizeof last_group);
5078 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5080 if (!INSN_P (insn))
5081 continue;
5083 if (ia64_safe_type (insn) == TYPE_S)
5085 group_idx ^= 1;
5086 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5088 else
5089 errata_emit_nops (insn);
5093 /* Instruction scheduling support. */
5094 /* Describe one bundle. */
5096 struct bundle
5098 /* Zero if there's no possibility of a stop in this bundle other than
5099 at the end, otherwise the position of the optional stop bit. */
5100 int possible_stop;
5101 /* The types of the three slots. */
5102 enum attr_type t[3];
5103 /* The pseudo op to be emitted into the assembler output. */
5104 const char *name;
5107 #define NR_BUNDLES 10
5109 /* A list of all available bundles. */
5111 static const struct bundle bundle[NR_BUNDLES] =
5113 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5114 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5115 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5116 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5117 #if NR_BUNDLES == 10
5118 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5119 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5120 #endif
5121 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5122 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5123 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5124 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5125 it matches an L type insn. Otherwise we'll try to generate L type
5126 nops. */
5127 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5130 /* Describe a packet of instructions. Packets consist of two bundles that
5131 are visible to the hardware in one scheduling window. */
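/* E.g., pairing the .mii and .mfb bundles gives a packet whose six slot
   types are M, I, I, M, F, B.  */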
5133 struct ia64_packet
5135 const struct bundle *t1, *t2;
5136 /* Precomputed value of the first split issue in this packet if a cycle
5137 starts at its beginning. */
5138 int first_split;
5139 /* For convenience, the insn types are replicated here so we don't have
5140 to go through T1 and T2 all the time. */
5141 enum attr_type t[6];
5144 /* An array containing all possible packets. */
5145 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5146 static struct ia64_packet packets[NR_PACKETS];
5148 /* Map attr_type to a string with the name. */
5150 static const char *const type_names[] =
5152 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5155 /* Nonzero if we should insert stop bits into the schedule. */
5156 int ia64_final_schedule = 0;
5158 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5159 static rtx ia64_single_set PARAMS ((rtx));
5160 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5161 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5162 static void maybe_rotate PARAMS ((FILE *));
5163 static void finish_last_head PARAMS ((FILE *, int));
5164 static void rotate_one_bundle PARAMS ((FILE *));
5165 static void rotate_two_bundles PARAMS ((FILE *));
5166 static void nop_cycles_until PARAMS ((int, FILE *));
5167 static void cycle_end_fill_slots PARAMS ((FILE *));
5168 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5169 static int get_split PARAMS ((const struct ia64_packet *, int));
5170 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5171 const struct ia64_packet *, int));
5172 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5173 rtx *, enum attr_type *, int));
5174 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5175 static void dump_current_packet PARAMS ((FILE *));
5176 static void schedule_stop PARAMS ((FILE *));
5177 static rtx gen_nop_type PARAMS ((enum attr_type));
5178 static void ia64_emit_nops PARAMS ((void));
5180 /* Map a bundle number to its pseudo-op. */
5182 const char *
5183 get_bundle_name (b)
5184 int b;
5186 return bundle[b].name;
5189 /* Compute the slot which will cause a split issue in packet P if the
5190 current cycle begins at slot BEGIN. */
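/* E.g., with two .mii bundles and BEGIN == 0, the split falls at slot 4:
   only two I instructions can issue per cycle, so the third I instruction
   cannot issue in the same cycle as the first two.  */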
5192 static int
5193 itanium_split_issue (p, begin)
5194 const struct ia64_packet *p;
5195 int begin;
5197 int type_count[TYPE_S];
5198 int i;
5199 int split = 6;
5201 if (begin < 3)
5203 /* Always split before and after MMF. */
5204 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5205 return 3;
5206 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5207 return 3;
5208 /* Always split after MBB and BBB. */
5209 if (p->t[1] == TYPE_B)
5210 return 3;
5211 /* Split after first bundle in MIB BBB combination. */
5212 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5213 return 3;
5216 memset (type_count, 0, sizeof type_count);
5217 for (i = begin; i < split; i++)
5219 enum attr_type t0 = p->t[i];
5220 /* An MLX bundle reserves the same units as an MFI bundle. */
5221 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5222 : t0 == TYPE_X ? TYPE_I
5223 : t0);
5225 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5226 2 integer per cycle. */
5227 int max = (t == TYPE_B ? 3 : 2);
5228 if (type_count[t] == max)
5229 return i;
5231 type_count[t]++;
5233 return split;
5236 /* Return the maximum number of instructions a cpu can issue. */
5238 static int
5239 ia64_issue_rate ()
5241 return 6;
5244 /* Helper function - like single_set, but look inside COND_EXEC. */
5246 static rtx
5247 ia64_single_set (insn)
5248 rtx insn;
5250 rtx x = PATTERN (insn), ret;
5251 if (GET_CODE (x) == COND_EXEC)
5252 x = COND_EXEC_CODE (x);
5253 if (GET_CODE (x) == SET)
5254 return x;
5256 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5257 Although they are not classical single set, the second set is there just
5258 to protect it from moving past FP-relative stack accesses. */
5259 switch (recog_memoized (insn))
5261 case CODE_FOR_prologue_allocate_stack:
5262 case CODE_FOR_epilogue_deallocate_stack:
5263 ret = XVECEXP (x, 0, 0);
5264 break;
5266 default:
5267 ret = single_set_2 (insn, x);
5268 break;
5271 return ret;
5274 /* Adjust the cost of a scheduling dependency. Return the new cost of
5275 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
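/* Two representative adjustments made below: an integer ALU or load result
   that feeds the address of a subsequent load or store costs one extra
   cycle, and a multimedia (MM) result consumed by a non-MM instruction is
   given a latency of 4 cycles.  */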
5277 static int
5278 ia64_adjust_cost (insn, link, dep_insn, cost)
5279 rtx insn, link, dep_insn;
5280 int cost;
5282 enum attr_type dep_type;
5283 enum attr_itanium_class dep_class;
5284 enum attr_itanium_class insn_class;
5285 rtx dep_set, set, src, addr;
5287 if (GET_CODE (PATTERN (insn)) == CLOBBER
5288 || GET_CODE (PATTERN (insn)) == USE
5289 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5290 || GET_CODE (PATTERN (dep_insn)) == USE
5291 /* @@@ Not accurate for indirect calls. */
5292 || GET_CODE (insn) == CALL_INSN
5293 || ia64_safe_type (insn) == TYPE_S)
5294 return 0;
5296 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5297 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5298 return 0;
5300 dep_type = ia64_safe_type (dep_insn);
5301 dep_class = ia64_safe_itanium_class (dep_insn);
5302 insn_class = ia64_safe_itanium_class (insn);
5304 /* Compares that feed a conditional branch can execute in the same
5305 cycle. */
5306 dep_set = ia64_single_set (dep_insn);
5307 set = ia64_single_set (insn);
5309 if (dep_type != TYPE_F
5310 && dep_set
5311 && GET_CODE (SET_DEST (dep_set)) == REG
5312 && PR_REG (REGNO (SET_DEST (dep_set)))
5313 && GET_CODE (insn) == JUMP_INSN)
5314 return 0;
5316 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5318       /* ??? Can't find any information in the documentation about whether
5319 a sequence
5320 st [rx] = ra
5321 ld rb = [ry]
5322 splits issue. Assume it doesn't. */
5323 return 0;
5326 src = set ? SET_SRC (set) : 0;
5327 addr = 0;
5328 if (set)
5330 if (GET_CODE (SET_DEST (set)) == MEM)
5331 addr = XEXP (SET_DEST (set), 0);
5332 else if (GET_CODE (SET_DEST (set)) == SUBREG
5333 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5334 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5335 else
5337 addr = src;
5338 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5339 addr = XVECEXP (addr, 0, 0);
5340 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5341 addr = XEXP (addr, 0);
5342 if (GET_CODE (addr) == MEM)
5343 addr = XEXP (addr, 0);
5344 else
5345 addr = 0;
5349 if (addr && GET_CODE (addr) == POST_MODIFY)
5350 addr = XEXP (addr, 0);
5352 set = ia64_single_set (dep_insn);
5354 if ((dep_class == ITANIUM_CLASS_IALU
5355 || dep_class == ITANIUM_CLASS_ILOG
5356 || dep_class == ITANIUM_CLASS_LD)
5357 && (insn_class == ITANIUM_CLASS_LD
5358 || insn_class == ITANIUM_CLASS_ST))
5360 if (! addr || ! set)
5361 abort ();
5362 /* This isn't completely correct - an IALU that feeds an address has
5363 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5364 otherwise. Unfortunately there's no good way to describe this. */
5365 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5366 return cost + 1;
5369 if ((dep_class == ITANIUM_CLASS_IALU
5370 || dep_class == ITANIUM_CLASS_ILOG
5371 || dep_class == ITANIUM_CLASS_LD)
5372 && (insn_class == ITANIUM_CLASS_MMMUL
5373 || insn_class == ITANIUM_CLASS_MMSHF
5374 || insn_class == ITANIUM_CLASS_MMSHFI))
5375 return 3;
5377 if (dep_class == ITANIUM_CLASS_FMAC
5378 && (insn_class == ITANIUM_CLASS_FMISC
5379 || insn_class == ITANIUM_CLASS_FCVTFX
5380 || insn_class == ITANIUM_CLASS_XMPY))
5381 return 7;
5383 if ((dep_class == ITANIUM_CLASS_FMAC
5384 || dep_class == ITANIUM_CLASS_FMISC
5385 || dep_class == ITANIUM_CLASS_FCVTFX
5386 || dep_class == ITANIUM_CLASS_XMPY)
5387 && insn_class == ITANIUM_CLASS_STF)
5388 return 8;
5390 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5391 but HP engineers say any non-MM operation. */
5392 if ((dep_class == ITANIUM_CLASS_MMMUL
5393 || dep_class == ITANIUM_CLASS_MMSHF
5394 || dep_class == ITANIUM_CLASS_MMSHFI)
5395 && insn_class != ITANIUM_CLASS_MMMUL
5396 && insn_class != ITANIUM_CLASS_MMSHF
5397 && insn_class != ITANIUM_CLASS_MMSHFI)
5398 return 4;
5400 return cost;
5403 /* Describe the current state of the Itanium pipeline. */
5404 static struct
5406 /* The first slot that is used in the current cycle. */
5407 int first_slot;
5408 /* The next slot to fill. */
5409 int cur;
5410 /* The packet we have selected for the current issue window. */
5411 const struct ia64_packet *packet;
5412 /* The position of the split issue that occurs due to issue width
5413 limitations (6 if there's no split issue). */
5414 int split;
5415 /* Record data about the insns scheduled so far in the same issue
5416 window. The elements up to but not including FIRST_SLOT belong
5417 to the previous cycle, the ones starting with FIRST_SLOT belong
5418 to the current cycle. */
5419 enum attr_type types[6];
5420 rtx insns[6];
5421 int stopbit[6];
5422 /* Nonzero if we decided to schedule a stop bit. */
5423 int last_was_stop;
5424 } sched_data;
5426 /* Temporary arrays; they have enough elements to hold all insns that
5427 can be ready at the same time while scheduling of the current block.
5428 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5429 static rtx *sched_ready;
5430 static enum attr_type *sched_types;
5432 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5433 of packet P. */
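/* A TYPE_A (ALU) insn is accepted by either an M or an I slot; every other
   type must match the slot type exactly, subject to the unit-0 and
   multiway-branch restrictions checked below.  */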
5435 static int
5436 insn_matches_slot (p, itype, slot, insn)
5437 const struct ia64_packet *p;
5438 enum attr_type itype;
5439 int slot;
5440 rtx insn;
5442 enum attr_itanium_requires_unit0 u0;
5443 enum attr_type stype = p->t[slot];
5445 if (insn)
5447 u0 = ia64_safe_itanium_requires_unit0 (insn);
5448 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5450 int i;
5451 for (i = sched_data.first_slot; i < slot; i++)
5452 if (p->t[i] == stype
5453 || (stype == TYPE_F && p->t[i] == TYPE_L)
5454 || (stype == TYPE_I && p->t[i] == TYPE_X))
5455 return 0;
5457 if (GET_CODE (insn) == CALL_INSN)
5459 /* Reject calls in multiway branch packets. We want to limit
5460 the number of multiway branches we generate (since the branch
5461 predictor is limited), and this seems to work fairly well.
5462 (If we didn't do this, we'd have to add another test here to
5463 force calls into the third slot of the bundle.) */
5464 if (slot < 3)
5466 if (p->t[1] == TYPE_B)
5467 return 0;
5469 else
5471 if (p->t[4] == TYPE_B)
5472 return 0;
5477 if (itype == stype)
5478 return 1;
5479 if (itype == TYPE_A)
5480 return stype == TYPE_M || stype == TYPE_I;
5481 return 0;
5484 /* Like emit_insn_before, but skip cycle_display notes.
5485 ??? When cycle display notes are implemented, update this. */
5487 static void
5488 ia64_emit_insn_before (insn, before)
5489 rtx insn, before;
5491 emit_insn_before (insn, before);
5494 /* When rotating a bundle out of the issue window, insert a bundle selector
5495 insn in front of it. DUMP is the scheduling dump file or NULL. START
5496 is either 0 or 3, depending on whether we want to emit a bundle selector
5497 for the first bundle or the second bundle in the current issue window.
5499 The selector insns are emitted this late because the selected packet can
5500 be changed until parts of it get rotated out. */
5502 static void
5503 finish_last_head (dump, start)
5504 FILE *dump;
5505 int start;
5507 const struct ia64_packet *p = sched_data.packet;
5508 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5509 int bundle_type = b - bundle;
5510 rtx insn;
5511 int i;
5513 if (! ia64_final_schedule)
5514 return;
5516 for (i = start; sched_data.insns[i] == 0; i++)
5517 if (i == start + 3)
5518 abort ();
5519 insn = sched_data.insns[i];
5521 if (dump)
5522 fprintf (dump, "// Emitting template before %d: %s\n",
5523 INSN_UID (insn), b->name);
5525 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5528 /* We can't schedule more insns this cycle. Fix up the scheduling state
5529 and advance FIRST_SLOT and CUR.
5530 We have to distribute the insns that are currently found between
5531 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5532 far, they are stored successively in the fields starting at FIRST_SLOT;
5533 now they must be moved to the correct slots.
5534 DUMP is the current scheduling dump file, or NULL. */
5536 static void
5537 cycle_end_fill_slots (dump)
5538 FILE *dump;
5540 const struct ia64_packet *packet = sched_data.packet;
5541 int slot, i;
5542 enum attr_type tmp_types[6];
5543 rtx tmp_insns[6];
5545 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5546 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5548 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5550 enum attr_type t = tmp_types[i];
5551 if (t != ia64_safe_type (tmp_insns[i]))
5552 abort ();
5553 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5555 if (slot > sched_data.split)
5556 abort ();
5557 if (dump)
5558 fprintf (dump, "// Packet needs %s, have %s\n",
5559 type_names[packet->t[slot]], type_names[t]);
5560 sched_data.types[slot] = packet->t[slot];
5561 sched_data.insns[slot] = 0;
5562 sched_data.stopbit[slot] = 0;
5564 /* ??? TYPE_L instructions always fill up two slots, but we don't
5565 support TYPE_L nops. */
5566 if (packet->t[slot] == TYPE_L)
5567 abort ();
5569 slot++;
5572 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5573 actual slot type later. */
5574 sched_data.types[slot] = packet->t[slot];
5575 sched_data.insns[slot] = tmp_insns[i];
5576 sched_data.stopbit[slot] = 0;
5577 slot++;
5579 /* TYPE_L instructions always fill up two slots. */
5580 if (t == TYPE_L)
5582 sched_data.types[slot] = packet->t[slot];
5583 sched_data.insns[slot] = 0;
5584 sched_data.stopbit[slot] = 0;
5585 slot++;
5589 /* This isn't right - there's no need to pad out until the forced split;
5590 the CPU will automatically split if an insn isn't ready. */
5591 #if 0
5592 while (slot < sched_data.split)
5594 sched_data.types[slot] = packet->t[slot];
5595 sched_data.insns[slot] = 0;
5596 sched_data.stopbit[slot] = 0;
5597 slot++;
5599 #endif
5601 sched_data.first_slot = sched_data.cur = slot;
5604 /* Bundle rotations, as described in the Itanium optimization manual.
5605 We can rotate either one or both bundles out of the issue window.
5606 DUMP is the current scheduling dump file, or NULL. */
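/* E.g., rotating one bundle emits the bundle selector for the first three
   slots and shifts any insns from slots 3..5 down to slots 0..2, so the
   second bundle of the old packet becomes the first bundle of the new
   one.  */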
5608 static void
5609 rotate_one_bundle (dump)
5610 FILE *dump;
5612 if (dump)
5613 fprintf (dump, "// Rotating one bundle.\n");
5615 finish_last_head (dump, 0);
5616 if (sched_data.cur > 3)
5618 sched_data.cur -= 3;
5619 sched_data.first_slot -= 3;
5620 memmove (sched_data.types,
5621 sched_data.types + 3,
5622 sched_data.cur * sizeof *sched_data.types);
5623 memmove (sched_data.stopbit,
5624 sched_data.stopbit + 3,
5625 sched_data.cur * sizeof *sched_data.stopbit);
5626 memmove (sched_data.insns,
5627 sched_data.insns + 3,
5628 sched_data.cur * sizeof *sched_data.insns);
5629 sched_data.packet
5630 = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
5632 else
5634 sched_data.cur = 0;
5635 sched_data.first_slot = 0;
5639 static void
5640 rotate_two_bundles (dump)
5641 FILE *dump;
5643 if (dump)
5644 fprintf (dump, "// Rotating two bundles.\n");
5646 if (sched_data.cur == 0)
5647 return;
5649 finish_last_head (dump, 0);
5650 if (sched_data.cur > 3)
5651 finish_last_head (dump, 3);
5652 sched_data.cur = 0;
5653 sched_data.first_slot = 0;
5656 /* We're beginning a new block. Initialize data structures as necessary. */
5658 static void
5659 ia64_sched_init (dump, sched_verbose, max_ready)
5660 FILE *dump ATTRIBUTE_UNUSED;
5661 int sched_verbose ATTRIBUTE_UNUSED;
5662 int max_ready;
5664 static int initialized = 0;
5666 if (! initialized)
5668 int b1, b2, i;
5670 initialized = 1;
5672 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5674 const struct bundle *t1 = bundle + b1;
5675 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5677 const struct bundle *t2 = bundle + b2;
5679 packets[i].t1 = t1;
5680 packets[i].t2 = t2;
5683 for (i = 0; i < NR_PACKETS; i++)
5685 int j;
5686 for (j = 0; j < 3; j++)
5687 packets[i].t[j] = packets[i].t1->t[j];
5688 for (j = 0; j < 3; j++)
5689 packets[i].t[j + 3] = packets[i].t2->t[j];
5690 packets[i].first_split = itanium_split_issue (packets + i, 0);
5695 init_insn_group_barriers ();
5697 memset (&sched_data, 0, sizeof sched_data);
5698 sched_types = (enum attr_type *) xmalloc (max_ready
5699 * sizeof (enum attr_type));
5700 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5703 /* See if the packet P can match the insns we have already scheduled. Return
5704 nonzero if so. In *PSLOT, we store the first slot that is available for
5705 more instructions if we choose this packet.
5706    SPLIT holds the last slot we can use; there's a split issue after it, so
5707 scheduling beyond it would cause us to use more than one cycle. */
5709 static int
5710 packet_matches_p (p, split, pslot)
5711 const struct ia64_packet *p;
5712 int split;
5713 int *pslot;
5715 int filled = sched_data.cur;
5716 int first = sched_data.first_slot;
5717 int i, slot;
5719 /* First, check if the first of the two bundles must be a specific one (due
5720 to stop bits). */
5721 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5722 return 0;
5723 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5724 return 0;
5726 for (i = 0; i < first; i++)
5727 if (! insn_matches_slot (p, sched_data.types[i], i,
5728 sched_data.insns[i]))
5729 return 0;
5730 for (i = slot = first; i < filled; i++)
5732 while (slot < split)
5734 if (insn_matches_slot (p, sched_data.types[i], slot,
5735 sched_data.insns[i]))
5736 break;
5737 slot++;
5739 if (slot == split)
5740 return 0;
5741 slot++;
5744 if (pslot)
5745 *pslot = slot;
5746 return 1;
5749 /* A frontend for itanium_split_issue. For a packet P and a slot
5750 number FIRST that describes the start of the current clock cycle,
5751 return the slot number of the first split issue. This function
5752 uses the cached number found in P if possible. */
5754 static int
5755 get_split (p, first)
5756 const struct ia64_packet *p;
5757 int first;
5759 if (first == 0)
5760 return p->first_split;
5761 return itanium_split_issue (p, first);
5764 /* Given N_READY insns in the array READY, whose types are found in the
5765 corresponding array TYPES, return the insn that is best suited to be
5766 scheduled in slot SLOT of packet P. */
5768 static int
5769 find_best_insn (ready, types, n_ready, p, slot)
5770 rtx *ready;
5771 enum attr_type *types;
5772 int n_ready;
5773 const struct ia64_packet *p;
5774 int slot;
5776 int best = -1;
5777 int best_pri = 0;
5778 while (n_ready-- > 0)
5780 rtx insn = ready[n_ready];
5781 if (! insn)
5782 continue;
5783 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5784 break;
5785 /* If we have equally good insns, one of which has a stricter
5786 slot requirement, prefer the one with the stricter requirement. */
5787 if (best >= 0 && types[n_ready] == TYPE_A)
5788 continue;
5789 if (insn_matches_slot (p, types[n_ready], slot, insn))
5791 best = n_ready;
5792 best_pri = INSN_PRIORITY (ready[best]);
5794 /* If there's no way we could get a stricter requirement, stop
5795 looking now. */
5796 if (types[n_ready] != TYPE_A
5797 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5798 break;
5799 break;
5802 return best;
5805 /* Select the best packet to use given the current scheduler state and the
5806 current ready list.
5807 READY is an array holding N_READY ready insns; TYPES is a corresponding
5808 array that holds their types. Store the best packet in *PPACKET and the
5809 number of insns that can be scheduled in the current cycle in *PBEST. */
5811 static void
5812 find_best_packet (pbest, ppacket, ready, types, n_ready)
5813 int *pbest;
5814 const struct ia64_packet **ppacket;
5815 rtx *ready;
5816 enum attr_type *types;
5817 int n_ready;
5819 int first = sched_data.first_slot;
5820 int best = 0;
5821 int lowest_end = 6;
5822 const struct ia64_packet *best_packet = NULL;
5823 int i;
5825 for (i = 0; i < NR_PACKETS; i++)
5827 const struct ia64_packet *p = packets + i;
5828 int slot;
5829 int split = get_split (p, first);
5830 int win = 0;
5831 int first_slot, last_slot;
5832 int b_nops = 0;
5834 if (! packet_matches_p (p, split, &first_slot))
5835 continue;
5837 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5839 win = 0;
5840 last_slot = 6;
5841 for (slot = first_slot; slot < split; slot++)
5843 int insn_nr;
5845 /* Disallow a degenerate case where the first bundle doesn't
5846 contain anything but NOPs! */
5847 if (first_slot == 0 && win == 0 && slot == 3)
5849 win = -1;
5850 break;
5853 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5854 if (insn_nr >= 0)
5856 sched_ready[insn_nr] = 0;
5857 last_slot = slot;
5858 win++;
5860 else if (p->t[slot] == TYPE_B)
5861 b_nops++;
5863 /* We must disallow MBB/BBB packets if any of their B slots would be
5864 filled with nops. */
5865 if (last_slot < 3)
5867 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5868 win = -1;
5870 else
5872 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5873 win = -1;
5876 if (win > best
5877 || (win == best && last_slot < lowest_end))
5879 best = win;
5880 lowest_end = last_slot;
5881 best_packet = p;
5884 *pbest = best;
5885 *ppacket = best_packet;
5888 /* Reorder the ready list so that the insns that can be issued in this cycle
5889 are found in the correct order at the end of the list.
5890 DUMP is the scheduling dump file, or NULL. READY points to the start,
5891 E_READY to the end of the ready list. MAY_FAIL determines what should be
5892 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5893 otherwise we return 0.
5894 Return 1 if any insns can be scheduled in this cycle. */
5896 static int
5897 itanium_reorder (dump, ready, e_ready, may_fail)
5898 FILE *dump;
5899 rtx *ready;
5900 rtx *e_ready;
5901 int may_fail;
5903 const struct ia64_packet *best_packet;
5904 int n_ready = e_ready - ready;
5905 int first = sched_data.first_slot;
5906 int i, best, best_split, filled;
5908 for (i = 0; i < n_ready; i++)
5909 sched_types[i] = ia64_safe_type (ready[i]);
5911 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5913 if (best == 0)
5915 if (may_fail)
5916 return 0;
5917 abort ();
5920 if (dump)
5922 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5923 best_packet->t1->name,
5924 best_packet->t2 ? best_packet->t2->name : NULL, best);
5927 best_split = itanium_split_issue (best_packet, first);
5928 packet_matches_p (best_packet, best_split, &filled);
5930 for (i = filled; i < best_split; i++)
5932 int insn_nr;
5934 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5935 if (insn_nr >= 0)
5937 rtx insn = ready[insn_nr];
5938 memmove (ready + insn_nr, ready + insn_nr + 1,
5939 (n_ready - insn_nr - 1) * sizeof (rtx));
5940 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5941 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5942 ready[--n_ready] = insn;
5946 sched_data.packet = best_packet;
5947 sched_data.split = best_split;
5948 return 1;
5951 /* Dump information about the current scheduling state to file DUMP. */
5953 static void
5954 dump_current_packet (dump)
5955 FILE *dump;
5957 int i;
5958 fprintf (dump, "// %d slots filled:", sched_data.cur);
5959 for (i = 0; i < sched_data.first_slot; i++)
5961 rtx insn = sched_data.insns[i];
5962 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5963 if (insn)
5964 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5965 if (sched_data.stopbit[i])
5966 fprintf (dump, " ;;");
5968 fprintf (dump, " :::");
5969 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5971 rtx insn = sched_data.insns[i];
5972 enum attr_type t = ia64_safe_type (insn);
5973 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5975 fprintf (dump, "\n");
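/* Illustrative note (not part of the original source): given the format
   strings above, a typical dump line for a state with three slots filled,
   one of them before a stop bit, might look like

       // 3 slots filled: M/M ;; ::: (42) I (57) B

   where the insn UIDs and types are invented for the example.  */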
5978 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5979 NULL. */
5981 static void
5982 schedule_stop (dump)
5983 FILE *dump;
5985 const struct ia64_packet *best = sched_data.packet;
5986 int i;
5987 int best_stop = 6;
5989 if (dump)
5990 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5992 if (sched_data.cur == 0)
5994 if (dump)
5995 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5997 rotate_two_bundles (NULL);
5998 return;
6001 for (i = -1; i < NR_PACKETS; i++)
6003 /* This is a slight hack to give the current packet the first chance.
6004 This is done to avoid e.g. switching from MIB to MBB bundles. */
6005 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6006 int split = get_split (p, sched_data.first_slot);
6007 const struct bundle *compare;
6008 int next, stoppos;
6010 if (! packet_matches_p (p, split, &next))
6011 continue;
6013 compare = next > 3 ? p->t2 : p->t1;
6015 stoppos = 3;
6016 if (compare->possible_stop)
6017 stoppos = compare->possible_stop;
6018 if (next > 3)
6019 stoppos += 3;
6021 if (stoppos < next || stoppos >= best_stop)
6023 if (compare->possible_stop == 0)
6024 continue;
6025 stoppos = (next > 3 ? 6 : 3);
6027 if (stoppos < next || stoppos >= best_stop)
6028 continue;
6030 if (dump)
6031 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
6032 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6033 stoppos);
6035 best_stop = stoppos;
6036 best = p;
6039 sched_data.packet = best;
6040 cycle_end_fill_slots (dump);
6041 while (sched_data.cur < best_stop)
6043 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6044 sched_data.insns[sched_data.cur] = 0;
6045 sched_data.stopbit[sched_data.cur] = 0;
6046 sched_data.cur++;
6048 sched_data.stopbit[sched_data.cur - 1] = 1;
6049 sched_data.first_slot = best_stop;
6051 if (dump)
6052 dump_current_packet (dump);
6055 /* If necessary, perform one or two rotations on the scheduling state.
6056 This should only be called if we are starting a new cycle. */
6058 static void
6059 maybe_rotate (dump)
6060 FILE *dump;
6062 cycle_end_fill_slots (dump);
6063 if (sched_data.cur == 6)
6064 rotate_two_bundles (dump);
6065 else if (sched_data.cur >= 3)
6066 rotate_one_bundle (dump);
6067 sched_data.first_slot = sched_data.cur;
6070 /* The clock cycle when ia64_sched_reorder was last called. */
6071 static int prev_cycle;
6073 /* The first insn scheduled in the previous cycle. This is the saved
6074 value of sched_data.first_slot. */
6075 static int prev_first;
6077 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6078 pad out the delay between MM (shifts, etc.) and integer operations. */
6080 static void
6081 nop_cycles_until (clock_var, dump)
6082 int clock_var;
6083 FILE *dump;
6085 int prev_clock = prev_cycle;
6086 int cycles_left = clock_var - prev_clock;
6087 bool did_stop = false;
6089 /* Finish the previous cycle; pad it out with NOPs. */
6090 if (sched_data.cur == 3)
6092 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6093 did_stop = true;
6094 maybe_rotate (dump);
6096 else if (sched_data.cur > 0)
6098 int need_stop = 0;
6099 int split = itanium_split_issue (sched_data.packet, prev_first);
6101 if (sched_data.cur < 3 && split > 3)
6103 split = 3;
6104 need_stop = 1;
6107 if (split > sched_data.cur)
6109 int i;
6110 for (i = sched_data.cur; i < split; i++)
6112 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6113 sched_data.types[i] = sched_data.packet->t[i];
6114 sched_data.insns[i] = t;
6115 sched_data.stopbit[i] = 0;
6117 sched_data.cur = split;
6120 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6121 && cycles_left > 1)
6123 int i;
6124 for (i = sched_data.cur; i < 6; i++)
6126 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6127 sched_data.types[i] = sched_data.packet->t[i];
6128 sched_data.insns[i] = t;
6129 sched_data.stopbit[i] = 0;
6131 sched_data.cur = 6;
6132 cycles_left--;
6133 need_stop = 1;
6136 if (need_stop || sched_data.cur == 6)
6138 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6139 did_stop = true;
6141 maybe_rotate (dump);
6144 cycles_left--;
6145 while (cycles_left > 0)
6147 sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6148 sched_emit_insn (gen_nop_type (TYPE_M));
6149 sched_emit_insn (gen_nop_type (TYPE_I));
6150 if (cycles_left > 1)
6152 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6153 cycles_left--;
6155 sched_emit_insn (gen_nop_type (TYPE_I));
6156 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6157 did_stop = true;
6158 cycles_left--;
6161 if (did_stop)
6162 init_insn_group_barriers ();
6165 /* We are about to begin issuing insns for this clock cycle.
6166 Override the default sort algorithm to better slot instructions. */
6168 static int
6169 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6170 reorder_type, clock_var)
6171 FILE *dump ATTRIBUTE_UNUSED;
6172 int sched_verbose ATTRIBUTE_UNUSED;
6173 rtx *ready;
6174 int *pn_ready;
6175 int reorder_type, clock_var;
6177 int n_asms;
6178 int n_ready = *pn_ready;
6179 rtx *e_ready = ready + n_ready;
6180 rtx *insnp;
6182 if (sched_verbose)
6184 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6185 dump_current_packet (dump);
6188 /* Work around the pipeline flush that will occur if the results of
6189 an MM instruction are accessed before the result is ready. Intel
6190 documentation says this only happens with IALU, ISHF, ILOG, LD,
6191 and ST consumers, but experimental evidence shows that *any* non-MM
6192 type instruction will incur the flush. */
6193 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6195 for (insnp = ready; insnp < e_ready; insnp++)
6197 rtx insn = *insnp, link;
6198 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6200 if (t == ITANIUM_CLASS_MMMUL
6201 || t == ITANIUM_CLASS_MMSHF
6202 || t == ITANIUM_CLASS_MMSHFI)
6203 continue;
6205 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6206 if (REG_NOTE_KIND (link) == 0)
6208 rtx other = XEXP (link, 0);
6209 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6210 if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
6212 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6213 goto out;
6218 out:
6220 prev_first = sched_data.first_slot;
6221 prev_cycle = clock_var;
6223 if (reorder_type == 0)
6224 maybe_rotate (sched_verbose ? dump : NULL);
6226 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6227 n_asms = 0;
6228 for (insnp = ready; insnp < e_ready; insnp++)
6229 if (insnp < e_ready)
6231 rtx insn = *insnp;
6232 enum attr_type t = ia64_safe_type (insn);
6233 if (t == TYPE_UNKNOWN)
6235 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6236 || asm_noperands (PATTERN (insn)) >= 0)
6238 rtx lowest = ready[n_asms];
6239 ready[n_asms] = insn;
6240 *insnp = lowest;
6241 n_asms++;
6243 else
6245 rtx highest = ready[n_ready - 1];
6246 ready[n_ready - 1] = insn;
6247 *insnp = highest;
6248 if (ia64_final_schedule && group_barrier_needed_p (insn))
6250 schedule_stop (sched_verbose ? dump : NULL);
6251 sched_data.last_was_stop = 1;
6252 maybe_rotate (sched_verbose ? dump : NULL);
6255 return 1;
6259 if (n_asms < n_ready)
6261 /* Some normal insns to process. Skip the asms. */
6262 ready += n_asms;
6263 n_ready -= n_asms;
6265 else if (n_ready > 0)
6267 /* Only asm insns left. */
6268 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6270 schedule_stop (sched_verbose ? dump : NULL);
6271 sched_data.last_was_stop = 1;
6272 maybe_rotate (sched_verbose ? dump : NULL);
6274 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6275 return 1;
6278 if (ia64_final_schedule)
6280 int nr_need_stop = 0;
6282 for (insnp = ready; insnp < e_ready; insnp++)
6283 if (safe_group_barrier_needed_p (*insnp))
6284 nr_need_stop++;
6286 /* Schedule a stop bit if
6287 - all insns require a stop bit, or
6288 - we are starting a new cycle and _any_ insn requires a stop bit.
6289 The reason for the latter is that if our schedule is accurate, then
6290 the additional stop won't decrease performance at this point (since
6291 there's a split issue at this point anyway), but it gives us more
6292 freedom when scheduling the currently ready insns. */
6293 if ((reorder_type == 0 && nr_need_stop)
6294 || (reorder_type == 1 && n_ready == nr_need_stop))
6296 schedule_stop (sched_verbose ? dump : NULL);
6297 sched_data.last_was_stop = 1;
6298 maybe_rotate (sched_verbose ? dump : NULL);
6299 if (reorder_type == 1)
6300 return 0;
6302 else
6304 int deleted = 0;
6305 insnp = e_ready;
6306 /* Move down everything that needs a stop bit, preserving relative
6307 order. */
6308 while (insnp-- > ready + deleted)
6309 while (insnp >= ready + deleted)
6311 rtx insn = *insnp;
6312 if (! safe_group_barrier_needed_p (insn))
6313 break;
6314 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6315 *ready = insn;
6316 deleted++;
6318 n_ready -= deleted;
6319 ready += deleted;
6320 if (deleted != nr_need_stop)
6321 abort ();
6325 return itanium_reorder (sched_verbose ? dump : NULL,
6326 ready, e_ready, reorder_type == 1);
6329 static int
6330 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6331 FILE *dump;
6332 int sched_verbose;
6333 rtx *ready;
6334 int *pn_ready;
6335 int clock_var;
6337 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6338 pn_ready, 0, clock_var);
6341 /* Like ia64_sched_reorder, but called after issuing each insn.
6342 Override the default sort algorithm to better slot instructions. */
6344 static int
6345 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6346 FILE *dump ATTRIBUTE_UNUSED;
6347 int sched_verbose ATTRIBUTE_UNUSED;
6348 rtx *ready;
6349 int *pn_ready;
6350 int clock_var;
6352 if (sched_data.last_was_stop)
6353 return 0;
6355 /* Detect one special case and try to optimize it.
6356 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6357 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6358 if (sched_data.first_slot == 1
6359 && sched_data.stopbit[0]
6360 && ((sched_data.cur == 4
6361 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6362 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6363 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6364 || (sched_data.cur == 3
6365 && (sched_data.types[1] == TYPE_M
6366 || sched_data.types[1] == TYPE_A)
6367 && (sched_data.types[2] != TYPE_M
6368 && sched_data.types[2] != TYPE_I
6369 && sched_data.types[2] != TYPE_A))))
6372 int i, best;
6373 rtx stop = sched_data.insns[1];
6375 /* Search backward for the stop bit that must be there. */
6376 while (1)
6378 int insn_code;
6380 stop = PREV_INSN (stop);
6381 if (GET_CODE (stop) != INSN)
6382 abort ();
6383 insn_code = recog_memoized (stop);
6385 /* Ignore .pred.rel.mutex.
6387 ??? Update this to ignore cycle display notes too
6388 ??? once those are implemented */
6389 if (insn_code == CODE_FOR_pred_rel_mutex
6390 || insn_code == CODE_FOR_prologue_use)
6391 continue;
6393 if (insn_code == CODE_FOR_insn_group_barrier)
6394 break;
6395 abort ();
6398 /* Adjust the stop bit's slot selector. */
6399 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6400 abort ();
6401 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6403 sched_data.stopbit[0] = 0;
6404 sched_data.stopbit[2] = 1;
6406 sched_data.types[5] = sched_data.types[3];
6407 sched_data.types[4] = sched_data.types[2];
6408 sched_data.types[3] = sched_data.types[1];
6409 sched_data.insns[5] = sched_data.insns[3];
6410 sched_data.insns[4] = sched_data.insns[2];
6411 sched_data.insns[3] = sched_data.insns[1];
6412 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6413 sched_data.cur += 2;
6414 sched_data.first_slot = 3;
6415 for (i = 0; i < NR_PACKETS; i++)
6417 const struct ia64_packet *p = packets + i;
6418 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6420 sched_data.packet = p;
6421 break;
6424 rotate_one_bundle (sched_verbose ? dump : NULL);
6426 best = 6;
6427 for (i = 0; i < NR_PACKETS; i++)
6429 const struct ia64_packet *p = packets + i;
6430 int split = get_split (p, sched_data.first_slot);
6431 int next;
6433 /* Disallow multiway branches here. */
6434 if (p->t[1] == TYPE_B)
6435 continue;
6437 if (packet_matches_p (p, split, &next) && next < best)
6439 best = next;
6440 sched_data.packet = p;
6441 sched_data.split = split;
6444 if (best == 6)
6445 abort ();
6448 if (*pn_ready > 0)
6450 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6451 ready, pn_ready, 1,
6452 clock_var);
6453 if (more)
6454 return more;
6455 /* Did we schedule a stop? If so, finish this cycle. */
6456 if (sched_data.cur == sched_data.first_slot)
6457 return 0;
6460 if (sched_verbose)
6461 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6463 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6464 if (sched_verbose)
6465 dump_current_packet (dump);
6466 return 0;
6469 /* We are about to issue INSN. Return the number of insns left on the
6470 ready queue that can be issued this cycle. */
6472 static int
6473 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6474 FILE *dump;
6475 int sched_verbose;
6476 rtx insn;
6477 int can_issue_more ATTRIBUTE_UNUSED;
6479 enum attr_type t = ia64_safe_type (insn);
6481 if (sched_data.last_was_stop)
6483 int t = sched_data.first_slot;
6484 if (t == 0)
6485 t = 3;
6486 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6487 init_insn_group_barriers ();
6488 sched_data.last_was_stop = 0;
6491 if (t == TYPE_UNKNOWN)
6493 if (sched_verbose)
6494 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6495 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6496 || asm_noperands (PATTERN (insn)) >= 0)
6498 /* This must be some kind of asm. Clear the scheduling state. */
6499 rotate_two_bundles (sched_verbose ? dump : NULL);
6500 if (ia64_final_schedule)
6501 group_barrier_needed_p (insn);
6503 return 1;
6506 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6507 important state info. Don't delete this test. */
6508 if (ia64_final_schedule
6509 && group_barrier_needed_p (insn))
6510 abort ();
6512 sched_data.stopbit[sched_data.cur] = 0;
6513 sched_data.insns[sched_data.cur] = insn;
6514 sched_data.types[sched_data.cur] = t;
6516 sched_data.cur++;
6517 if (sched_verbose)
6518 fprintf (dump, "// Scheduling insn %d of type %s\n",
6519 INSN_UID (insn), type_names[t]);
6521 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6523 schedule_stop (sched_verbose ? dump : NULL);
6524 sched_data.last_was_stop = 1;
6527 return 1;
6530 /* Free data allocated by ia64_sched_init. */
6532 static void
6533 ia64_sched_finish (dump, sched_verbose)
6534 FILE *dump;
6535 int sched_verbose;
6537 if (sched_verbose)
6538 fprintf (dump, "// Finishing schedule.\n");
6539 rotate_two_bundles (NULL);
6540 free (sched_types);
6541 free (sched_ready);
6544 /* Emit pseudo-ops for the assembler to describe predicate relations.
6545 At present this assumes that we only consider predicate pairs to
6546 be mutex, and that the assembler can deduce proper values from
6547 straight-line code. */
6549 static void
6550 emit_predicate_relation_info ()
6552 int i;
6554 for (i = n_basic_blocks - 1; i >= 0; --i)
6556 basic_block bb = BASIC_BLOCK (i);
6557 int r;
6558 rtx head = bb->head;
6560 /* We only need such notes at code labels. */
6561 if (GET_CODE (head) != CODE_LABEL)
6562 continue;
6563 if (GET_CODE (NEXT_INSN (head)) == NOTE
6564 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6565 head = NEXT_INSN (head);
6567 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6568 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6570 rtx p = gen_rtx_REG (BImode, r);
6571 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6572 if (head == bb->end)
6573 bb->end = n;
6574 head = n;
6578 /* Look for conditional calls that do not return, and protect predicate
6579 relations around them. Otherwise the assembler will assume the call
6580 returns, and complain about uses of call-clobbered predicates after
6581 the call. */
6582 for (i = n_basic_blocks - 1; i >= 0; --i)
6584 basic_block bb = BASIC_BLOCK (i);
6585 rtx insn = bb->head;
6587 while (1)
6589 if (GET_CODE (insn) == CALL_INSN
6590 && GET_CODE (PATTERN (insn)) == COND_EXEC
6591 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6593 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6594 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6595 if (bb->head == insn)
6596 bb->head = b;
6597 if (bb->end == insn)
6598 bb->end = a;
6601 if (insn == bb->end)
6602 break;
6603 insn = NEXT_INSN (insn);
6608 /* Generate a NOP instruction of type T. We will never generate L type
6609 nops. */
6611 static rtx
6612 gen_nop_type (t)
6613 enum attr_type t;
6615 switch (t)
6617 case TYPE_M:
6618 return gen_nop_m ();
6619 case TYPE_I:
6620 return gen_nop_i ();
6621 case TYPE_B:
6622 return gen_nop_b ();
6623 case TYPE_F:
6624 return gen_nop_f ();
6625 case TYPE_X:
6626 return gen_nop_x ();
6627 default:
6628 abort ();
6632 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6633 here than while scheduling. */
6635 static void
6636 ia64_emit_nops ()
6638 rtx insn;
6639 const struct bundle *b = 0;
6640 int bundle_pos = 0;
6642 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6644 rtx pat;
6645 enum attr_type t;
6646 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6647 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6648 continue;
6649 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6650 || GET_CODE (insn) == CODE_LABEL)
6652 if (b)
6653 while (bundle_pos < 3)
6655 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6656 bundle_pos++;
6658 if (GET_CODE (insn) != CODE_LABEL)
6659 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6660 else
6661 b = 0;
6662 bundle_pos = 0;
6663 continue;
6665 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6667 int t = INTVAL (XVECEXP (pat, 0, 0));
6668 if (b)
6669 while (bundle_pos < t)
6671 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6672 bundle_pos++;
6674 continue;
6677 if (bundle_pos == 3)
6678 b = 0;
6680 if (b && INSN_P (insn))
6682 t = ia64_safe_type (insn);
6683 if (asm_noperands (PATTERN (insn)) >= 0
6684 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6686 while (bundle_pos < 3)
6688 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6689 bundle_pos++;
6691 continue;
6694 if (t == TYPE_UNKNOWN)
6695 continue;
6696 while (bundle_pos < 3)
6698 if (t == b->t[bundle_pos]
6699 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6700 || b->t[bundle_pos] == TYPE_I)))
6701 break;
6703 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6704 bundle_pos++;
6706 if (bundle_pos < 3)
6707 bundle_pos++;
6712 /* Perform machine dependent operations on the rtl chain INSNS. */
6714 void
6715 ia64_reorg (insns)
6716 rtx insns;
6718 /* If optimizing, we'll have split before scheduling. */
6719 if (optimize == 0)
6720 split_all_insns_noflow ();
6722 /* We are freeing block_for_insn in the toplev to keep compatibility
6723 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6724 compute_bb_for_insn (get_max_uid ());
6725 /* update_life_info_in_dirty_blocks should be enough here. */
6726 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6728 if (ia64_flag_schedule_insns2)
6730 timevar_push (TV_SCHED2);
6731 ia64_final_schedule = 1;
6732 schedule_ebbs (rtl_dump_file);
6733 ia64_final_schedule = 0;
6734 timevar_pop (TV_SCHED2);
6736 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6737 place as they were during scheduling. */
6738 emit_insn_group_barriers (rtl_dump_file, insns);
6739 ia64_emit_nops ();
6741 else
6742 emit_all_insn_group_barriers (rtl_dump_file, insns);
6744 /* A call must not be the last instruction in a function, so that the
6745 return address is still within the function, so that unwinding works
6746 properly. Note that IA-64 differs from dwarf2 on this point. */
6747 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6749 rtx insn;
6750 int saw_stop = 0;
6752 insn = get_last_insn ();
6753 if (! INSN_P (insn))
6754 insn = prev_active_insn (insn);
6755 if (GET_CODE (insn) == INSN
6756 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6757 && XINT (PATTERN (insn), 1) == 2)
6759 saw_stop = 1;
6760 insn = prev_active_insn (insn);
6762 if (GET_CODE (insn) == CALL_INSN)
6764 if (! saw_stop)
6765 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6766 emit_insn (gen_break_f ());
6767 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6771 fixup_errata ();
6772 emit_predicate_relation_info ();
6775 /* Return true if REGNO is used by the epilogue. */
6778 ia64_epilogue_uses (regno)
6779 int regno;
6781 switch (regno)
6783 case R_GR (1):
6784 /* When a function makes a call through a function descriptor, we
6785 will write a (potentially) new value to "gp". After returning
6786 from such a call, we need to make sure the function restores the
6787 original gp-value, even if the function itself does not use the
6788 gp anymore. */
6789 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
6791 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
6792 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
6793 /* For functions defined with the syscall_linkage attribute, all
6794 input registers are marked as live at all function exits. This
6795 prevents the register allocator from using the input registers,
6796 which in turn makes it possible to restart a system call after
6797 an interrupt without having to save/restore the input registers.
6798 This also prevents kernel data from leaking to application code. */
6799 return lookup_attribute ("syscall_linkage",
6800 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
6802 case R_BR (0):
6803 /* Conditional return patterns can't represent the use of `b0' as
6804 the return address, so we force the value live this way. */
6805 return 1;
6807 case AR_PFS_REGNUM:
6808 /* Likewise for ar.pfs, which is used by br.ret. */
6809 return 1;
6811 default:
6812 return 0;
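/* Illustrative sketch (not part of the original source): the syscall_linkage
   attribute mentioned above might be attached to a function as shown below,
   assuming the attribute is accepted on an ordinary declaration; the function
   name is made up for the example.  For such a function, all eight input
   registers are treated as live at every exit, so a kernel can restart the
   system call without saving and restoring them separately.  */
long do_syscall_stub (long nr, long arg) __attribute__ ((syscall_linkage));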
6816 /* Return true if REGNO is used by the frame unwinder. */
6819 ia64_eh_uses (regno)
6820 int regno;
6822 if (! reload_completed)
6823 return 0;
6825 if (current_frame_info.reg_save_b0
6826 && regno == current_frame_info.reg_save_b0)
6827 return 1;
6828 if (current_frame_info.reg_save_pr
6829 && regno == current_frame_info.reg_save_pr)
6830 return 1;
6831 if (current_frame_info.reg_save_ar_pfs
6832 && regno == current_frame_info.reg_save_ar_pfs)
6833 return 1;
6834 if (current_frame_info.reg_save_ar_unat
6835 && regno == current_frame_info.reg_save_ar_unat)
6836 return 1;
6837 if (current_frame_info.reg_save_ar_lc
6838 && regno == current_frame_info.reg_save_ar_lc)
6839 return 1;
6841 return 0;
6844 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6846 We add @ to the name if this goes in small data/bss. We can only put
6847 a variable in small data/bss if it is defined in this module or a module
6848 that we are statically linked with. We can't check the second condition,
6849 but TREE_STATIC gives us the first one. */
6851 /* ??? If we had IPA, we could check the second condition. We could support
6852 programmer added section attributes if the variable is not defined in this
6853 module. */
6855 /* ??? See the v850 port for a cleaner way to do this. */
6857 /* ??? We could also support own long data here. Generating movl/add/ld8
6858 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6859 code faster because there is one less load. This also includes incomplete
6860 types which can't go in sdata/sbss. */
6862 /* ??? See select_section. We must put short own readonly variables in
6863 sdata/sbss instead of the more natural rodata, because we can't perform
6864 the DECL_READONLY_SECTION test here. */
6866 extern struct obstack * saveable_obstack;
6868 void
6869 ia64_encode_section_info (decl, first)
6870 tree decl;
6871 int first ATTRIBUTE_UNUSED;
6873 const char *symbol_str;
6875 if (TREE_CODE (decl) == FUNCTION_DECL)
6877 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6878 return;
6881 /* Careful not to prod global register variables. */
6882 if (TREE_CODE (decl) != VAR_DECL
6883 || GET_CODE (DECL_RTL (decl)) != MEM
6884 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6885 return;
6887 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6889 /* We assume that -fpic is used only to create a shared library (dso).
6890 With -fpic, no global data can ever be sdata.
6891 Without -fpic, global common uninitialized data can never be sdata, since
6892 it can unify with a real definition in a dso. */
6893 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6894 to access them. The linker may then be able to do linker relaxation to
6895 optimize references to them. Currently sdata implies use of gprel. */
6896 /* We need the DECL_EXTERNAL check for C++. static class data members get
6897 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6898 statically allocated, but the space is allocated somewhere else. Such
6899 decls can not be own data. */
6900 if (! TARGET_NO_SDATA
6901 && ((TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6902 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6903 && ! (TREE_PUBLIC (decl)
6904 && (flag_pic
6905 || (DECL_COMMON (decl)
6906 && (DECL_INITIAL (decl) == 0
6907 || DECL_INITIAL (decl) == error_mark_node)))))
6908 || MODULE_LOCAL_P (decl))
6909 /* Either the variable must be declared without a section attribute,
6910 or the section must be sdata or sbss. */
6911 && (DECL_SECTION_NAME (decl) == 0
6912 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6913 ".sdata")
6914 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6915 ".sbss")))
6917 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6919 /* If the variable has already been defined in the output file, then it
6920 is too late to put it in sdata if it wasn't put there in the first
6921 place. The test is here rather than above, because if it is already
6922 in sdata, then it can stay there. */
6924 if (TREE_ASM_WRITTEN (decl))
6927 /* If this is an incomplete type with size 0, then we can't put it in
6928 sdata because it might be too big when completed.
6929 Objects bigger than threshold should have SDATA_NAME_FLAG_CHAR
6930 added if they are in .sdata or .sbss explicitly. */
6931 else if (((size > 0
6932 && size <= (HOST_WIDE_INT) ia64_section_threshold)
6933 || DECL_SECTION_NAME (decl))
6934 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
6936 size_t len = strlen (symbol_str);
6937 char *newstr = alloca (len + 1);
6938 const char *string;
6940 *newstr = SDATA_NAME_FLAG_CHAR;
6941 memcpy (newstr + 1, symbol_str, len + 1);
6943 string = ggc_alloc_string (newstr, len + 1);
6944 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
6947 /* This decl is marked as being in small data/bss but it shouldn't
6948 be; one likely explanation for this is that the decl has been
6949 moved into a different section from the one it was in when
6950 ENCODE_SECTION_INFO was first called. Remove the '@'. */
6951 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6953 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6954 = ggc_strdup (symbol_str + 1);
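/* Illustrative sketch (not part of the original source): the kind of
   declarations the logic above distinguishes.  Assuming the default small
   data threshold, the small module-local object below is flagged with
   SDATA_NAME_FLAG_CHAR and placed in .sbss, so it can be addressed
   gp-relative with a single addl; the large array is left in ordinary .bss.
   Names and sizes are made up for the example.  */
static int small_counter;               /* below the threshold -> .sbss */
static char big_buffer[64 * 1024];      /* above the threshold -> .bss  */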
6958 /* Output assembly directives for prologue regions. */
6960 /* The current basic block number. */
6962 static int block_num;
6964 /* True if we need a copy_state command at the start of the next block. */
6966 static int need_copy_state;
6968 /* The function emits unwind directives for the start of an epilogue. */
6970 static void
6971 process_epilogue ()
6973 /* If this isn't the last block of the function, then we need to label the
6974 current state, and copy it back in at the start of the next block. */
6976 if (block_num != n_basic_blocks - 1)
6978 fprintf (asm_out_file, "\t.label_state 1\n");
6979 need_copy_state = 1;
6982 fprintf (asm_out_file, "\t.restore sp\n");
6985 /* This function processes a SET pattern looking for specific patterns
6986 which result in emitting an assembly directive required for unwinding. */
6988 static int
6989 process_set (asm_out_file, pat)
6990 FILE *asm_out_file;
6991 rtx pat;
6993 rtx src = SET_SRC (pat);
6994 rtx dest = SET_DEST (pat);
6995 int src_regno, dest_regno;
6997 /* Look for the ALLOC insn. */
6998 if (GET_CODE (src) == UNSPEC_VOLATILE
6999 && XINT (src, 1) == 0
7000 && GET_CODE (dest) == REG)
7002 dest_regno = REGNO (dest);
7004 /* If this isn't the final destination for ar.pfs, the alloc
7005 shouldn't have been marked frame related. */
7006 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7007 abort ();
7009 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7010 ia64_dbx_register_number (dest_regno));
7011 return 1;
7014 /* Look for SP = .... */
7015 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7017 if (GET_CODE (src) == PLUS)
7019 rtx op0 = XEXP (src, 0);
7020 rtx op1 = XEXP (src, 1);
7021 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7023 if (INTVAL (op1) < 0)
7025 fputs ("\t.fframe ", asm_out_file);
7026 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
7027 -INTVAL (op1));
7028 fputc ('\n', asm_out_file);
7030 else
7031 process_epilogue ();
7033 else
7034 abort ();
7036 else if (GET_CODE (src) == REG
7037 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7038 process_epilogue ();
7039 else
7040 abort ();
7042 return 1;
7045 /* Register move we need to look at. */
7046 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7048 src_regno = REGNO (src);
7049 dest_regno = REGNO (dest);
7051 switch (src_regno)
7053 case BR_REG (0):
7054 /* Saving return address pointer. */
7055 if (dest_regno != current_frame_info.reg_save_b0)
7056 abort ();
7057 fprintf (asm_out_file, "\t.save rp, r%d\n",
7058 ia64_dbx_register_number (dest_regno));
7059 return 1;
7061 case PR_REG (0):
7062 if (dest_regno != current_frame_info.reg_save_pr)
7063 abort ();
7064 fprintf (asm_out_file, "\t.save pr, r%d\n",
7065 ia64_dbx_register_number (dest_regno));
7066 return 1;
7068 case AR_UNAT_REGNUM:
7069 if (dest_regno != current_frame_info.reg_save_ar_unat)
7070 abort ();
7071 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7072 ia64_dbx_register_number (dest_regno));
7073 return 1;
7075 case AR_LC_REGNUM:
7076 if (dest_regno != current_frame_info.reg_save_ar_lc)
7077 abort ();
7078 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7079 ia64_dbx_register_number (dest_regno));
7080 return 1;
7082 case STACK_POINTER_REGNUM:
7083 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7084 || ! frame_pointer_needed)
7085 abort ();
7086 fprintf (asm_out_file, "\t.vframe r%d\n",
7087 ia64_dbx_register_number (dest_regno));
7088 return 1;
7090 default:
7091 /* Everything else should indicate being stored to memory. */
7092 abort ();
7096 /* Memory store we need to look at. */
7097 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7099 long off;
7100 rtx base;
7101 const char *saveop;
7103 if (GET_CODE (XEXP (dest, 0)) == REG)
7105 base = XEXP (dest, 0);
7106 off = 0;
7108 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7109 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7111 base = XEXP (XEXP (dest, 0), 0);
7112 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7114 else
7115 abort ();
7117 if (base == hard_frame_pointer_rtx)
7119 saveop = ".savepsp";
7120 off = - off;
7122 else if (base == stack_pointer_rtx)
7123 saveop = ".savesp";
7124 else
7125 abort ();
7127 src_regno = REGNO (src);
7128 switch (src_regno)
7130 case BR_REG (0):
7131 if (current_frame_info.reg_save_b0 != 0)
7132 abort ();
7133 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7134 return 1;
7136 case PR_REG (0):
7137 if (current_frame_info.reg_save_pr != 0)
7138 abort ();
7139 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7140 return 1;
7142 case AR_LC_REGNUM:
7143 if (current_frame_info.reg_save_ar_lc != 0)
7144 abort ();
7145 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7146 return 1;
7148 case AR_PFS_REGNUM:
7149 if (current_frame_info.reg_save_ar_pfs != 0)
7150 abort ();
7151 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7152 return 1;
7154 case AR_UNAT_REGNUM:
7155 if (current_frame_info.reg_save_ar_unat != 0)
7156 abort ();
7157 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7158 return 1;
7160 case GR_REG (4):
7161 case GR_REG (5):
7162 case GR_REG (6):
7163 case GR_REG (7):
7164 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7165 1 << (src_regno - GR_REG (4)));
7166 return 1;
7168 case BR_REG (1):
7169 case BR_REG (2):
7170 case BR_REG (3):
7171 case BR_REG (4):
7172 case BR_REG (5):
7173 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7174 1 << (src_regno - BR_REG (1)));
7175 return 1;
7177 case FR_REG (2):
7178 case FR_REG (3):
7179 case FR_REG (4):
7180 case FR_REG (5):
7181 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7182 1 << (src_regno - FR_REG (2)));
7183 return 1;
7185 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7186 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7187 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7188 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7189 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7190 1 << (src_regno - FR_REG (12)));
7191 return 1;
7193 default:
7194 return 0;
7198 return 0;
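/* Illustrative sketch (not part of the original source): a schematic excerpt
   of the directives the routines above can emit for a simple frame.  In real
   output they are interleaved with the prologue and epilogue instructions;
   the register numbers and frame size are invented for the example.

	.save ar.pfs, r35
	.save rp, r34
	.fframe 48
	  ... function body ...
	.restore sp
*/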
7202 /* This function looks at a single insn and emits any directives
7203 required to unwind this insn. */
7204 void
7205 process_for_unwind_directive (asm_out_file, insn)
7206 FILE *asm_out_file;
7207 rtx insn;
7209 if (flag_unwind_tables
7210 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7212 rtx pat;
7214 if (GET_CODE (insn) == NOTE
7215 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7217 block_num = NOTE_BASIC_BLOCK (insn)->index;
7219 /* Restore unwind state from immediately before the epilogue. */
7220 if (need_copy_state)
7222 fprintf (asm_out_file, "\t.body\n");
7223 fprintf (asm_out_file, "\t.copy_state 1\n");
7224 need_copy_state = 0;
7228 if (! RTX_FRAME_RELATED_P (insn))
7229 return;
7231 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7232 if (pat)
7233 pat = XEXP (pat, 0);
7234 else
7235 pat = PATTERN (insn);
7237 switch (GET_CODE (pat))
7239 case SET:
7240 process_set (asm_out_file, pat);
7241 break;
7243 case PARALLEL:
7245 int par_index;
7246 int limit = XVECLEN (pat, 0);
7247 for (par_index = 0; par_index < limit; par_index++)
7249 rtx x = XVECEXP (pat, 0, par_index);
7250 if (GET_CODE (x) == SET)
7251 process_set (asm_out_file, x);
7253 break;
7256 default:
7257 abort ();
7263 void
7264 ia64_init_builtins ()
7266 tree psi_type_node = build_pointer_type (integer_type_node);
7267 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7268 tree endlink = void_list_node;
7270 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7271 tree si_ftype_psi_si_si
7272 = build_function_type (integer_type_node,
7273 tree_cons (NULL_TREE, psi_type_node,
7274 tree_cons (NULL_TREE, integer_type_node,
7275 tree_cons (NULL_TREE,
7276 integer_type_node,
7277 endlink))));
7279 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7280 tree di_ftype_pdi_di_di
7281 = build_function_type (long_integer_type_node,
7282 tree_cons (NULL_TREE, pdi_type_node,
7283 tree_cons (NULL_TREE,
7284 long_integer_type_node,
7285 tree_cons (NULL_TREE,
7286 long_integer_type_node,
7287 endlink))));
7288 /* __sync_synchronize */
7289 tree void_ftype_void
7290 = build_function_type (void_type_node, endlink);
7292 /* __sync_lock_test_and_set_si */
7293 tree si_ftype_psi_si
7294 = build_function_type (integer_type_node,
7295 tree_cons (NULL_TREE, psi_type_node,
7296 tree_cons (NULL_TREE, integer_type_node, endlink)));
7298 /* __sync_lock_test_and_set_di */
7299 tree di_ftype_pdi_di
7300 = build_function_type (long_integer_type_node,
7301 tree_cons (NULL_TREE, pdi_type_node,
7302 tree_cons (NULL_TREE, long_integer_type_node,
7303 endlink)));
7305 /* __sync_lock_release_si */
7306 tree void_ftype_psi
7307 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
7308 endlink));
7310 /* __sync_lock_release_di */
7311 tree void_ftype_pdi
7312 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
7313 endlink));
7315 #define def_builtin(name, type, code) \
7316 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
7318 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7319 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7320 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7321 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7322 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7323 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7324 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7325 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7327 def_builtin ("__sync_synchronize", void_ftype_void,
7328 IA64_BUILTIN_SYNCHRONIZE);
7330 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7331 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7332 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7333 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7334 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7335 IA64_BUILTIN_LOCK_RELEASE_SI);
7336 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7337 IA64_BUILTIN_LOCK_RELEASE_DI);
7339 def_builtin ("__builtin_ia64_bsp",
7340 build_function_type (ptr_type_node, endlink),
7341 IA64_BUILTIN_BSP);
7343 def_builtin ("__builtin_ia64_flushrs",
7344 build_function_type (void_type_node, endlink),
7345 IA64_BUILTIN_FLUSHRS);
7347 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7348 IA64_BUILTIN_FETCH_AND_ADD_SI);
7349 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7350 IA64_BUILTIN_FETCH_AND_SUB_SI);
7351 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7352 IA64_BUILTIN_FETCH_AND_OR_SI);
7353 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7354 IA64_BUILTIN_FETCH_AND_AND_SI);
7355 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7356 IA64_BUILTIN_FETCH_AND_XOR_SI);
7357 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7358 IA64_BUILTIN_FETCH_AND_NAND_SI);
7360 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7361 IA64_BUILTIN_ADD_AND_FETCH_SI);
7362 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7363 IA64_BUILTIN_SUB_AND_FETCH_SI);
7364 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7365 IA64_BUILTIN_OR_AND_FETCH_SI);
7366 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7367 IA64_BUILTIN_AND_AND_FETCH_SI);
7368 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7369 IA64_BUILTIN_XOR_AND_FETCH_SI);
7370 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7371 IA64_BUILTIN_NAND_AND_FETCH_SI);
7373 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7374 IA64_BUILTIN_FETCH_AND_ADD_DI);
7375 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7376 IA64_BUILTIN_FETCH_AND_SUB_DI);
7377 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7378 IA64_BUILTIN_FETCH_AND_OR_DI);
7379 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7380 IA64_BUILTIN_FETCH_AND_AND_DI);
7381 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7382 IA64_BUILTIN_FETCH_AND_XOR_DI);
7383 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7384 IA64_BUILTIN_FETCH_AND_NAND_DI);
7386 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7387 IA64_BUILTIN_ADD_AND_FETCH_DI);
7388 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7389 IA64_BUILTIN_SUB_AND_FETCH_DI);
7390 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7391 IA64_BUILTIN_OR_AND_FETCH_DI);
7392 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7393 IA64_BUILTIN_AND_AND_FETCH_DI);
7394 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7395 IA64_BUILTIN_XOR_AND_FETCH_DI);
7396 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7397 IA64_BUILTIN_NAND_AND_FETCH_DI);
7399 #undef def_builtin
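/* Illustrative sketch (not part of the original source): how user code might
   call some of the builtins registered above, with prototypes implied by the
   function types built in ia64_init_builtins (__builtin_ia64_bsp takes no
   arguments and returns a pointer, __builtin_ia64_flushrs takes no arguments
   and returns void, and the _si variants operate on int through an int
   pointer).  */
static int event_count;

void *
snapshot_backing_store (void)
{
  /* Flush the dirty registers to the backing store, then read the current
     backing store pointer.  */
  __builtin_ia64_flushrs ();
  return __builtin_ia64_bsp ();
}

void
bump_event_count (void)
{
  /* Atomic increment; the old value is returned but ignored here.  */
  __sync_fetch_and_add_si (&event_count, 1);
}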
7402 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7405 tmp = [ptr];
7406 do {
7407 ret = tmp;
7408 ar.ccv = tmp;
7409 tmp <op>= value;
7410 cmpxchgsz.acq tmp = [ptr], tmp
7411 } while (tmp != ret)
7414 static rtx
7415 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7416 optab binoptab;
7417 enum machine_mode mode;
7418 tree arglist;
7419 rtx target;
7421 rtx ret, label, tmp, ccv, insn, mem, value;
7422 tree arg0, arg1;
7424 arg0 = TREE_VALUE (arglist);
7425 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7426 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7427 value = expand_expr (arg1, NULL_RTX, mode, 0);
7429 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7430 MEM_VOLATILE_P (mem) = 1;
7432 if (target && register_operand (target, mode))
7433 ret = target;
7434 else
7435 ret = gen_reg_rtx (mode);
7437 emit_insn (gen_mf ());
7439 /* Special case for fetchadd instructions. */
7440 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7442 if (mode == SImode)
7443 insn = gen_fetchadd_acq_si (ret, mem, value);
7444 else
7445 insn = gen_fetchadd_acq_di (ret, mem, value);
7446 emit_insn (insn);
7447 return ret;
7450 tmp = gen_reg_rtx (mode);
7451 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7452 emit_move_insn (tmp, mem);
7454 label = gen_label_rtx ();
7455 emit_label (label);
7456 emit_move_insn (ret, tmp);
7457 emit_move_insn (ccv, tmp);
7459 /* Perform the specific operation. Special case NAND by noticing
7460 one_cmpl_optab instead. */
7461 if (binoptab == one_cmpl_optab)
7463 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7464 binoptab = and_optab;
7466 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7468 if (mode == SImode)
7469 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7470 else
7471 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7472 emit_insn (insn);
7474 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7476 return ret;
7479 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7482 tmp = [ptr];
7483 do {
7484 old = tmp;
7485 ar.ccv = tmp;
7486 ret = tmp + value;
7487 cmpxchgsz.acq tmp = [ptr], ret
7488 } while (tmp != old)
7491 static rtx
7492 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7493 optab binoptab;
7494 enum machine_mode mode;
7495 tree arglist;
7496 rtx target;
7498 rtx old, label, tmp, ret, ccv, insn, mem, value;
7499 tree arg0, arg1;
7501 arg0 = TREE_VALUE (arglist);
7502 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7503 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7504 value = expand_expr (arg1, NULL_RTX, mode, 0);
7506 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7507 MEM_VOLATILE_P (mem) = 1;
7509 if (target && ! register_operand (target, mode))
7510 target = NULL_RTX;
7512 emit_insn (gen_mf ());
7513 tmp = gen_reg_rtx (mode);
7514 old = gen_reg_rtx (mode);
7515 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7517 emit_move_insn (tmp, mem);
7519 label = gen_label_rtx ();
7520 emit_label (label);
7521 emit_move_insn (old, tmp);
7522 emit_move_insn (ccv, tmp);
7524 /* Perform the specific operation. Special case NAND by noticing
7525 one_cmpl_optab instead. */
7526 if (binoptab == one_cmpl_optab)
7528 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7529 binoptab = and_optab;
7531 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7533 if (mode == SImode)
7534 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7535 else
7536 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7537 emit_insn (insn);
7539 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7541 return ret;
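/* Illustrative sketch (not part of the original source): the user-visible
   difference between the two expanders above.  The fetch_and_* builtins
   return the value the word held before the operation; the *_and_fetch
   builtins return the value after it.  */
static int n = 5;

void
fetch_vs_fetch_sketch (void)
{
  int before = __sync_fetch_and_add_si (&n, 3);   /* before == 5,  n is now 8  */
  int after  = __sync_add_and_fetch_si (&n, 3);   /* after  == 11, n is now 11 */
  (void) before;
  (void) after;
}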
7544 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7546 ar.ccv = oldval
7548 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7549 return ret
7551 For bool_ it's the same except return ret == oldval.
7554 static rtx
7555 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7556 enum machine_mode mode;
7557 int boolp;
7558 tree arglist;
7559 rtx target;
7561 tree arg0, arg1, arg2;
7562 rtx mem, old, new, ccv, tmp, insn;
7564 arg0 = TREE_VALUE (arglist);
7565 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7566 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7567 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7568 old = expand_expr (arg1, NULL_RTX, mode, 0);
7569 new = expand_expr (arg2, NULL_RTX, mode, 0);
7571 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7572 MEM_VOLATILE_P (mem) = 1;
7574 if (! register_operand (old, mode))
7575 old = copy_to_mode_reg (mode, old);
7576 if (! register_operand (new, mode))
7577 new = copy_to_mode_reg (mode, new);
7579 if (! boolp && target && register_operand (target, mode))
7580 tmp = target;
7581 else
7582 tmp = gen_reg_rtx (mode);
7584 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7585 emit_move_insn (ccv, old);
7586 emit_insn (gen_mf ());
7587 if (mode == SImode)
7588 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7589 else
7590 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7591 emit_insn (insn);
7593 if (boolp)
7595 if (! target)
7596 target = gen_reg_rtx (mode);
7597 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7599 else
7600 return tmp;
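/* Illustrative sketch (not part of the original source): a typical
   compare-and-swap retry loop using the builtins whose expansion is shown
   above.  The val_ form returns the old memory contents; the bool_ form used
   here returns nonzero exactly when the swap succeeded.  */
static int shared_word;

void
cas_increment_sketch (void)
{
  int oldval, newval;

  do
    {
      oldval = shared_word;
      newval = oldval + 1;
    }
  while (! __sync_bool_compare_and_swap_si (&shared_word, oldval, newval));
}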
7603 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7605 static rtx
7606 ia64_expand_lock_test_and_set (mode, arglist, target)
7607 enum machine_mode mode;
7608 tree arglist;
7609 rtx target;
7611 tree arg0, arg1;
7612 rtx mem, new, ret, insn;
7614 arg0 = TREE_VALUE (arglist);
7615 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7616 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7617 new = expand_expr (arg1, NULL_RTX, mode, 0);
7619 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7620 MEM_VOLATILE_P (mem) = 1;
7621 if (! register_operand (new, mode))
7622 new = copy_to_mode_reg (mode, new);
7624 if (target && register_operand (target, mode))
7625 ret = target;
7626 else
7627 ret = gen_reg_rtx (mode);
7629 if (mode == SImode)
7630 insn = gen_xchgsi (ret, mem, new);
7631 else
7632 insn = gen_xchgdi (ret, mem, new);
7633 emit_insn (insn);
7635 return ret;
7638 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7640 static rtx
7641 ia64_expand_lock_release (mode, arglist, target)
7642 enum machine_mode mode;
7643 tree arglist;
7644 rtx target ATTRIBUTE_UNUSED;
7646 tree arg0;
7647 rtx mem;
7649 arg0 = TREE_VALUE (arglist);
7650 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7652 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7653 MEM_VOLATILE_P (mem) = 1;
7655 emit_move_insn (mem, const0_rtx);
7657 return const0_rtx;
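/* Illustrative sketch (not part of the original source): a minimal spinlock
   built from the two builtins above.  lock_test_and_set expands to an atomic
   exchange, so a return value of 0 means the lock was free and is now ours;
   lock_release stores 0 back with release semantics.  */
static int lock_word;

void
spinlock_sketch (void)
{
  while (__sync_lock_test_and_set_si (&lock_word, 1) != 0)
    ;	/* spin until the previous value was 0 */

  /* ... critical section ... */

  __sync_lock_release_si (&lock_word);
}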
7661 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7662 tree exp;
7663 rtx target;
7664 rtx subtarget ATTRIBUTE_UNUSED;
7665 enum machine_mode mode ATTRIBUTE_UNUSED;
7666 int ignore ATTRIBUTE_UNUSED;
7668 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7669 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7670 tree arglist = TREE_OPERAND (exp, 1);
7672 switch (fcode)
7674 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7675 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7676 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7677 case IA64_BUILTIN_LOCK_RELEASE_SI:
7678 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7679 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7680 case IA64_BUILTIN_FETCH_AND_OR_SI:
7681 case IA64_BUILTIN_FETCH_AND_AND_SI:
7682 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7683 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7684 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7685 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7686 case IA64_BUILTIN_OR_AND_FETCH_SI:
7687 case IA64_BUILTIN_AND_AND_FETCH_SI:
7688 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7689 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7690 mode = SImode;
7691 break;
7693 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7694 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7695 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7696 case IA64_BUILTIN_LOCK_RELEASE_DI:
7697 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7698 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7699 case IA64_BUILTIN_FETCH_AND_OR_DI:
7700 case IA64_BUILTIN_FETCH_AND_AND_DI:
7701 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7702 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7703 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7704 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7705 case IA64_BUILTIN_OR_AND_FETCH_DI:
7706 case IA64_BUILTIN_AND_AND_FETCH_DI:
7707 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7708 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7709 mode = DImode;
7710 break;
7712 default:
7713 break;
7716 switch (fcode)
7718 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7719 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7720 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7722 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7723 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7724 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
7726 case IA64_BUILTIN_SYNCHRONIZE:
7727 emit_insn (gen_mf ());
7728 return const0_rtx;
7730 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7731 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7732 return ia64_expand_lock_test_and_set (mode, arglist, target);
7734 case IA64_BUILTIN_LOCK_RELEASE_SI:
7735 case IA64_BUILTIN_LOCK_RELEASE_DI:
7736 return ia64_expand_lock_release (mode, arglist, target);
7738 case IA64_BUILTIN_BSP:
7739 if (! target || ! register_operand (target, DImode))
7740 target = gen_reg_rtx (DImode);
7741 emit_insn (gen_bsp_value (target));
7742 return target;
7744 case IA64_BUILTIN_FLUSHRS:
7745 emit_insn (gen_flushrs ());
7746 return const0_rtx;
7748 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7749 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7750 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7752 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7753 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7754 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7756 case IA64_BUILTIN_FETCH_AND_OR_SI:
7757 case IA64_BUILTIN_FETCH_AND_OR_DI:
7758 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
7760 case IA64_BUILTIN_FETCH_AND_AND_SI:
7761 case IA64_BUILTIN_FETCH_AND_AND_DI:
7762 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
7764 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7765 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7766 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
7768 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7769 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7770 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
7772 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7773 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7774 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
7776 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7777 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7778 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
7780 case IA64_BUILTIN_OR_AND_FETCH_SI:
7781 case IA64_BUILTIN_OR_AND_FETCH_DI:
7782 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
7784 case IA64_BUILTIN_AND_AND_FETCH_SI:
7785 case IA64_BUILTIN_AND_AND_FETCH_DI:
7786 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
7788 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7789 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7790 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
7792 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7793 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7794 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
7796 default:
7797 break;
7800 return NULL_RTX;
7803 /* For HP-UX IA64, aggregate parameters are passed in the most
7804 significant bits of the stack slot. */
7806 enum direction
7807 ia64_hpux_function_arg_padding (mode, type)
7808 enum machine_mode mode;
7809 tree type;
7811 /* Exception to normal case for structures/unions/etc. */
7813 if (type && AGGREGATE_TYPE_P (type)
7814 && int_size_in_bytes (type) < UNITS_PER_WORD)
7815 return upward;
7817 /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
7818 hardwired to be true. */
7820 return((mode == BLKmode
7821 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7822 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
7823 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
7824 ? downward : upward);
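/* Illustrative sketch (not part of the original source): for the HP-UX rule
   above, a 4-byte aggregate such as

       struct small_arg { int x; };

   is smaller than UNITS_PER_WORD, so it is padded upward, i.e. passed in the
   most significant bits of its stack slot; larger aggregates and scalar
   modes fall through to the expression that follows the special case.  */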