[official-gcc.git] / gcc / config / ia64 / ia64.c
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "recog.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "except.h"
39 #include "function.h"
40 #include "ggc.h"
41 #include "basic-block.h"
42 #include "toplev.h"
43 #include "sched-int.h"
44 #include "timevar.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "tm_p.h"
49 /* This is used for communication between ASM_OUTPUT_LABEL and
50 ASM_OUTPUT_LABELREF. */
51 int ia64_asm_output_label = 0;
53 /* Define the information needed to generate branch and scc insns. This is
54 stored from the compare operation. */
55 struct rtx_def * ia64_compare_op0;
56 struct rtx_def * ia64_compare_op1;
58 /* Register names for ia64_expand_prologue. */
59 static const char * const ia64_reg_numbers[96] =
60 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
61 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
62 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
63 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
64 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
65 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
66 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
67 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
68 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
69 "r104","r105","r106","r107","r108","r109","r110","r111",
70 "r112","r113","r114","r115","r116","r117","r118","r119",
71 "r120","r121","r122","r123","r124","r125","r126","r127"};
73 /* ??? These strings could be shared with REGISTER_NAMES. */
74 static const char * const ia64_input_reg_names[8] =
75 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
77 /* ??? These strings could be shared with REGISTER_NAMES. */
78 static const char * const ia64_local_reg_names[80] =
79 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
80 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
81 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
82 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
83 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
84 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
85 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
86 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
87 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
88 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
90 /* ??? These strings could be shared with REGISTER_NAMES. */
91 static const char * const ia64_output_reg_names[8] =
92 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
94 /* String used with the -mfixed-range= option. */
95 const char *ia64_fixed_range_string;
97 /* Determines whether we run our final scheduling pass or not. We always
98 avoid the normal second scheduling pass. */
99 static int ia64_flag_schedule_insns2;
101 /* Variables which are this size or smaller are put in the sdata/sbss
102 sections. */
104 unsigned int ia64_section_threshold;
106 static int find_gr_spill PARAMS ((int));
107 static int next_scratch_gr_reg PARAMS ((void));
108 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
109 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
110 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
111 static void finish_spill_pointers PARAMS ((void));
112 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
113 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
114 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
115 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
116 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
117 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
119 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
120 static void fix_range PARAMS ((const char *));
121 static void ia64_add_gc_roots PARAMS ((void));
122 static void ia64_init_machine_status PARAMS ((struct function *));
123 static void ia64_mark_machine_status PARAMS ((struct function *));
124 static void ia64_free_machine_status PARAMS ((struct function *));
125 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
126 static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
127 static void emit_predicate_relation_info PARAMS ((void));
128 static bool ia64_in_small_data_p PARAMS ((tree));
129 static void ia64_encode_section_info PARAMS ((tree, int));
130 static void process_epilogue PARAMS ((void));
131 static int process_set PARAMS ((FILE *, rtx));
133 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
134 tree, rtx));
135 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
136 tree, rtx));
137 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
138 tree, rtx));
139 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
140 tree, rtx));
141 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
142 static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
143 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
144 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
145 static void ia64_output_function_end_prologue PARAMS ((FILE *));
147 static int ia64_issue_rate PARAMS ((void));
148 static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
149 static void ia64_sched_init PARAMS ((FILE *, int, int));
150 static void ia64_sched_finish PARAMS ((FILE *, int));
151 static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
152 int *, int, int));
153 static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
154 static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
155 static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
157 static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
158 unsigned HOST_WIDE_INT));
159 static void ia64_aix_select_section PARAMS ((tree, int,
160 unsigned HOST_WIDE_INT))
161 ATTRIBUTE_UNUSED;
162 static void ia64_aix_unique_section PARAMS ((tree, int))
163 ATTRIBUTE_UNUSED;
164 static void ia64_aix_select_rtx_section PARAMS ((enum machine_mode, rtx,
165 unsigned HOST_WIDE_INT))
166 ATTRIBUTE_UNUSED;
168 /* Table of valid machine attributes. */
169 static const struct attribute_spec ia64_attribute_table[] =
171 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
172 { "syscall_linkage", 0, 0, false, true, true, NULL },
173 { NULL, 0, 0, false, false, false, NULL }
176 /* Initialize the GCC target structure. */
177 #undef TARGET_ATTRIBUTE_TABLE
178 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
180 #undef TARGET_INIT_BUILTINS
181 #define TARGET_INIT_BUILTINS ia64_init_builtins
183 #undef TARGET_EXPAND_BUILTIN
184 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
186 #undef TARGET_ASM_BYTE_OP
187 #define TARGET_ASM_BYTE_OP "\tdata1\t"
188 #undef TARGET_ASM_ALIGNED_HI_OP
189 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
190 #undef TARGET_ASM_ALIGNED_SI_OP
191 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
192 #undef TARGET_ASM_ALIGNED_DI_OP
193 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
194 #undef TARGET_ASM_UNALIGNED_HI_OP
195 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
196 #undef TARGET_ASM_UNALIGNED_SI_OP
197 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
198 #undef TARGET_ASM_UNALIGNED_DI_OP
199 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
200 #undef TARGET_ASM_INTEGER
201 #define TARGET_ASM_INTEGER ia64_assemble_integer
203 #undef TARGET_ASM_FUNCTION_PROLOGUE
204 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
205 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
206 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
207 #undef TARGET_ASM_FUNCTION_EPILOGUE
208 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
210 #undef TARGET_IN_SMALL_DATA_P
211 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
212 #undef TARGET_ENCODE_SECTION_INFO
213 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
215 #undef TARGET_SCHED_ADJUST_COST
216 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
217 #undef TARGET_SCHED_ISSUE_RATE
218 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
219 #undef TARGET_SCHED_VARIABLE_ISSUE
220 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
221 #undef TARGET_SCHED_INIT
222 #define TARGET_SCHED_INIT ia64_sched_init
223 #undef TARGET_SCHED_FINISH
224 #define TARGET_SCHED_FINISH ia64_sched_finish
225 #undef TARGET_SCHED_REORDER
226 #define TARGET_SCHED_REORDER ia64_sched_reorder
227 #undef TARGET_SCHED_REORDER2
228 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
230 struct gcc_target targetm = TARGET_INITIALIZER;
232 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
235 call_operand (op, mode)
236 rtx op;
237 enum machine_mode mode;
239 if (mode != GET_MODE (op))
240 return 0;
242 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
243 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
246 /* Return 1 if OP refers to a symbol in the sdata section. */
249 sdata_symbolic_operand (op, mode)
250 rtx op;
251 enum machine_mode mode ATTRIBUTE_UNUSED;
253 switch (GET_CODE (op))
255 case CONST:
256 if (GET_CODE (XEXP (op, 0)) != PLUS
257 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
258 break;
259 op = XEXP (XEXP (op, 0), 0);
260 /* FALLTHRU */
262 case SYMBOL_REF:
263 if (CONSTANT_POOL_ADDRESS_P (op))
264 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
265 else
266 return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;
268 default:
269 break;
272 return 0;
275 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
278 got_symbolic_operand (op, mode)
279 rtx op;
280 enum machine_mode mode ATTRIBUTE_UNUSED;
282 switch (GET_CODE (op))
284 case CONST:
285 op = XEXP (op, 0);
286 if (GET_CODE (op) != PLUS)
287 return 0;
288 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
289 return 0;
290 op = XEXP (op, 1);
291 if (GET_CODE (op) != CONST_INT)
292 return 0;
294 return 1;
296 /* Ok if we're not using GOT entries at all. */
297 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
298 return 1;
300 /* "Ok" while emitting rtl, since otherwise we won't be provided
301 with the entire offset during emission, which makes it very
302 hard to split the offset into high and low parts. */
303 if (rtx_equal_function_value_matters)
304 return 1;
306 /* Force the low 14 bits of the constant to zero so that we do not
307 use up so many GOT entries. */
308 return (INTVAL (op) & 0x3fff) == 0;
310 case SYMBOL_REF:
311 case LABEL_REF:
312 return 1;
314 default:
315 break;
317 return 0;
320 /* Return 1 if OP refers to a symbol. */
323 symbolic_operand (op, mode)
324 rtx op;
325 enum machine_mode mode ATTRIBUTE_UNUSED;
327 switch (GET_CODE (op))
329 case CONST:
330 case SYMBOL_REF:
331 case LABEL_REF:
332 return 1;
334 default:
335 break;
337 return 0;
340 /* Return 1 if OP refers to a function. */
343 function_operand (op, mode)
344 rtx op;
345 enum machine_mode mode ATTRIBUTE_UNUSED;
347 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
348 return 1;
349 else
350 return 0;
353 /* Return 1 if OP is setjmp or a similar function. */
355 /* ??? This is an unsatisfying solution. Should rethink. */
358 setjmp_operand (op, mode)
359 rtx op;
360 enum machine_mode mode ATTRIBUTE_UNUSED;
362 const char *name;
363 int retval = 0;
365 if (GET_CODE (op) != SYMBOL_REF)
366 return 0;
368 name = XSTR (op, 0);
370 /* The following code is borrowed from special_function_p in calls.c. */
372 /* Disregard prefix _, __ or __x. */
373 if (name[0] == '_')
375 if (name[1] == '_' && name[2] == 'x')
376 name += 3;
377 else if (name[1] == '_')
378 name += 2;
379 else
380 name += 1;
383 if (name[0] == 's')
385 retval
386 = ((name[1] == 'e'
387 && (! strcmp (name, "setjmp")
388 || ! strcmp (name, "setjmp_syscall")))
389 || (name[1] == 'i'
390 && ! strcmp (name, "sigsetjmp"))
391 || (name[1] == 'a'
392 && ! strcmp (name, "savectx")));
394 else if ((name[0] == 'q' && name[1] == 's'
395 && ! strcmp (name, "qsetjmp"))
396 || (name[0] == 'v' && name[1] == 'f'
397 && ! strcmp (name, "vfork")))
398 retval = 1;
400 return retval;
 403 /* Return 1 if OP is a general operand, but exclude symbolic operands
 404    when generating PIC. */
406 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
407 from PREDICATE_CODES. */
410 move_operand (op, mode)
411 rtx op;
412 enum machine_mode mode;
414 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
415 return 0;
417 return general_operand (op, mode);
420 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
423 gr_register_operand (op, mode)
424 rtx op;
425 enum machine_mode mode;
427 if (! register_operand (op, mode))
428 return 0;
429 if (GET_CODE (op) == SUBREG)
430 op = SUBREG_REG (op);
431 if (GET_CODE (op) == REG)
433 unsigned int regno = REGNO (op);
434 if (regno < FIRST_PSEUDO_REGISTER)
435 return GENERAL_REGNO_P (regno);
437 return 1;
440 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
443 fr_register_operand (op, mode)
444 rtx op;
445 enum machine_mode mode;
447 if (! register_operand (op, mode))
448 return 0;
449 if (GET_CODE (op) == SUBREG)
450 op = SUBREG_REG (op);
451 if (GET_CODE (op) == REG)
453 unsigned int regno = REGNO (op);
454 if (regno < FIRST_PSEUDO_REGISTER)
455 return FR_REGNO_P (regno);
457 return 1;
460 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
463 grfr_register_operand (op, mode)
464 rtx op;
465 enum machine_mode mode;
467 if (! register_operand (op, mode))
468 return 0;
469 if (GET_CODE (op) == SUBREG)
470 op = SUBREG_REG (op);
471 if (GET_CODE (op) == REG)
473 unsigned int regno = REGNO (op);
474 if (regno < FIRST_PSEUDO_REGISTER)
475 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
477 return 1;
480 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
483 gr_nonimmediate_operand (op, mode)
484 rtx op;
485 enum machine_mode mode;
487 if (! nonimmediate_operand (op, mode))
488 return 0;
489 if (GET_CODE (op) == SUBREG)
490 op = SUBREG_REG (op);
491 if (GET_CODE (op) == REG)
493 unsigned int regno = REGNO (op);
494 if (regno < FIRST_PSEUDO_REGISTER)
495 return GENERAL_REGNO_P (regno);
497 return 1;
 500 /* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg. */
503 fr_nonimmediate_operand (op, mode)
504 rtx op;
505 enum machine_mode mode;
507 if (! nonimmediate_operand (op, mode))
508 return 0;
509 if (GET_CODE (op) == SUBREG)
510 op = SUBREG_REG (op);
511 if (GET_CODE (op) == REG)
513 unsigned int regno = REGNO (op);
514 if (regno < FIRST_PSEUDO_REGISTER)
515 return FR_REGNO_P (regno);
517 return 1;
520 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
523 grfr_nonimmediate_operand (op, mode)
524 rtx op;
525 enum machine_mode mode;
527 if (! nonimmediate_operand (op, mode))
528 return 0;
529 if (GET_CODE (op) == SUBREG)
530 op = SUBREG_REG (op);
531 if (GET_CODE (op) == REG)
533 unsigned int regno = REGNO (op);
534 if (regno < FIRST_PSEUDO_REGISTER)
535 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
537 return 1;
540 /* Return 1 if OP is a GR register operand, or zero. */
543 gr_reg_or_0_operand (op, mode)
544 rtx op;
545 enum machine_mode mode;
547 return (op == const0_rtx || gr_register_operand (op, mode));
550 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
553 gr_reg_or_5bit_operand (op, mode)
554 rtx op;
555 enum machine_mode mode;
557 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
558 || GET_CODE (op) == CONSTANT_P_RTX
559 || gr_register_operand (op, mode));
562 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
565 gr_reg_or_6bit_operand (op, mode)
566 rtx op;
567 enum machine_mode mode;
569 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
570 || GET_CODE (op) == CONSTANT_P_RTX
571 || gr_register_operand (op, mode));
574 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
577 gr_reg_or_8bit_operand (op, mode)
578 rtx op;
579 enum machine_mode mode;
581 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
582 || GET_CODE (op) == CONSTANT_P_RTX
583 || gr_register_operand (op, mode));
586 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
589 grfr_reg_or_8bit_operand (op, mode)
590 rtx op;
591 enum machine_mode mode;
593 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
594 || GET_CODE (op) == CONSTANT_P_RTX
595 || grfr_register_operand (op, mode));
598 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
599 operand. */
602 gr_reg_or_8bit_adjusted_operand (op, mode)
603 rtx op;
604 enum machine_mode mode;
606 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
607 || GET_CODE (op) == CONSTANT_P_RTX
608 || gr_register_operand (op, mode));
611 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
612 immediate and an 8 bit adjusted immediate operand. This is necessary
613 because when we emit a compare, we don't know what the condition will be,
 614    so we need the intersection of the immediates accepted by GT and LT. */
617 gr_reg_or_8bit_and_adjusted_operand (op, mode)
618 rtx op;
619 enum machine_mode mode;
621 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
622 && CONST_OK_FOR_L (INTVAL (op)))
623 || GET_CODE (op) == CONSTANT_P_RTX
624 || gr_register_operand (op, mode));
627 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
630 gr_reg_or_14bit_operand (op, mode)
631 rtx op;
632 enum machine_mode mode;
634 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
635 || GET_CODE (op) == CONSTANT_P_RTX
636 || gr_register_operand (op, mode));
639 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
642 gr_reg_or_22bit_operand (op, mode)
643 rtx op;
644 enum machine_mode mode;
646 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
647 || GET_CODE (op) == CONSTANT_P_RTX
648 || gr_register_operand (op, mode));
651 /* Return 1 if OP is a 6 bit immediate operand. */
654 shift_count_operand (op, mode)
655 rtx op;
656 enum machine_mode mode ATTRIBUTE_UNUSED;
658 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
659 || GET_CODE (op) == CONSTANT_P_RTX);
662 /* Return 1 if OP is a 5 bit immediate operand. */
665 shift_32bit_count_operand (op, mode)
666 rtx op;
667 enum machine_mode mode ATTRIBUTE_UNUSED;
669 return ((GET_CODE (op) == CONST_INT
670 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
671 || GET_CODE (op) == CONSTANT_P_RTX);
674 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
677 shladd_operand (op, mode)
678 rtx op;
679 enum machine_mode mode ATTRIBUTE_UNUSED;
681 return (GET_CODE (op) == CONST_INT
682 && (INTVAL (op) == 2 || INTVAL (op) == 4
683 || INTVAL (op) == 8 || INTVAL (op) == 16));
686 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
689 fetchadd_operand (op, mode)
690 rtx op;
691 enum machine_mode mode ATTRIBUTE_UNUSED;
693 return (GET_CODE (op) == CONST_INT
694 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
695 INTVAL (op) == -4 || INTVAL (op) == -1 ||
696 INTVAL (op) == 1 || INTVAL (op) == 4 ||
697 INTVAL (op) == 8 || INTVAL (op) == 16));
700 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
703 fr_reg_or_fp01_operand (op, mode)
704 rtx op;
705 enum machine_mode mode;
707 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
708 || fr_register_operand (op, mode));
711 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
712 POST_MODIFY with a REG as displacement. */
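/* Concretely, the form rejected below is
     (mem (post_modify (reg) (plus (reg) (reg))))
   i.e. a post-modify whose increment is itself a register; the nested
   XEXP walk extracts that register displacement term from the PLUS. */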
715 destination_operand (op, mode)
716 rtx op;
717 enum machine_mode mode;
719 if (! nonimmediate_operand (op, mode))
720 return 0;
721 if (GET_CODE (op) == MEM
722 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
723 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
724 return 0;
725 return 1;
728 /* Like memory_operand, but don't allow post-increments. */
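/* GET_RTX_CLASS yields 'a' for the auto-increment address codes
   (PRE_INC, POST_INC, PRE_DEC, POST_DEC, PRE_MODIFY, POST_MODIFY), so the
   class test below rejects any address that modifies its base register. */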
731 not_postinc_memory_operand (op, mode)
732 rtx op;
733 enum machine_mode mode;
735 return (memory_operand (op, mode)
736 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
 739 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
740 signed immediate operand. */
743 normal_comparison_operator (op, mode)
744 register rtx op;
745 enum machine_mode mode;
747 enum rtx_code code = GET_CODE (op);
748 return ((mode == VOIDmode || GET_MODE (op) == mode)
749 && (code == EQ || code == NE
750 || code == GT || code == LE || code == GTU || code == LEU));
753 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
754 signed immediate operand. */
757 adjusted_comparison_operator (op, mode)
758 register rtx op;
759 enum machine_mode mode;
761 enum rtx_code code = GET_CODE (op);
762 return ((mode == VOIDmode || GET_MODE (op) == mode)
763 && (code == LT || code == GE || code == LTU || code == GEU));
766 /* Return 1 if this is a signed inequality operator. */
769 signed_inequality_operator (op, mode)
770 register rtx op;
771 enum machine_mode mode;
773 enum rtx_code code = GET_CODE (op);
774 return ((mode == VOIDmode || GET_MODE (op) == mode)
775 && (code == GE || code == GT
776 || code == LE || code == LT));
779 /* Return 1 if this operator is valid for predication. */
782 predicate_operator (op, mode)
783 register rtx op;
784 enum machine_mode mode;
786 enum rtx_code code = GET_CODE (op);
787 return ((GET_MODE (op) == mode || mode == VOIDmode)
788 && (code == EQ || code == NE));
791 /* Return 1 if this operator can be used in a conditional operation. */
794 condop_operator (op, mode)
795 register rtx op;
796 enum machine_mode mode;
798 enum rtx_code code = GET_CODE (op);
799 return ((GET_MODE (op) == mode || mode == VOIDmode)
800 && (code == PLUS || code == MINUS || code == AND
801 || code == IOR || code == XOR));
804 /* Return 1 if this is the ar.lc register. */
807 ar_lc_reg_operand (op, mode)
808 register rtx op;
809 enum machine_mode mode;
811 return (GET_MODE (op) == DImode
812 && (mode == DImode || mode == VOIDmode)
813 && GET_CODE (op) == REG
814 && REGNO (op) == AR_LC_REGNUM);
817 /* Return 1 if this is the ar.ccv register. */
820 ar_ccv_reg_operand (op, mode)
821 register rtx op;
822 enum machine_mode mode;
824 return ((GET_MODE (op) == mode || mode == VOIDmode)
825 && GET_CODE (op) == REG
826 && REGNO (op) == AR_CCV_REGNUM);
829 /* Return 1 if this is the ar.pfs register. */
832 ar_pfs_reg_operand (op, mode)
833 register rtx op;
834 enum machine_mode mode;
836 return ((GET_MODE (op) == mode || mode == VOIDmode)
837 && GET_CODE (op) == REG
838 && REGNO (op) == AR_PFS_REGNUM);
841 /* Like general_operand, but don't allow (mem (addressof)). */
844 general_tfmode_operand (op, mode)
845 rtx op;
846 enum machine_mode mode;
848 if (! general_operand (op, mode))
849 return 0;
850 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
851 return 0;
852 return 1;
855 /* Similarly. */
858 destination_tfmode_operand (op, mode)
859 rtx op;
860 enum machine_mode mode;
862 if (! destination_operand (op, mode))
863 return 0;
864 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
865 return 0;
866 return 1;
869 /* Similarly. */
872 tfreg_or_fp01_operand (op, mode)
873 rtx op;
874 enum machine_mode mode;
876 if (GET_CODE (op) == SUBREG)
877 return 0;
878 return fr_reg_or_fp01_operand (op, mode);
881 /* Return 1 if OP is valid as a base register in a reg + offset address. */
884 basereg_operand (op, mode)
885 rtx op;
886 enum machine_mode mode;
888 /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
889 checks from pa.c basereg_operand as well? Seems to be OK without them
890 in test runs. */
892 return (register_operand (op, mode) &&
893 REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
896 /* Return 1 if the operands of a move are ok. */
899 ia64_move_ok (dst, src)
900 rtx dst, src;
902 /* If we're under init_recog_no_volatile, we'll not be able to use
903 memory_operand. So check the code directly and don't worry about
904 the validity of the underlying address, which should have been
905 checked elsewhere anyway. */
906 if (GET_CODE (dst) != MEM)
907 return 1;
908 if (GET_CODE (src) == MEM)
909 return 0;
910 if (register_operand (src, VOIDmode))
911 return 1;
 913   /* Otherwise, this must be a constant, and then it must be 0, 0.0 or 1.0. */
914 if (INTEGRAL_MODE_P (GET_MODE (dst)))
915 return src == const0_rtx;
916 else
917 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
 920 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
921 Return the length of the field, or <= 0 on failure. */
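/* For example, a mask of 0x0ff0 with a shift count of 4 becomes 0xff after
   the shift below; 0xff + 1 == 0x100 is a power of two, so exact_log2
   returns 8, the width of the field.  A mask that is not a contiguous run
   of ones starting at bit SHIFT yields a non-power-of-two, for which
   exact_log2 returns -1. */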
924 ia64_depz_field_mask (rop, rshift)
925 rtx rop, rshift;
927 unsigned HOST_WIDE_INT op = INTVAL (rop);
928 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
930 /* Get rid of the zero bits we're shifting in. */
931 op >>= shift;
933 /* We must now have a solid block of 1's at bit 0. */
934 return exact_log2 (op + 1);
937 /* Expand a symbolic constant load. */
938 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
940 void
941 ia64_expand_load_address (dest, src, scratch)
942 rtx dest, src, scratch;
944 rtx temp;
946 /* The destination could be a MEM during initial rtl generation,
947 which isn't a valid destination for the PIC load address patterns. */
948 if (! register_operand (dest, DImode))
949 temp = gen_reg_rtx (DImode);
950 else
951 temp = dest;
953 if (TARGET_AUTO_PIC)
954 emit_insn (gen_load_gprel64 (temp, src));
955 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
956 emit_insn (gen_load_fptr (temp, src));
957 else if (sdata_symbolic_operand (src, DImode))
958 emit_insn (gen_load_gprel (temp, src));
959 else if (GET_CODE (src) == CONST
960 && GET_CODE (XEXP (src, 0)) == PLUS
961 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
962 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
964 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
965 rtx sym = XEXP (XEXP (src, 0), 0);
966 HOST_WIDE_INT ofs, hi, lo;
968 /* Split the offset into a sign extended 14-bit low part
969 and a complementary high part. */
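/* For example, an offset of 0x2345 splits into lo = -0x1cbb, which fits
   the signed 14-bit immediate of the add emitted below, and hi = 0x4000,
   so that hi + lo reconstructs the original offset. */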
970 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
971 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
972 hi = ofs - lo;
974 if (! scratch)
975 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
977 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
978 scratch));
979 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
981 else
983 rtx insn;
984 if (! scratch)
985 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
987 insn = emit_insn (gen_load_symptr (temp, src, scratch));
988 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
991 if (temp != dest)
992 emit_move_insn (dest, temp);
996 ia64_gp_save_reg (setjmp_p)
997 int setjmp_p;
999 rtx save = cfun->machine->ia64_gp_save;
1001 if (save != NULL)
1003 /* We can't save GP in a pseudo if we are calling setjmp, because
1004 pseudos won't be restored by longjmp. For now, we save it in r4. */
1005 /* ??? It would be more efficient to save this directly into a stack
1006 slot. Unfortunately, the stack slot address gets cse'd across
1007 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
1008 place. */
1010 /* ??? Get the barf bag, Virginia. We've got to replace this thing
1011 in place, since this rtx is used in exception handling receivers.
1012 Moreover, we must get this rtx out of regno_reg_rtx or reload
1013 will do the wrong thing. */
1014 unsigned int old_regno = REGNO (save);
1015 if (setjmp_p && old_regno != GR_REG (4))
1017 REGNO (save) = GR_REG (4);
1018 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
1021 else
1023 if (setjmp_p)
1024 save = gen_rtx_REG (DImode, GR_REG (4));
1025 else if (! optimize)
1026 save = gen_rtx_REG (DImode, LOC_REG (0));
1027 else
1028 save = gen_reg_rtx (DImode);
1029 cfun->machine->ia64_gp_save = save;
1032 return save;
1035 /* Split a post-reload TImode reference into two DImode components. */
1038 ia64_split_timode (out, in, scratch)
1039 rtx out[2];
1040 rtx in, scratch;
1042 switch (GET_CODE (in))
1044 case REG:
1045 out[0] = gen_rtx_REG (DImode, REGNO (in));
1046 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1047 return NULL_RTX;
1049 case MEM:
1051 rtx base = XEXP (in, 0);
1053 switch (GET_CODE (base))
1055 case REG:
1056 out[0] = adjust_address (in, DImode, 0);
1057 break;
1058 case POST_MODIFY:
1059 base = XEXP (base, 0);
1060 out[0] = adjust_address (in, DImode, 0);
1061 break;
1063 /* Since we're changing the mode, we need to change to POST_MODIFY
1064 as well to preserve the size of the increment. Either that or
1065 do the update in two steps, but we've already got this scratch
1066 register handy so let's use it. */
1067 case POST_INC:
1068 base = XEXP (base, 0);
1069 out[0]
1070 = change_address (in, DImode,
1071 gen_rtx_POST_MODIFY
1072 (Pmode, base, plus_constant (base, 16)));
1073 break;
1074 case POST_DEC:
1075 base = XEXP (base, 0);
1076 out[0]
1077 = change_address (in, DImode,
1078 gen_rtx_POST_MODIFY
1079 (Pmode, base, plus_constant (base, -16)));
1080 break;
1081 default:
1082 abort ();
1085 if (scratch == NULL_RTX)
1086 abort ();
1087 out[1] = change_address (in, DImode, scratch);
1088 return gen_adddi3 (scratch, base, GEN_INT (8));
1091 case CONST_INT:
1092 case CONST_DOUBLE:
1093 split_double (in, &out[0], &out[1]);
1094 return NULL_RTX;
1096 default:
1097 abort ();
1101 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1102 through memory plus an extra GR scratch register. Except that you can
1103 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1104 SECONDARY_RELOAD_CLASS, but not both.
1106 We got into problems in the first place by allowing a construct like
1107 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1108 This solution attempts to prevent this situation from occurring. When
1109 we see something like the above, we spill the inner register to memory. */
1112 spill_tfmode_operand (in, force)
1113 rtx in;
1114 int force;
1116 if (GET_CODE (in) == SUBREG
1117 && GET_MODE (SUBREG_REG (in)) == TImode
1118 && GET_CODE (SUBREG_REG (in)) == REG)
1120 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
1121 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1123 else if (force && GET_CODE (in) == REG)
1125 rtx mem = gen_mem_addressof (in, NULL_TREE);
1126 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1128 else if (GET_CODE (in) == MEM
1129 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1130 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1131 else
1132 return in;
1135 /* Emit comparison instruction if necessary, returning the expression
1136 that holds the compare result in the proper mode. */
1139 ia64_expand_compare (code, mode)
1140 enum rtx_code code;
1141 enum machine_mode mode;
1143 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1144 rtx cmp;
1146 /* If we have a BImode input, then we already have a compare result, and
1147 do not need to emit another comparison. */
1148 if (GET_MODE (op0) == BImode)
1150 if ((code == NE || code == EQ) && op1 == const0_rtx)
1151 cmp = op0;
1152 else
1153 abort ();
1155 else
1157 cmp = gen_reg_rtx (BImode);
1158 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1159 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1160 code = NE;
1163 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1166 /* Emit the appropriate sequence for a call. */
1168 void
1169 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1170 rtx retval;
1171 rtx addr;
1172 rtx nextarg;
1173 int sibcall_p;
1175 rtx insn, b0, pfs, gp_save, narg_rtx, dest;
1176 bool indirect_p;
1177 int narg;
1179 addr = XEXP (addr, 0);
1180 b0 = gen_rtx_REG (DImode, R_BR (0));
1181 pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);
1183 if (! nextarg)
1184 narg = 0;
1185 else if (IN_REGNO_P (REGNO (nextarg)))
1186 narg = REGNO (nextarg) - IN_REG (0);
1187 else
1188 narg = REGNO (nextarg) - OUT_REG (0);
1189 narg_rtx = GEN_INT (narg);
1191 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1193 if (sibcall_p)
1194 insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
1195 else if (! retval)
1196 insn = gen_call_nopic (addr, narg_rtx, b0);
1197 else
1198 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1199 emit_call_insn (insn);
1200 return;
1203 indirect_p = ! symbolic_operand (addr, VOIDmode);
1205 if (sibcall_p || (TARGET_CONST_GP && !indirect_p))
1206 gp_save = NULL_RTX;
1207 else
1208 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1210 if (gp_save)
1211 emit_move_insn (gp_save, pic_offset_table_rtx);
1213 /* If this is an indirect call, then we have the address of a descriptor. */
1214 if (indirect_p)
1216 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1217 emit_move_insn (pic_offset_table_rtx,
1218 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1220 else
1221 dest = addr;
1223 if (sibcall_p)
1224 insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
1225 else if (! retval)
1226 insn = gen_call_pic (dest, narg_rtx, b0);
1227 else
1228 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1229 emit_call_insn (insn);
1231 if (gp_save)
1232 emit_move_insn (pic_offset_table_rtx, gp_save);
1235 /* Begin the assembly file. */
1237 void
1238 emit_safe_across_calls (f)
1239 FILE *f;
1241 unsigned int rs, re;
1242 int out_state;
1244 rs = 1;
1245 out_state = 0;
1246 while (1)
1248 while (rs < 64 && call_used_regs[PR_REG (rs)])
1249 rs++;
1250 if (rs >= 64)
1251 break;
1252 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1253 continue;
1254 if (out_state == 0)
1256 fputs ("\t.pred.safe_across_calls ", f);
1257 out_state = 1;
1259 else
1260 fputc (',', f);
1261 if (re == rs + 1)
1262 fprintf (f, "p%u", rs);
1263 else
1264 fprintf (f, "p%u-p%u", rs, re - 1);
1265 rs = re + 1;
1267 if (out_state)
1268 fputc ('\n', f);
1272 /* Structure to be filled in by ia64_compute_frame_size with register
1273 save masks and offsets for the current function. */
1275 struct ia64_frame_info
1277 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1278 the caller's scratch area. */
1279 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1280 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1281 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
1282 HARD_REG_SET mask; /* mask of saved registers. */
1283 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1284 registers or long-term scratches. */
1285 int n_spilled; /* number of spilled registers. */
1286 int reg_fp; /* register for fp. */
1287 int reg_save_b0; /* save register for b0. */
1288 int reg_save_pr; /* save register for prs. */
1289 int reg_save_ar_pfs; /* save register for ar.pfs. */
1290 int reg_save_ar_unat; /* save register for ar.unat. */
1291 int reg_save_ar_lc; /* save register for ar.lc. */
1292 int n_input_regs; /* number of input registers used. */
1293 int n_local_regs; /* number of local registers used. */
1294 int n_output_regs; /* number of output registers used. */
1295 int n_rotate_regs; /* number of rotating registers used. */
1297 char need_regstk; /* true if a .regstk directive needed. */
1298 char initialized; /* true if the data is finalized. */
1301 /* Current frame information calculated by ia64_compute_frame_size. */
1302 static struct ia64_frame_info current_frame_info;
1304 /* Helper function for ia64_compute_frame_size: find an appropriate general
 1305    register to spill some special register to.  current_frame_info.gr_used_mask
 1306    contains bits in GR0 to GR31 that have already been allocated by this routine.
1307 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1309 static int
1310 find_gr_spill (try_locals)
1311 int try_locals;
1313 int regno;
1315 /* If this is a leaf function, first try an otherwise unused
1316 call-clobbered register. */
1317 if (current_function_is_leaf)
1319 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1320 if (! regs_ever_live[regno]
1321 && call_used_regs[regno]
1322 && ! fixed_regs[regno]
1323 && ! global_regs[regno]
1324 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1326 current_frame_info.gr_used_mask |= 1 << regno;
1327 return regno;
1331 if (try_locals)
1333 regno = current_frame_info.n_local_regs;
1334 /* If there is a frame pointer, then we can't use loc79, because
1335 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1336 reg_name switching code in ia64_expand_prologue. */
1337 if (regno < (80 - frame_pointer_needed))
1339 current_frame_info.n_local_regs = regno + 1;
1340 return LOC_REG (0) + regno;
1344 /* Failed to find a general register to spill to. Must use stack. */
1345 return 0;
1348 /* In order to make for nice schedules, we try to allocate every temporary
1349 to a different register. We must of course stay away from call-saved,
1350 fixed, and global registers. We must also stay away from registers
1351 allocated in current_frame_info.gr_used_mask, since those include regs
1352 used all through the prologue.
1354 Any register allocated here must be used immediately. The idea is to
1355 aid scheduling, not to solve data flow problems. */
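/* The round-robin search below starts just past the register handed out
   last time (last_scratch_gr_reg) and wraps around the 32 static GRs,
   skipping call-saved, fixed, global, and already-claimed registers. */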
1357 static int last_scratch_gr_reg;
1359 static int
1360 next_scratch_gr_reg ()
1362 int i, regno;
1364 for (i = 0; i < 32; ++i)
1366 regno = (last_scratch_gr_reg + i + 1) & 31;
1367 if (call_used_regs[regno]
1368 && ! fixed_regs[regno]
1369 && ! global_regs[regno]
1370 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1372 last_scratch_gr_reg = regno;
1373 return regno;
1377 /* There must be _something_ available. */
1378 abort ();
1381 /* Helper function for ia64_compute_frame_size, called through
1382 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1384 static void
1385 mark_reg_gr_used_mask (reg, data)
1386 rtx reg;
1387 void *data ATTRIBUTE_UNUSED;
1389 unsigned int regno = REGNO (reg);
1390 if (regno < 32)
1392 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1393 for (i = 0; i < n; ++i)
1394 current_frame_info.gr_used_mask |= 1 << (regno + i);
 1398 /* Compute the size and layout of the stack frame for the current function,
 1399    recording the results in current_frame_info.  SIZE is the number of bytes
 1400    of space needed for local variables. */
1402 static void
1403 ia64_compute_frame_size (size)
1404 HOST_WIDE_INT size;
1406 HOST_WIDE_INT total_size;
1407 HOST_WIDE_INT spill_size = 0;
1408 HOST_WIDE_INT extra_spill_size = 0;
1409 HOST_WIDE_INT pretend_args_size;
1410 HARD_REG_SET mask;
1411 int n_spilled = 0;
1412 int spilled_gr_p = 0;
1413 int spilled_fr_p = 0;
1414 unsigned int regno;
1415 int i;
1417 if (current_frame_info.initialized)
1418 return;
1420 memset (&current_frame_info, 0, sizeof current_frame_info);
1421 CLEAR_HARD_REG_SET (mask);
1423 /* Don't allocate scratches to the return register. */
1424 diddle_return_value (mark_reg_gr_used_mask, NULL);
1426 /* Don't allocate scratches to the EH scratch registers. */
1427 if (cfun->machine->ia64_eh_epilogue_sp)
1428 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1429 if (cfun->machine->ia64_eh_epilogue_bsp)
1430 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1432 /* Find the size of the register stack frame. We have only 80 local
1433 registers, because we reserve 8 for the inputs and 8 for the
1434 outputs. */
1436 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1437 since we'll be adjusting that down later. */
1438 regno = LOC_REG (78) + ! frame_pointer_needed;
1439 for (; regno >= LOC_REG (0); regno--)
1440 if (regs_ever_live[regno])
1441 break;
1442 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1444 /* For functions marked with the syscall_linkage attribute, we must mark
1445 all eight input registers as in use, so that locals aren't visible to
1446 the caller. */
1448 if (cfun->machine->n_varargs > 0
1449 || lookup_attribute ("syscall_linkage",
1450 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1451 current_frame_info.n_input_regs = 8;
1452 else
1454 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1455 if (regs_ever_live[regno])
1456 break;
1457 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1460 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1461 if (regs_ever_live[regno])
1462 break;
1463 i = regno - OUT_REG (0) + 1;
1465 /* When -p profiling, we need one output register for the mcount argument.
 1466      Likewise for -a profiling for the bb_init_func argument.  For -ax
1467 profiling, we need two output registers for the two bb_init_trace_func
1468 arguments. */
1469 if (current_function_profile)
1470 i = MAX (i, 1);
1471 current_frame_info.n_output_regs = i;
1473 /* ??? No rotating register support yet. */
1474 current_frame_info.n_rotate_regs = 0;
1476 /* Discover which registers need spilling, and how much room that
1477 will take. Begin with floating point and general registers,
1478 which will always wind up on the stack. */
1480 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1481 if (regs_ever_live[regno] && ! call_used_regs[regno])
1483 SET_HARD_REG_BIT (mask, regno);
1484 spill_size += 16;
1485 n_spilled += 1;
1486 spilled_fr_p = 1;
1489 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1490 if (regs_ever_live[regno] && ! call_used_regs[regno])
1492 SET_HARD_REG_BIT (mask, regno);
1493 spill_size += 8;
1494 n_spilled += 1;
1495 spilled_gr_p = 1;
1498 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1499 if (regs_ever_live[regno] && ! call_used_regs[regno])
1501 SET_HARD_REG_BIT (mask, regno);
1502 spill_size += 8;
1503 n_spilled += 1;
1506 /* Now come all special registers that might get saved in other
1507 general registers. */
1509 if (frame_pointer_needed)
1511 current_frame_info.reg_fp = find_gr_spill (1);
1512 /* If we did not get a register, then we take LOC79. This is guaranteed
1513 to be free, even if regs_ever_live is already set, because this is
1514 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1515 as we don't count loc79 above. */
1516 if (current_frame_info.reg_fp == 0)
1518 current_frame_info.reg_fp = LOC_REG (79);
1519 current_frame_info.n_local_regs++;
1523 if (! current_function_is_leaf)
1525 /* Emit a save of BR0 if we call other functions. Do this even
1526 if this function doesn't return, as EH depends on this to be
1527 able to unwind the stack. */
1528 SET_HARD_REG_BIT (mask, BR_REG (0));
1530 current_frame_info.reg_save_b0 = find_gr_spill (1);
1531 if (current_frame_info.reg_save_b0 == 0)
1533 spill_size += 8;
1534 n_spilled += 1;
1537 /* Similarly for ar.pfs. */
1538 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1539 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1540 if (current_frame_info.reg_save_ar_pfs == 0)
1542 extra_spill_size += 8;
1543 n_spilled += 1;
1546 else
1548 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1550 SET_HARD_REG_BIT (mask, BR_REG (0));
1551 spill_size += 8;
1552 n_spilled += 1;
1556 /* Unwind descriptor hackery: things are most efficient if we allocate
1557 consecutive GR save registers for RP, PFS, FP in that order. However,
1558 it is absolutely critical that FP get the only hard register that's
1559 guaranteed to be free, so we allocated it first. If all three did
1560 happen to be allocated hard regs, and are consecutive, rearrange them
1561 into the preferred order now. */
1562 if (current_frame_info.reg_fp != 0
1563 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1564 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1566 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1567 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1568 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1571 /* See if we need to store the predicate register block. */
1572 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1573 if (regs_ever_live[regno] && ! call_used_regs[regno])
1574 break;
1575 if (regno <= PR_REG (63))
1577 SET_HARD_REG_BIT (mask, PR_REG (0));
1578 current_frame_info.reg_save_pr = find_gr_spill (1);
1579 if (current_frame_info.reg_save_pr == 0)
1581 extra_spill_size += 8;
1582 n_spilled += 1;
1585 /* ??? Mark them all as used so that register renaming and such
1586 are free to use them. */
1587 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1588 regs_ever_live[regno] = 1;
1591 /* If we're forced to use st8.spill, we're forced to save and restore
1592 ar.unat as well. */
1593 if (spilled_gr_p || cfun->machine->n_varargs)
1595 regs_ever_live[AR_UNAT_REGNUM] = 1;
1596 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1597 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1598 if (current_frame_info.reg_save_ar_unat == 0)
1600 extra_spill_size += 8;
1601 n_spilled += 1;
1605 if (regs_ever_live[AR_LC_REGNUM])
1607 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1608 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1609 if (current_frame_info.reg_save_ar_lc == 0)
1611 extra_spill_size += 8;
1612 n_spilled += 1;
1616 /* If we have an odd number of words of pretend arguments written to
1617 the stack, then the FR save area will be unaligned. We round the
1618 size of this area up to keep things 16 byte aligned. */
1619 if (spilled_fr_p)
1620 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1621 else
1622 pretend_args_size = current_function_pretend_args_size;
1624 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1625 + current_function_outgoing_args_size);
1626 total_size = IA64_STACK_ALIGN (total_size);
1628 /* We always use the 16-byte scratch area provided by the caller, but
1629 if we are a leaf function, there's no one to which we need to provide
1630 a scratch area. */
1631 if (current_function_is_leaf)
1632 total_size = MAX (0, total_size - 16);
1634 current_frame_info.total_size = total_size;
1635 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1636 current_frame_info.spill_size = spill_size;
1637 current_frame_info.extra_spill_size = extra_spill_size;
1638 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1639 current_frame_info.n_spilled = n_spilled;
1640 current_frame_info.initialized = reload_completed;
1643 /* Compute the initial difference between the specified pair of registers. */
1645 HOST_WIDE_INT
1646 ia64_initial_elimination_offset (from, to)
1647 int from, to;
1649 HOST_WIDE_INT offset;
1651 ia64_compute_frame_size (get_frame_size ());
1652 switch (from)
1654 case FRAME_POINTER_REGNUM:
1655 if (to == HARD_FRAME_POINTER_REGNUM)
1657 if (current_function_is_leaf)
1658 offset = -current_frame_info.total_size;
1659 else
1660 offset = -(current_frame_info.total_size
1661 - current_function_outgoing_args_size - 16);
1663 else if (to == STACK_POINTER_REGNUM)
1665 if (current_function_is_leaf)
1666 offset = 0;
1667 else
1668 offset = 16 + current_function_outgoing_args_size;
1670 else
1671 abort ();
1672 break;
1674 case ARG_POINTER_REGNUM:
1675 /* Arguments start above the 16 byte save area, unless stdarg
1676 in which case we store through the 16 byte save area. */
1677 if (to == HARD_FRAME_POINTER_REGNUM)
1678 offset = 16 - current_function_pretend_args_size;
1679 else if (to == STACK_POINTER_REGNUM)
1680 offset = (current_frame_info.total_size
1681 + 16 - current_function_pretend_args_size);
1682 else
1683 abort ();
1684 break;
1686 case RETURN_ADDRESS_POINTER_REGNUM:
1687 offset = 0;
1688 break;
1690 default:
1691 abort ();
1694 return offset;
1697 /* If there are more than a trivial number of register spills, we use
1698 two interleaved iterators so that we can get two memory references
1699 per insn group.
1701 In order to simplify things in the prologue and epilogue expanders,
1702 we use helper functions to fix up the memory references after the
1703 fact with the appropriate offsets to a POST_MODIFY memory mode.
1704 The following data structure tracks the state of the two iterators
1705 while insns are being emitted. */
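/* When there are more than two spills, two iterator registers are set up
   (n_iter = 1 + (n_spills > 2) below) and successive spill/fill references
   alternate between them, which is what allows two memory references to
   issue in the same instruction group. */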
1707 struct spill_fill_data
1709 rtx init_after; /* point at which to emit initializations */
1710 rtx init_reg[2]; /* initial base register */
1711 rtx iter_reg[2]; /* the iterator registers */
1712 rtx *prev_addr[2]; /* address of last memory use */
1713 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
1714 HOST_WIDE_INT prev_off[2]; /* last offset */
1715 int n_iter; /* number of iterators in use */
1716 int next_iter; /* next iterator to use */
1717 unsigned int save_gr_used_mask;
1720 static struct spill_fill_data spill_fill_data;
1722 static void
1723 setup_spill_pointers (n_spills, init_reg, cfa_off)
1724 int n_spills;
1725 rtx init_reg;
1726 HOST_WIDE_INT cfa_off;
1728 int i;
1730 spill_fill_data.init_after = get_last_insn ();
1731 spill_fill_data.init_reg[0] = init_reg;
1732 spill_fill_data.init_reg[1] = init_reg;
1733 spill_fill_data.prev_addr[0] = NULL;
1734 spill_fill_data.prev_addr[1] = NULL;
1735 spill_fill_data.prev_insn[0] = NULL;
1736 spill_fill_data.prev_insn[1] = NULL;
1737 spill_fill_data.prev_off[0] = cfa_off;
1738 spill_fill_data.prev_off[1] = cfa_off;
1739 spill_fill_data.next_iter = 0;
1740 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1742 spill_fill_data.n_iter = 1 + (n_spills > 2);
1743 for (i = 0; i < spill_fill_data.n_iter; ++i)
1745 int regno = next_scratch_gr_reg ();
1746 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1747 current_frame_info.gr_used_mask |= 1 << regno;
1751 static void
1752 finish_spill_pointers ()
1754 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1757 static rtx
1758 spill_restore_mem (reg, cfa_off)
1759 rtx reg;
1760 HOST_WIDE_INT cfa_off;
1762 int iter = spill_fill_data.next_iter;
1763 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1764 rtx disp_rtx = GEN_INT (disp);
1765 rtx mem;
1767 if (spill_fill_data.prev_addr[iter])
1769 if (CONST_OK_FOR_N (disp))
1771 *spill_fill_data.prev_addr[iter]
1772 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1773 gen_rtx_PLUS (DImode,
1774 spill_fill_data.iter_reg[iter],
1775 disp_rtx));
1776 REG_NOTES (spill_fill_data.prev_insn[iter])
1777 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
1778 REG_NOTES (spill_fill_data.prev_insn[iter]));
1780 else
1782 /* ??? Could use register post_modify for loads. */
1783 if (! CONST_OK_FOR_I (disp))
1785 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1786 emit_move_insn (tmp, disp_rtx);
1787 disp_rtx = tmp;
1789 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1790 spill_fill_data.iter_reg[iter], disp_rtx));
1793 /* Micro-optimization: if we've created a frame pointer, it's at
1794 CFA 0, which may allow the real iterator to be initialized lower,
1795 slightly increasing parallelism. Also, if there are few saves
1796 it may eliminate the iterator entirely. */
1797 else if (disp == 0
1798 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1799 && frame_pointer_needed)
1801 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1802 set_mem_alias_set (mem, get_varargs_alias_set ());
1803 return mem;
1805 else
1807 rtx seq, insn;
1809 if (disp == 0)
1810 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1811 spill_fill_data.init_reg[iter]);
1812 else
1814 start_sequence ();
1816 if (! CONST_OK_FOR_I (disp))
1818 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1819 emit_move_insn (tmp, disp_rtx);
1820 disp_rtx = tmp;
1823 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1824 spill_fill_data.init_reg[iter],
1825 disp_rtx));
1827 seq = gen_sequence ();
1828 end_sequence ();
1831 /* Careful for being the first insn in a sequence. */
1832 if (spill_fill_data.init_after)
1833 insn = emit_insn_after (seq, spill_fill_data.init_after);
1834 else
1836 rtx first = get_insns ();
1837 if (first)
1838 insn = emit_insn_before (seq, first);
1839 else
1840 insn = emit_insn (seq);
1842 spill_fill_data.init_after = insn;
1844 /* If DISP is 0, we may or may not have a further adjustment
1845 afterward. If we do, then the load/store insn may be modified
1846 to be a post-modify. If we don't, then this copy may be
1847 eliminated by copyprop_hardreg_forward, which makes this
1848 insn garbage, which runs afoul of the sanity check in
1849 propagate_one_insn. So mark this insn as legal to delete. */
1850 if (disp == 0)
1851 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1852 REG_NOTES (insn));
1855 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1857 /* ??? Not all of the spills are for varargs, but some of them are.
1858 The rest of the spills belong in an alias set of their own. But
1859 it doesn't actually hurt to include them here. */
1860 set_mem_alias_set (mem, get_varargs_alias_set ());
1862 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1863 spill_fill_data.prev_off[iter] = cfa_off;
1865 if (++iter >= spill_fill_data.n_iter)
1866 iter = 0;
1867 spill_fill_data.next_iter = iter;
1869 return mem;
1872 static void
1873 do_spill (move_fn, reg, cfa_off, frame_reg)
1874 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1875 rtx reg, frame_reg;
1876 HOST_WIDE_INT cfa_off;
1878 int iter = spill_fill_data.next_iter;
1879 rtx mem, insn;
1881 mem = spill_restore_mem (reg, cfa_off);
1882 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1883 spill_fill_data.prev_insn[iter] = insn;
1885 if (frame_reg)
1887 rtx base;
1888 HOST_WIDE_INT off;
1890 RTX_FRAME_RELATED_P (insn) = 1;
1892 /* Don't even pretend that the unwind code can intuit its way
1893 through a pair of interleaved post_modify iterators. Just
1894 provide the correct answer. */
1896 if (frame_pointer_needed)
1898 base = hard_frame_pointer_rtx;
1899 off = - cfa_off;
1901 else
1903 base = stack_pointer_rtx;
1904 off = current_frame_info.total_size - cfa_off;
1907 REG_NOTES (insn)
1908 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1909 gen_rtx_SET (VOIDmode,
1910 gen_rtx_MEM (GET_MODE (reg),
1911 plus_constant (base, off)),
1912 frame_reg),
1913 REG_NOTES (insn));
1917 static void
1918 do_restore (move_fn, reg, cfa_off)
1919 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1920 rtx reg;
1921 HOST_WIDE_INT cfa_off;
1923 int iter = spill_fill_data.next_iter;
1924 rtx insn;
1926 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1927 GEN_INT (cfa_off)));
1928 spill_fill_data.prev_insn[iter] = insn;
 1931 /* Wrapper functions that discard the CONST_INT spill offset.  These
 1932    exist so that we can give gr_spill/gr_fill the offset they need and
 1933    use a consistent function interface. */
1935 static rtx
1936 gen_movdi_x (dest, src, offset)
1937 rtx dest, src;
1938 rtx offset ATTRIBUTE_UNUSED;
1940 return gen_movdi (dest, src);
1943 static rtx
1944 gen_fr_spill_x (dest, src, offset)
1945 rtx dest, src;
1946 rtx offset ATTRIBUTE_UNUSED;
1948 return gen_fr_spill (dest, src);
1951 static rtx
1952 gen_fr_restore_x (dest, src, offset)
1953 rtx dest, src;
1954 rtx offset ATTRIBUTE_UNUSED;
1956 return gen_fr_restore (dest, src);
1959 /* Called after register allocation to add any instructions needed for the
1960 prologue. Using a prologue insn is favored compared to putting all of the
1961 instructions in output_function_prologue(), since it allows the scheduler
 1962    to intermix instructions with the saves of the call-saved registers.  In
1963 some cases, it might be necessary to emit a barrier instruction as the last
1964 insn to prevent such scheduling.
1966 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1967 so that the debug info generation code can handle them properly.
1969 The register save area is laid out like so:
1970 cfa+16
1971 [ varargs spill area ]
1972 [ fr register spill area ]
1973 [ br register spill area ]
1974 [ ar register spill area ]
1975 [ pr register spill area ]
1976 [ gr register spill area ] */
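/* For illustration, the prologue below walks CFA_OFF through that layout
   as follows (CFA_OFF is the offset of a slot below the CFA, so negative
   values lie above it; this is a sketch, not from any particular
   compilation):

     cfa_off = -16, stepping up by 8          varargs spill area
     cfa_off = spill_cfa_off
               + spill_size
               + extra_spill_size             pr/ar saves, stepping down by 8
     cfa_off = spill_cfa_off + spill_size     gr/br/fr spills, down by 8
                                              (16 for FR registers, which
                                              must stay 16-byte aligned)
     cfa_off = spill_cfa_off                  all spills done (sanity checked)  */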
1978 /* ??? We get inefficient code when the frame size is larger than can fit
1979 in an adds instruction. */
1981 void
1982 ia64_expand_prologue ()
1984 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1985 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1986 rtx reg, alt_reg;
1988 ia64_compute_frame_size (get_frame_size ());
1989 last_scratch_gr_reg = 15;
1991 /* If there is no epilogue, then we don't need some prologue insns.
1992 We need to avoid emitting the dead prologue insns, because flow
1993 will complain about them. */
1994 if (optimize)
1996 edge e;
1998 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1999 if ((e->flags & EDGE_FAKE) == 0
2000 && (e->flags & EDGE_FALLTHRU) != 0)
2001 break;
2002 epilogue_p = (e != NULL);
2004 else
2005 epilogue_p = 1;
2007 /* Set the local, input, and output register names. We need to do this
2008 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2009 half. If we use in/loc/out register names, then we get assembler errors
2010 in crtn.S because there is no alloc insn or regstk directive in there. */
2011 if (! TARGET_REG_NAMES)
2013 int inputs = current_frame_info.n_input_regs;
2014 int locals = current_frame_info.n_local_regs;
2015 int outputs = current_frame_info.n_output_regs;
2017 for (i = 0; i < inputs; i++)
2018 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2019 for (i = 0; i < locals; i++)
2020 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2021 for (i = 0; i < outputs; i++)
2022 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2025 /* Set the frame pointer register name. The regnum is logically loc79,
2026 but of course we'll not have allocated that many locals. Rather than
2027 worrying about renumbering the existing rtxs, we adjust the name. */
2028 /* ??? This code means that we can never use one local register when
2029 there is a frame pointer. loc79 gets wasted in this case, as it is
2030 renamed to a register that will never be used. See also the try_locals
2031 code in find_gr_spill. */
2032 if (current_frame_info.reg_fp)
2034 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2035 reg_names[HARD_FRAME_POINTER_REGNUM]
2036 = reg_names[current_frame_info.reg_fp];
2037 reg_names[current_frame_info.reg_fp] = tmp;
2040 /* Fix up the return address placeholder. */
2041 /* ??? We can fail if __builtin_return_address is used, and we didn't
2042 allocate a register in which to save b0. I can't think of a way to
2043 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2044 then be sure that I got the right one. Further, reload doesn't seem
2045 to care if an eliminable register isn't used, and "eliminates" it
2046 anyway. */
2047 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2048 && current_frame_info.reg_save_b0 != 0)
2049 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2051 /* We don't need an alloc instruction if we've used no outputs or locals. */
2052 if (current_frame_info.n_local_regs == 0
2053 && current_frame_info.n_output_regs == 0
2054 && current_frame_info.n_input_regs <= current_function_args_info.int_regs)
2056 /* If there is no alloc, but there are input registers used, then we
2057 need a .regstk directive. */
2058 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2059 ar_pfs_save_reg = NULL_RTX;
2061 else
2063 current_frame_info.need_regstk = 0;
2065 if (current_frame_info.reg_save_ar_pfs)
2066 regno = current_frame_info.reg_save_ar_pfs;
2067 else
2068 regno = next_scratch_gr_reg ();
2069 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2071 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2072 GEN_INT (current_frame_info.n_input_regs),
2073 GEN_INT (current_frame_info.n_local_regs),
2074 GEN_INT (current_frame_info.n_output_regs),
2075 GEN_INT (current_frame_info.n_rotate_regs)));
2076 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2079 /* Set up frame pointer, stack pointer, and spill iterators. */
2081 n_varargs = cfun->machine->n_varargs;
2082 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2083 stack_pointer_rtx, 0);
2085 if (frame_pointer_needed)
2087 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2088 RTX_FRAME_RELATED_P (insn) = 1;
2091 if (current_frame_info.total_size != 0)
2093 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2094 rtx offset;
2096 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2097 offset = frame_size_rtx;
2098 else
2100 regno = next_scratch_gr_reg ();
2101 offset = gen_rtx_REG (DImode, regno);
2102 emit_move_insn (offset, frame_size_rtx);
2105 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2106 stack_pointer_rtx, offset));
2108 if (! frame_pointer_needed)
2110 RTX_FRAME_RELATED_P (insn) = 1;
2111 if (GET_CODE (offset) != CONST_INT)
2113 REG_NOTES (insn)
2114 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2115 gen_rtx_SET (VOIDmode,
2116 stack_pointer_rtx,
2117 gen_rtx_PLUS (DImode,
2118 stack_pointer_rtx,
2119 frame_size_rtx)),
2120 REG_NOTES (insn));
2124 /* ??? At this point we must generate a magic insn that appears to
2125 modify the stack pointer, the frame pointer, and all spill
2126 iterators. This would allow the most scheduling freedom. For
2127 now, just hard stop. */
2128 emit_insn (gen_blockage ());
2131 /* Must copy out ar.unat before doing any integer spills. */
2132 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2134 if (current_frame_info.reg_save_ar_unat)
2135 ar_unat_save_reg
2136 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2137 else
2139 alt_regno = next_scratch_gr_reg ();
2140 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2141 current_frame_info.gr_used_mask |= 1 << alt_regno;
2144 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2145 insn = emit_move_insn (ar_unat_save_reg, reg);
2146 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2148 /* Even if we're not going to generate an epilogue, we still
2149 need to save the register so that EH works. */
2150 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2151 emit_insn (gen_prologue_use (ar_unat_save_reg));
2153 else
2154 ar_unat_save_reg = NULL_RTX;
2156 /* Spill all varargs registers. Do this before spilling any GR registers,
2157 since we want the UNAT bits for the GR registers to override the UNAT
2158 bits from varargs, which we don't care about. */
2160 cfa_off = -16;
2161 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2163 reg = gen_rtx_REG (DImode, regno);
2164 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2167 /* Locate the bottom of the register save area. */
2168 cfa_off = (current_frame_info.spill_cfa_off
2169 + current_frame_info.spill_size
2170 + current_frame_info.extra_spill_size);
2172 /* Save the predicate register block either in a register or in memory. */
2173 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2175 reg = gen_rtx_REG (DImode, PR_REG (0));
2176 if (current_frame_info.reg_save_pr != 0)
2178 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2179 insn = emit_move_insn (alt_reg, reg);
2181 /* ??? Denote pr spill/fill by a DImode move that modifies all
2182 64 hard registers. */
2183 RTX_FRAME_RELATED_P (insn) = 1;
2184 REG_NOTES (insn)
2185 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2186 gen_rtx_SET (VOIDmode, alt_reg, reg),
2187 REG_NOTES (insn));
2189 /* Even if we're not going to generate an epilogue, we still
2190 need to save the register so that EH works. */
2191 if (! epilogue_p)
2192 emit_insn (gen_prologue_use (alt_reg));
2194 else
2196 alt_regno = next_scratch_gr_reg ();
2197 alt_reg = gen_rtx_REG (DImode, alt_regno);
2198 insn = emit_move_insn (alt_reg, reg);
2199 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2200 cfa_off -= 8;
2204 /* Handle AR regs in numerical order. All of them get special handling. */
2205 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2206 && current_frame_info.reg_save_ar_unat == 0)
2208 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2209 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2210 cfa_off -= 8;
2213 /* The alloc insn already copied ar.pfs into a general register. The
2214 only thing we have to do now is copy that register to a stack slot
2215 if we'd not allocated a local register for the job. */
2216 if (current_frame_info.reg_save_ar_pfs == 0
2217 && ! current_function_is_leaf)
2219 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2220 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2221 cfa_off -= 8;
2224 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2226 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2227 if (current_frame_info.reg_save_ar_lc != 0)
2229 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2230 insn = emit_move_insn (alt_reg, reg);
2231 RTX_FRAME_RELATED_P (insn) = 1;
2233 /* Even if we're not going to generate an epilogue, we still
2234 need to save the register so that EH works. */
2235 if (! epilogue_p)
2236 emit_insn (gen_prologue_use (alt_reg));
2238 else
2240 alt_regno = next_scratch_gr_reg ();
2241 alt_reg = gen_rtx_REG (DImode, alt_regno);
2242 emit_move_insn (alt_reg, reg);
2243 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2244 cfa_off -= 8;
2248 /* We should now be at the base of the gr/br/fr spill area. */
2249 if (cfa_off != (current_frame_info.spill_cfa_off
2250 + current_frame_info.spill_size))
2251 abort ();
2253 /* Spill all general registers. */
2254 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2255 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2257 reg = gen_rtx_REG (DImode, regno);
2258 do_spill (gen_gr_spill, reg, cfa_off, reg);
2259 cfa_off -= 8;
2262 /* Handle BR0 specially -- it may be getting stored permanently in
2263 some GR register. */
2264 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2266 reg = gen_rtx_REG (DImode, BR_REG (0));
2267 if (current_frame_info.reg_save_b0 != 0)
2269 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2270 insn = emit_move_insn (alt_reg, reg);
2271 RTX_FRAME_RELATED_P (insn) = 1;
2273 /* Even if we're not going to generate an epilogue, we still
2274 need to save the register so that EH works. */
2275 if (! epilogue_p)
2276 emit_insn (gen_prologue_use (alt_reg));
2278 else
2280 alt_regno = next_scratch_gr_reg ();
2281 alt_reg = gen_rtx_REG (DImode, alt_regno);
2282 emit_move_insn (alt_reg, reg);
2283 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2284 cfa_off -= 8;
2288 /* Spill the rest of the BR registers. */
2289 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2290 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2292 alt_regno = next_scratch_gr_reg ();
2293 alt_reg = gen_rtx_REG (DImode, alt_regno);
2294 reg = gen_rtx_REG (DImode, regno);
2295 emit_move_insn (alt_reg, reg);
2296 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2297 cfa_off -= 8;
2300 /* Align the frame and spill all FR registers. */
2301 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2302 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2304 if (cfa_off & 15)
2305 abort ();
2306 reg = gen_rtx_REG (TFmode, regno);
2307 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2308 cfa_off -= 16;
2311 if (cfa_off != current_frame_info.spill_cfa_off)
2312 abort ();
2314 finish_spill_pointers ();
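/* For a small non-leaf frame the expansion above typically boils down to
   something like the following (register numbers and sizes are purely
   illustrative; larger frames first load the size into a scratch GR):

       alloc   r34 = ar.pfs, 2, 3, 4, 0
       mov     r35 = b0
       adds    r12 = -SIZE, r12
       ;;

   followed by st8.spill / stf.spill stores through the spill iterators
   that setup_spill_pointers prepared.  */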
2317 /* Called after register allocation to add any instructions needed for the
2318 epilogue. Using an epilogue insn is favored compared to putting all of the
2319 instructions in output_function_epilogue(), since it allows the scheduler
2320 to intermix instructions with the restores of the call-saved registers. In
2321 some cases, it might be necessary to emit a barrier instruction as the last
2322 insn to prevent such scheduling. */
2324 void
2325 ia64_expand_epilogue (sibcall_p)
2326 int sibcall_p;
2328 rtx insn, reg, alt_reg, ar_unat_save_reg;
2329 int regno, alt_regno, cfa_off;
2331 ia64_compute_frame_size (get_frame_size ());
2333 /* If there is a frame pointer, then we use it instead of the stack
2334 pointer, so that the stack pointer does not need to be valid when
2335 the epilogue starts. See EXIT_IGNORE_STACK. */
2336 if (frame_pointer_needed)
2337 setup_spill_pointers (current_frame_info.n_spilled,
2338 hard_frame_pointer_rtx, 0);
2339 else
2340 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2341 current_frame_info.total_size);
2343 if (current_frame_info.total_size != 0)
2345 /* ??? At this point we must generate a magic insn that appears to
2346 modify the spill iterators and the frame pointer. This would
2347 allow the most scheduling freedom. For now, just hard stop. */
2348 emit_insn (gen_blockage ());
2351 /* Locate the bottom of the register save area. */
2352 cfa_off = (current_frame_info.spill_cfa_off
2353 + current_frame_info.spill_size
2354 + current_frame_info.extra_spill_size);
2356 /* Restore the predicate registers. */
2357 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2359 if (current_frame_info.reg_save_pr != 0)
2360 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2361 else
2363 alt_regno = next_scratch_gr_reg ();
2364 alt_reg = gen_rtx_REG (DImode, alt_regno);
2365 do_restore (gen_movdi_x, alt_reg, cfa_off);
2366 cfa_off -= 8;
2368 reg = gen_rtx_REG (DImode, PR_REG (0));
2369 emit_move_insn (reg, alt_reg);
2372 /* Restore the application registers. */
2374 /* Load the saved unat from the stack, but do not restore it until
2375 after the GRs have been restored. */
2376 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2378 if (current_frame_info.reg_save_ar_unat != 0)
2379 ar_unat_save_reg
2380 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2381 else
2383 alt_regno = next_scratch_gr_reg ();
2384 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2385 current_frame_info.gr_used_mask |= 1 << alt_regno;
2386 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2387 cfa_off -= 8;
2390 else
2391 ar_unat_save_reg = NULL_RTX;
2393 if (current_frame_info.reg_save_ar_pfs != 0)
2395 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2396 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2397 emit_move_insn (reg, alt_reg);
2399 else if (! current_function_is_leaf)
2401 alt_regno = next_scratch_gr_reg ();
2402 alt_reg = gen_rtx_REG (DImode, alt_regno);
2403 do_restore (gen_movdi_x, alt_reg, cfa_off);
2404 cfa_off -= 8;
2405 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2406 emit_move_insn (reg, alt_reg);
2409 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2411 if (current_frame_info.reg_save_ar_lc != 0)
2412 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2413 else
2415 alt_regno = next_scratch_gr_reg ();
2416 alt_reg = gen_rtx_REG (DImode, alt_regno);
2417 do_restore (gen_movdi_x, alt_reg, cfa_off);
2418 cfa_off -= 8;
2420 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2421 emit_move_insn (reg, alt_reg);
2424 /* We should now be at the base of the gr/br/fr spill area. */
2425 if (cfa_off != (current_frame_info.spill_cfa_off
2426 + current_frame_info.spill_size))
2427 abort ();
2429 /* Restore all general registers. */
2430 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2431 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2433 reg = gen_rtx_REG (DImode, regno);
2434 do_restore (gen_gr_restore, reg, cfa_off);
2435 cfa_off -= 8;
2438 /* Restore the branch registers. Handle B0 specially, as it may
2439 have gotten stored in some GR register. */
2440 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2442 if (current_frame_info.reg_save_b0 != 0)
2443 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2444 else
2446 alt_regno = next_scratch_gr_reg ();
2447 alt_reg = gen_rtx_REG (DImode, alt_regno);
2448 do_restore (gen_movdi_x, alt_reg, cfa_off);
2449 cfa_off -= 8;
2451 reg = gen_rtx_REG (DImode, BR_REG (0));
2452 emit_move_insn (reg, alt_reg);
2455 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2456 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2458 alt_regno = next_scratch_gr_reg ();
2459 alt_reg = gen_rtx_REG (DImode, alt_regno);
2460 do_restore (gen_movdi_x, alt_reg, cfa_off);
2461 cfa_off -= 8;
2462 reg = gen_rtx_REG (DImode, regno);
2463 emit_move_insn (reg, alt_reg);
2466 /* Restore floating point registers. */
2467 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2468 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2470 if (cfa_off & 15)
2471 abort ();
2472 reg = gen_rtx_REG (TFmode, regno);
2473 do_restore (gen_fr_restore_x, reg, cfa_off);
2474 cfa_off -= 16;
2477 /* Restore ar.unat for real. */
2478 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2480 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2481 emit_move_insn (reg, ar_unat_save_reg);
2484 if (cfa_off != current_frame_info.spill_cfa_off)
2485 abort ();
2487 finish_spill_pointers ();
2489 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2491 /* ??? At this point we must generate a magic insn that appears to
2492 modify the spill iterators, the stack pointer, and the frame
2493 pointer. This would allow the most scheduling freedom. For now,
2494 just hard stop. */
2495 emit_insn (gen_blockage ());
2498 if (cfun->machine->ia64_eh_epilogue_sp)
2499 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2500 else if (frame_pointer_needed)
2502 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2503 RTX_FRAME_RELATED_P (insn) = 1;
2505 else if (current_frame_info.total_size)
2507 rtx offset, frame_size_rtx;
2509 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2510 if (CONST_OK_FOR_I (current_frame_info.total_size))
2511 offset = frame_size_rtx;
2512 else
2514 regno = next_scratch_gr_reg ();
2515 offset = gen_rtx_REG (DImode, regno);
2516 emit_move_insn (offset, frame_size_rtx);
2519 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2520 offset));
2522 RTX_FRAME_RELATED_P (insn) = 1;
2523 if (GET_CODE (offset) != CONST_INT)
2525 REG_NOTES (insn)
2526 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2527 gen_rtx_SET (VOIDmode,
2528 stack_pointer_rtx,
2529 gen_rtx_PLUS (DImode,
2530 stack_pointer_rtx,
2531 frame_size_rtx)),
2532 REG_NOTES (insn));
2536 if (cfun->machine->ia64_eh_epilogue_bsp)
2537 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2539 if (! sibcall_p)
2540 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2541 else
2543 int fp = GR_REG (2);
2544 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
2545 first available call-clobbered register. If there was a frame pointer
2546 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2547 so we have to make sure we're using the string "r2" when emitting
2548 the register name for the assembler. */
2549 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2550 fp = HARD_FRAME_POINTER_REGNUM;
2552 /* We must emit an alloc to force the input registers to become output
2553 registers. Otherwise, if the callee tries to pass its parameters
2554 through to another call without an intervening alloc, then these
2555 values get lost. */
2556 /* ??? We don't need to preserve all input registers. We only need to
2557 preserve those input registers used as arguments to the sibling call.
2558 It is unclear how to compute that number here. */
2559 if (current_frame_info.n_input_regs != 0)
2560 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2561 GEN_INT (0), GEN_INT (0),
2562 GEN_INT (current_frame_info.n_input_regs),
2563 GEN_INT (0)));
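/* Taken together, the restores above plus this closing code usually leave
   a non-sibcall epilogue looking something like (register numbers
   illustrative only):

       mov     ar.pfs = r34
       mov     b0 = r35
       adds    r12 = SIZE, r12
       br.ret.sptk.many b0
*/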
2567 /* Return 1 if br.ret can do all the work required to return from a
2568 function. */
2571 ia64_direct_return ()
2573 if (reload_completed && ! frame_pointer_needed)
2575 ia64_compute_frame_size (get_frame_size ());
2577 return (current_frame_info.total_size == 0
2578 && current_frame_info.n_spilled == 0
2579 && current_frame_info.reg_save_b0 == 0
2580 && current_frame_info.reg_save_pr == 0
2581 && current_frame_info.reg_save_ar_pfs == 0
2582 && current_frame_info.reg_save_ar_unat == 0
2583 && current_frame_info.reg_save_ar_lc == 0);
2585 return 0;
2589 ia64_hard_regno_rename_ok (from, to)
2590 int from;
2591 int to;
2593 /* Don't clobber any of the registers we reserved for the prologue. */
2594 if (to == current_frame_info.reg_fp
2595 || to == current_frame_info.reg_save_b0
2596 || to == current_frame_info.reg_save_pr
2597 || to == current_frame_info.reg_save_ar_pfs
2598 || to == current_frame_info.reg_save_ar_unat
2599 || to == current_frame_info.reg_save_ar_lc)
2600 return 0;
2602 if (from == current_frame_info.reg_fp
2603 || from == current_frame_info.reg_save_b0
2604 || from == current_frame_info.reg_save_pr
2605 || from == current_frame_info.reg_save_ar_pfs
2606 || from == current_frame_info.reg_save_ar_unat
2607 || from == current_frame_info.reg_save_ar_lc)
2608 return 0;
2610 /* Don't use output registers outside the register frame. */
2611 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2612 return 0;
2614 /* Retain even/oddness on predicate register pairs. */
2615 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2616 return (from & 1) == (to & 1);
2618 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2619 if (from == GR_REG (4) && current_function_calls_setjmp)
2620 return 0;
2622 return 1;
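/* Examples: renaming p6 to p7 is refused because it would change the
   even/oddness of the predicate pair, while p6 to p8 passes that check;
   renaming anything into or out of the GR chosen for reg_save_b0 is
   always refused.  */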
2625 /* Target hook for assembling integer objects. Handle word-sized
2626 aligned objects and detect the cases when @fptr is needed. */
2628 static bool
2629 ia64_assemble_integer (x, size, aligned_p)
2630 rtx x;
2631 unsigned int size;
2632 int aligned_p;
2634 if (size == UNITS_PER_WORD && aligned_p
2635 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2636 && GET_CODE (x) == SYMBOL_REF
2637 && SYMBOL_REF_FLAG (x))
2639 fputs ("\tdata8\t@fptr(", asm_out_file);
2640 output_addr_const (asm_out_file, x);
2641 fputs (")\n", asm_out_file);
2642 return true;
2644 return default_assemble_integer (x, size, aligned_p);
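/* For a code pointer referring to a function symbol FOO this emits, e.g.,

       data8   @fptr(foo)

   so the linker materializes an official function descriptor instead of
   storing the raw text address.  (A sketch; the exact symbol spelling
   comes from output_addr_const.)  */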
2647 /* Emit the function prologue. */
2649 static void
2650 ia64_output_function_prologue (file, size)
2651 FILE *file;
2652 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2654 int mask, grsave, grsave_prev;
2656 if (current_frame_info.need_regstk)
2657 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2658 current_frame_info.n_input_regs,
2659 current_frame_info.n_local_regs,
2660 current_frame_info.n_output_regs,
2661 current_frame_info.n_rotate_regs);
2663 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2664 return;
2666 /* Emit the .prologue directive. */
2668 mask = 0;
2669 grsave = grsave_prev = 0;
2670 if (current_frame_info.reg_save_b0 != 0)
2672 mask |= 8;
2673 grsave = grsave_prev = current_frame_info.reg_save_b0;
2675 if (current_frame_info.reg_save_ar_pfs != 0
2676 && (grsave_prev == 0
2677 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2679 mask |= 4;
2680 if (grsave_prev == 0)
2681 grsave = current_frame_info.reg_save_ar_pfs;
2682 grsave_prev = current_frame_info.reg_save_ar_pfs;
2684 if (current_frame_info.reg_fp != 0
2685 && (grsave_prev == 0
2686 || current_frame_info.reg_fp == grsave_prev + 1))
2688 mask |= 2;
2689 if (grsave_prev == 0)
2690 grsave = HARD_FRAME_POINTER_REGNUM;
2691 grsave_prev = current_frame_info.reg_fp;
2693 if (current_frame_info.reg_save_pr != 0
2694 && (grsave_prev == 0
2695 || current_frame_info.reg_save_pr == grsave_prev + 1))
2697 mask |= 1;
2698 if (grsave_prev == 0)
2699 grsave = current_frame_info.reg_save_pr;
2702 if (mask)
2703 fprintf (file, "\t.prologue %d, %d\n", mask,
2704 ia64_dbx_register_number (grsave));
2705 else
2706 fputs ("\t.prologue\n", file);
2708 /* Emit a .spill directive, if necessary, to relocate the base of
2709 the register spill area. */
2710 if (current_frame_info.spill_cfa_off != -16)
2711 fprintf (file, "\t.spill %ld\n",
2712 (long) (current_frame_info.spill_cfa_off
2713 + current_frame_info.spill_size));
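/* The .prologue/.spill directives are only emitted when unwind info is
   wanted.  A typical result, with b0 and ar.pfs saved in consecutive GRs
   (numbers purely illustrative), is

       .prologue 12, 33

   where the mask is a bit vector -- 8 = b0, 4 = ar.pfs, 2 = frame
   pointer, 1 = pr -- and the second operand names the first GR of the
   consecutive save block.  .spill follows only when the register spill
   base has moved from its default location.  */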
2716 /* Emit the .body directive at the scheduled end of the prologue. */
2718 static void
2719 ia64_output_function_end_prologue (file)
2720 FILE *file;
2722 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2723 return;
2725 fputs ("\t.body\n", file);
2728 /* Emit the function epilogue. */
2730 static void
2731 ia64_output_function_epilogue (file, size)
2732 FILE *file ATTRIBUTE_UNUSED;
2733 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2735 int i;
2737 /* Reset from the function's potential modifications. */
2738 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2740 if (current_frame_info.reg_fp)
2742 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2743 reg_names[HARD_FRAME_POINTER_REGNUM]
2744 = reg_names[current_frame_info.reg_fp];
2745 reg_names[current_frame_info.reg_fp] = tmp;
2747 if (! TARGET_REG_NAMES)
2749 for (i = 0; i < current_frame_info.n_input_regs; i++)
2750 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2751 for (i = 0; i < current_frame_info.n_local_regs; i++)
2752 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2753 for (i = 0; i < current_frame_info.n_output_regs; i++)
2754 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2757 current_frame_info.initialized = 0;
2761 ia64_dbx_register_number (regno)
2762 int regno;
2764 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2765 from its home at loc79 to something inside the register frame. We
2766 must perform the same renumbering here for the debug info. */
2767 if (current_frame_info.reg_fp)
2769 if (regno == HARD_FRAME_POINTER_REGNUM)
2770 regno = current_frame_info.reg_fp;
2771 else if (regno == current_frame_info.reg_fp)
2772 regno = HARD_FRAME_POINTER_REGNUM;
2775 if (IN_REGNO_P (regno))
2776 return 32 + regno - IN_REG (0);
2777 else if (LOC_REGNO_P (regno))
2778 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2779 else if (OUT_REGNO_P (regno))
2780 return (32 + current_frame_info.n_input_regs
2781 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2782 else
2783 return regno;
2786 void
2787 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2788 rtx addr, fnaddr, static_chain;
2790 rtx addr_reg, eight = GEN_INT (8);
2792 /* Load up our iterator. */
2793 addr_reg = gen_reg_rtx (Pmode);
2794 emit_move_insn (addr_reg, addr);
2796 /* The first two words are the fake descriptor:
2797 __ia64_trampoline, ADDR+16. */
2798 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2799 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2800 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2802 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2803 copy_to_reg (plus_constant (addr, 16)));
2804 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2806 /* The third word is the target descriptor. */
2807 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2808 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2810 /* The fourth word is the static chain. */
2811 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
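/* The four 8-byte words written above give a trampoline of the form

       [addr +  0]  &__ia64_trampoline      \  fake function
       [addr +  8]  addr + 16               /  descriptor
       [addr + 16]  fnaddr                  real target descriptor
       [addr + 24]  static_chain

   __ia64_trampoline (in libgcc) uses the fake gp value, ADDR + 16, to
   locate the last two words, installs the static chain, and branches to
   the real target.  */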
2814 /* Do any needed setup for a variadic function. CUM has not been updated
2815 for the last named argument which has type TYPE and mode MODE.
2817 We generate the actual spill instructions during prologue generation. */
2819 void
2820 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2821 CUMULATIVE_ARGS cum;
2822 int int_mode;
2823 tree type;
2824 int * pretend_size;
2825 int second_time ATTRIBUTE_UNUSED;
2827 /* If this is a stdarg function, then skip the current argument. */
2828 if (! current_function_varargs)
2829 ia64_function_arg_advance (&cum, int_mode, type, 1);
2831 if (cum.words < MAX_ARGUMENT_SLOTS)
2833 int n = MAX_ARGUMENT_SLOTS - cum.words;
2834 *pretend_size = n * UNITS_PER_WORD;
2835 cfun->machine->n_varargs = n;
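/* Example (assuming the usual eight argument slots): for  int f (int a, ...)
   the advance above skips the single named argument, so n == 7,
   *pretend_size == 56, and ia64_expand_prologue later spills the seven
   unnamed incoming argument registers r33..r39 into the varargs area.  */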
2839 /* Check whether TYPE is a homogeneous floating point aggregate. If
2840 it is, return the mode of the floating point type that appears
2841 in all leaves. If it is not, return VOIDmode.
2843 An aggregate is a homogeneous floating point aggregate if all
2844 fields/elements in it have the same floating point type (e.g.,
2845 SFmode). 128-bit quad-precision floats are excluded. */
2847 static enum machine_mode
2848 hfa_element_mode (type, nested)
2849 tree type;
2850 int nested;
2852 enum machine_mode element_mode = VOIDmode;
2853 enum machine_mode mode;
2854 enum tree_code code = TREE_CODE (type);
2855 int know_element_mode = 0;
2856 tree t;
2858 switch (code)
2860 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2861 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2862 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2863 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2864 case FUNCTION_TYPE:
2865 return VOIDmode;
2867 /* Fortran complex types are supposed to be HFAs, so we need to handle
2868 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2869 types though. */
2870 case COMPLEX_TYPE:
2871 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2872 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2873 * BITS_PER_UNIT, MODE_FLOAT, 0);
2874 else
2875 return VOIDmode;
2877 case REAL_TYPE:
2878 /* ??? Should exclude 128-bit long double here. */
2879 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2880 mode if this is contained within an aggregate. */
2881 if (nested)
2882 return TYPE_MODE (type);
2883 else
2884 return VOIDmode;
2886 case ARRAY_TYPE:
2887 return hfa_element_mode (TREE_TYPE (type), 1);
2889 case RECORD_TYPE:
2890 case UNION_TYPE:
2891 case QUAL_UNION_TYPE:
2892 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2894 if (TREE_CODE (t) != FIELD_DECL)
2895 continue;
2897 mode = hfa_element_mode (TREE_TYPE (t), 1);
2898 if (know_element_mode)
2900 if (mode != element_mode)
2901 return VOIDmode;
2903 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2904 return VOIDmode;
2905 else
2907 know_element_mode = 1;
2908 element_mode = mode;
2911 return element_mode;
2913 default:
2914 /* If we reach here, we probably have some front-end specific type
2915 that the backend doesn't know about. This can happen via the
2916 aggregate_value_p call in init_function_start. All we can do is
2917 ignore unknown tree types. */
2918 return VOIDmode;
2921 return VOIDmode;
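/* Some examples of the classification done above:

     struct { float x, y, z; }            ->  SFmode HFA
     struct { double d; double v[3]; }    ->  DFmode HFA
     _Complex double                      ->  DFmode HFA (Fortran COMPLEX)
     struct { float f; double d; }        ->  VOIDmode (mixed element types)
     struct { float f; int i; }           ->  VOIDmode (non-FP field)  */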
2924 /* Return rtx for register where argument is passed, or zero if it is passed
2925 on the stack. */
2927 /* ??? 128-bit quad-precision floats are always passed in general
2928 registers. */
2931 ia64_function_arg (cum, mode, type, named, incoming)
2932 CUMULATIVE_ARGS *cum;
2933 enum machine_mode mode;
2934 tree type;
2935 int named;
2936 int incoming;
2938 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2939 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2940 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2941 / UNITS_PER_WORD);
2942 int offset = 0;
2943 enum machine_mode hfa_mode = VOIDmode;
2945 /* Integer and float arguments larger than 8 bytes start at the next even
2946 boundary. Aggregates larger than 8 bytes start at the next even boundary
2947 if the aggregate has 16 byte alignment. Net effect is that types with
2948 alignment greater than 8 start at the next even boundary. */
2949 /* ??? The ABI does not specify how to handle aggregates with alignment from
2950 9 to 15 bytes, or greater than 16. We handle them all as if they had
2951 16 byte alignment. Such aggregates can occur only if gcc extensions are
2952 used. */
2953 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2954 : (words > 1))
2955 && (cum->words & 1))
2956 offset = 1;
2958 /* If all argument slots are used, then it must go on the stack. */
2959 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2960 return 0;
2962 /* Check for and handle homogeneous FP aggregates. */
2963 if (type)
2964 hfa_mode = hfa_element_mode (type, 0);
2966 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2967 and unprototyped hfas are passed specially. */
2968 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2970 rtx loc[16];
2971 int i = 0;
2972 int fp_regs = cum->fp_regs;
2973 int int_regs = cum->words + offset;
2974 int hfa_size = GET_MODE_SIZE (hfa_mode);
2975 int byte_size;
2976 int args_byte_size;
2978 /* If prototyped, pass it in FR regs then GR regs.
2979 If not prototyped, pass it in both FR and GR regs.
2981 If this is an SFmode aggregate, then it is possible to run out of
2982 FR regs while GR regs are still left. In that case, we pass the
2983 remaining part in the GR regs. */
2985 /* Fill the FP regs. We do this always. We stop if we reach the end
2986 of the argument, the last FP register, or the last argument slot. */
2988 byte_size = ((mode == BLKmode)
2989 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2990 args_byte_size = int_regs * UNITS_PER_WORD;
2991 offset = 0;
2992 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2993 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2995 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2996 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2997 + fp_regs)),
2998 GEN_INT (offset));
2999 offset += hfa_size;
3000 args_byte_size += hfa_size;
3001 fp_regs++;
3004 /* If no prototype, then the whole thing must go in GR regs. */
3005 if (! cum->prototype)
3006 offset = 0;
3007 /* If this is an SFmode aggregate, then we might have some left over
3008 that needs to go in GR regs. */
3009 else if (byte_size != offset)
3010 int_regs += offset / UNITS_PER_WORD;
3012 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3014 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3016 enum machine_mode gr_mode = DImode;
3018 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3019 then this goes in a GR reg left adjusted/little endian, right
3020 adjusted/big endian. */
3021 /* ??? Currently this is handled wrong, because 4-byte hunks are
3022 always right adjusted/little endian. */
3023 if (offset & 0x4)
3024 gr_mode = SImode;
3025 /* If we have an even 4 byte hunk because the aggregate is a
3026 multiple of 4 bytes in size, then this goes in a GR reg right
3027 adjusted/little endian. */
3028 else if (byte_size - offset == 4)
3029 gr_mode = SImode;
3030 /* Complex floats need to have float mode. */
3031 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3032 gr_mode = hfa_mode;
3034 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3035 gen_rtx_REG (gr_mode, (basereg
3036 + int_regs)),
3037 GEN_INT (offset));
3038 offset += GET_MODE_SIZE (gr_mode);
3039 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3040 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3043 /* If we ended up using just one location, just return that one loc. */
3044 if (i == 1)
3045 return XEXP (loc[0], 0);
3046 else
3047 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3050 /* Integral and aggregates go in general registers. If we have run out of
3051 FR registers, then FP values must also go in general registers. This can
3052 happen when we have a SFmode HFA. */
3053 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3054 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3055 return gen_rtx_REG (mode, basereg + cum->words + offset);
3057 /* If there is a prototype, then FP values go in an FR register when
3058 named, and in a GR register when unnamed. */
3059 else if (cum->prototype)
3061 if (! named)
3062 return gen_rtx_REG (mode, basereg + cum->words + offset);
3063 else
3064 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3066 /* If there is no prototype, then FP values go in both FR and GR
3067 registers. */
3068 else
3070 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3071 gen_rtx_REG (mode, (FR_ARG_FIRST
3072 + cum->fp_regs)),
3073 const0_rtx);
3074 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3075 gen_rtx_REG (mode,
3076 (basereg + cum->words
3077 + offset)),
3078 const0_rtx);
3080 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
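/* A worked example, assuming an empty CUM and a prototyped, named
   argument: a 16-byte  struct { float f[4]; }  is an SFmode HFA, so the
   result is a PARALLEL of four SFmode registers f8..f11 at byte offsets
   0, 4, 8 and 12.  A plain  long  in the same position would simply come
   back as (reg:DI r32) on the incoming side, or the matching output
   register when INCOMING is zero.  */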
3084 /* Return the number of words, at the beginning of the argument, that must be
3085 put in registers. 0 means the argument is entirely in registers or entirely
3086 in memory. */
3089 ia64_function_arg_partial_nregs (cum, mode, type, named)
3090 CUMULATIVE_ARGS *cum;
3091 enum machine_mode mode;
3092 tree type;
3093 int named ATTRIBUTE_UNUSED;
3095 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3096 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3097 / UNITS_PER_WORD);
3098 int offset = 0;
3100 /* Arguments with alignment larger than 8 bytes start at the next even
3101 boundary. */
3102 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3103 : (words > 1))
3104 && (cum->words & 1))
3105 offset = 1;
3107 /* If all argument slots are used, then it must go on the stack. */
3108 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3109 return 0;
3111 /* It doesn't matter whether the argument goes in FR or GR regs. If
3112 it fits within the 8 argument slots, then it goes entirely in
3113 registers. If it extends past the last argument slot, then the rest
3114 goes on the stack. */
3116 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3117 return 0;
3119 return MAX_ARGUMENT_SLOTS - cum->words - offset;
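/* For example, with cum->words == 6 a 24-byte aggregate needs words 6, 7
   and 8: the first two travel in registers, so this returns 8 - 6 = 2 and
   the final word goes to the stack.  */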
3122 /* Update CUM to point after this argument. This is patterned after
3123 ia64_function_arg. */
3125 void
3126 ia64_function_arg_advance (cum, mode, type, named)
3127 CUMULATIVE_ARGS *cum;
3128 enum machine_mode mode;
3129 tree type;
3130 int named;
3132 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3133 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3134 / UNITS_PER_WORD);
3135 int offset = 0;
3136 enum machine_mode hfa_mode = VOIDmode;
3138 /* If all arg slots are already full, then there is nothing to do. */
3139 if (cum->words >= MAX_ARGUMENT_SLOTS)
3140 return;
3142 /* Arguments with alignment larger than 8 bytes start at the next even
3143 boundary. */
3144 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3145 : (words > 1))
3146 && (cum->words & 1))
3147 offset = 1;
3149 cum->words += words + offset;
3151 /* Check for and handle homogeneous FP aggregates. */
3152 if (type)
3153 hfa_mode = hfa_element_mode (type, 0);
3155 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3156 and unprototyped hfas are passed specially. */
3157 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3159 int fp_regs = cum->fp_regs;
3160 /* This is the original value of cum->words + offset. */
3161 int int_regs = cum->words - words;
3162 int hfa_size = GET_MODE_SIZE (hfa_mode);
3163 int byte_size;
3164 int args_byte_size;
3166 /* If prototyped, pass it in FR regs then GR regs.
3167 If not prototyped, pass it in both FR and GR regs.
3169 If this is an SFmode aggregate, then it is possible to run out of
3170 FR regs while GR regs are still left. In that case, we pass the
3171 remaining part in the GR regs. */
3173 /* Fill the FP regs. We do this always. We stop if we reach the end
3174 of the argument, the last FP register, or the last argument slot. */
3176 byte_size = ((mode == BLKmode)
3177 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3178 args_byte_size = int_regs * UNITS_PER_WORD;
3179 offset = 0;
3180 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3181 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3183 offset += hfa_size;
3184 args_byte_size += hfa_size;
3185 fp_regs++;
3188 cum->fp_regs = fp_regs;
3191 /* Integral and aggregates go in general registers. If we have run out of
3192 FR registers, then FP values must also go in general registers. This can
3193 happen when we have a SFmode HFA. */
3194 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3195 cum->int_regs = cum->words;
3197 /* If there is a prototype, then FP values go in an FR register when
3198 named, and in a GR register when unnamed. */
3199 else if (cum->prototype)
3201 if (! named)
3202 cum->int_regs = cum->words;
3203 else
3204 /* ??? Complex types should not reach here. */
3205 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3207 /* If there is no prototype, then FP values go in both FR and GR
3208 registers. */
3209 else
3211 /* ??? Complex types should not reach here. */
3212 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3213 cum->int_regs = cum->words;
3217 /* Variable sized types are passed by reference. */
3218 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3221 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3222 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3223 enum machine_mode mode ATTRIBUTE_UNUSED;
3224 tree type;
3225 int named ATTRIBUTE_UNUSED;
3227 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3230 /* Implement va_start. */
3232 void
3233 ia64_va_start (stdarg_p, valist, nextarg)
3234 int stdarg_p;
3235 tree valist;
3236 rtx nextarg;
3238 int arg_words;
3239 int ofs;
3241 arg_words = current_function_args_info.words;
3243 if (stdarg_p)
3244 ofs = 0;
3245 else
3246 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3248 nextarg = plus_constant (nextarg, ofs);
3249 std_expand_builtin_va_start (1, valist, nextarg);
3252 /* Implement va_arg. */
3255 ia64_va_arg (valist, type)
3256 tree valist, type;
3258 tree t;
3260 /* Variable sized types are passed by reference. */
3261 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3263 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3264 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3267 /* Arguments with alignment larger than 8 bytes start at the next even
3268 boundary. */
3269 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3271 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3272 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3273 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3274 build_int_2 (-2 * UNITS_PER_WORD, -1));
3275 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3276 TREE_SIDE_EFFECTS (t) = 1;
3277 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3280 return std_expand_builtin_va_arg (valist, type);
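/* E.g. for a 16-byte-aligned type the code above first rounds the va_list
   pointer up:  valist = (valist + 15) & -16;  everything else is left to
   the generic std_expand_builtin_va_arg.  */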
3283 /* Return 1 if the function's return value is returned in memory. Return 0
3284 if it is in a register. */
3287 ia64_return_in_memory (valtype)
3288 tree valtype;
3290 enum machine_mode mode;
3291 enum machine_mode hfa_mode;
3292 HOST_WIDE_INT byte_size;
3294 mode = TYPE_MODE (valtype);
3295 byte_size = GET_MODE_SIZE (mode);
3296 if (mode == BLKmode)
3298 byte_size = int_size_in_bytes (valtype);
3299 if (byte_size < 0)
3300 return 1;
3303 /* HFAs with up to 8 elements are returned in the FP argument registers. */
3305 hfa_mode = hfa_element_mode (valtype, 0);
3306 if (hfa_mode != VOIDmode)
3308 int hfa_size = GET_MODE_SIZE (hfa_mode);
3310 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3311 return 1;
3312 else
3313 return 0;
3315 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3316 return 1;
3317 else
3318 return 0;
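/* Examples, assuming the usual eight FP argument slots and four 8-byte
   integer return slots: a struct of four doubles is a DFmode HFA
   (32 / 8 = 4 <= 8) and is returned in FP registers, while a 40-byte
   plain struct exceeds the 32-byte integer return area and is returned
   in memory.  */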
3321 /* Return rtx for register that holds the function return value. */
3324 ia64_function_value (valtype, func)
3325 tree valtype;
3326 tree func ATTRIBUTE_UNUSED;
3328 enum machine_mode mode;
3329 enum machine_mode hfa_mode;
3331 mode = TYPE_MODE (valtype);
3332 hfa_mode = hfa_element_mode (valtype, 0);
3334 if (hfa_mode != VOIDmode)
3336 rtx loc[8];
3337 int i;
3338 int hfa_size;
3339 int byte_size;
3340 int offset;
3342 hfa_size = GET_MODE_SIZE (hfa_mode);
3343 byte_size = ((mode == BLKmode)
3344 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3345 offset = 0;
3346 for (i = 0; offset < byte_size; i++)
3348 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3349 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3350 GEN_INT (offset));
3351 offset += hfa_size;
3354 if (i == 1)
3355 return XEXP (loc[0], 0);
3356 else
3357 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3359 else if (FLOAT_TYPE_P (valtype) &&
3360 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3361 return gen_rtx_REG (mode, FR_ARG_FIRST);
3362 else
3363 return gen_rtx_REG (mode, GR_RET_FIRST);
3366 /* Print a memory address as an operand to reference that memory location. */
3368 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3369 also call this from ia64_print_operand for memory addresses. */
3371 void
3372 ia64_print_operand_address (stream, address)
3373 FILE * stream ATTRIBUTE_UNUSED;
3374 rtx address ATTRIBUTE_UNUSED;
3378 /* Print an operand to an assembler instruction.
3379 C Swap and print a comparison operator.
3380 D Print an FP comparison operator.
3381 E Print 32 - constant, for SImode shifts as extract.
3382 e Print 64 - constant, for DImode rotates.
3383 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3384 a floating point register emitted normally.
3385 I Invert a predicate register by adding 1.
3386 J Select the proper predicate register for a condition.
3387 j Select the inverse predicate register for a condition.
3388 O Append .acq for volatile load.
3389 P Postincrement of a MEM.
3390 Q Append .rel for volatile store.
3391 S Shift amount for shladd instruction.
3392 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3393 for Intel assembler.
3394 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3395 for Intel assembler.
3396 r Print register name, or constant 0 as r0. HP compatibility for
3397 Linux kernel. */
3398 void
3399 ia64_print_operand (file, x, code)
3400 FILE * file;
3401 rtx x;
3402 int code;
3404 const char *str;
3406 switch (code)
3408 case 0:
3409 /* Handled below. */
3410 break;
3412 case 'C':
3414 enum rtx_code c = swap_condition (GET_CODE (x));
3415 fputs (GET_RTX_NAME (c), file);
3416 return;
3419 case 'D':
3420 switch (GET_CODE (x))
3422 case NE:
3423 str = "neq";
3424 break;
3425 case UNORDERED:
3426 str = "unord";
3427 break;
3428 case ORDERED:
3429 str = "ord";
3430 break;
3431 default:
3432 str = GET_RTX_NAME (GET_CODE (x));
3433 break;
3435 fputs (str, file);
3436 return;
3438 case 'E':
3439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3440 return;
3442 case 'e':
3443 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3444 return;
3446 case 'F':
3447 if (x == CONST0_RTX (GET_MODE (x)))
3448 str = reg_names [FR_REG (0)];
3449 else if (x == CONST1_RTX (GET_MODE (x)))
3450 str = reg_names [FR_REG (1)];
3451 else if (GET_CODE (x) == REG)
3452 str = reg_names [REGNO (x)];
3453 else
3454 abort ();
3455 fputs (str, file);
3456 return;
3458 case 'I':
3459 fputs (reg_names [REGNO (x) + 1], file);
3460 return;
3462 case 'J':
3463 case 'j':
3465 unsigned int regno = REGNO (XEXP (x, 0));
3466 if (GET_CODE (x) == EQ)
3467 regno += 1;
3468 if (code == 'j')
3469 regno ^= 1;
3470 fputs (reg_names [regno], file);
3472 return;
3474 case 'O':
3475 if (MEM_VOLATILE_P (x))
3476 fputs(".acq", file);
3477 return;
3479 case 'P':
3481 HOST_WIDE_INT value;
3483 switch (GET_CODE (XEXP (x, 0)))
3485 default:
3486 return;
3488 case POST_MODIFY:
3489 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3490 if (GET_CODE (x) == CONST_INT)
3491 value = INTVAL (x);
3492 else if (GET_CODE (x) == REG)
3494 fprintf (file, ", %s", reg_names[REGNO (x)]);
3495 return;
3497 else
3498 abort ();
3499 break;
3501 case POST_INC:
3502 value = GET_MODE_SIZE (GET_MODE (x));
3503 break;
3505 case POST_DEC:
3506 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3507 break;
3510 putc (',', file);
3511 putc (' ', file);
3512 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3513 return;
3516 case 'Q':
3517 if (MEM_VOLATILE_P (x))
3518 fputs(".rel", file);
3519 return;
3521 case 'S':
3522 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3523 return;
3525 case 'T':
3526 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3528 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3529 return;
3531 break;
3533 case 'U':
3534 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3536 const char *prefix = "0x";
3537 if (INTVAL (x) & 0x80000000)
3539 fprintf (file, "0xffffffff");
3540 prefix = "";
3542 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3543 return;
3545 break;
3547 case 'r':
3548 /* If this operand is the constant zero, write it as register zero.
3549 Any register, zero, or CONST_INT value is OK here. */
3550 if (GET_CODE (x) == REG)
3551 fputs (reg_names[REGNO (x)], file);
3552 else if (x == CONST0_RTX (GET_MODE (x)))
3553 fputs ("r0", file);
3554 else if (GET_CODE (x) == CONST_INT)
3555 output_addr_const (file, x);
3556 else
3557 output_operand_lossage ("invalid %%r value");
3558 return;
3560 case '+':
3562 const char *which;
3564 /* For conditional branches, returns or calls, substitute
3565 sptk, dptk, dpnt, or spnt for %s. */
3566 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3567 if (x)
3569 int pred_val = INTVAL (XEXP (x, 0));
3571 /* Guess top and bottom 10% statically predicted. */
3572 if (pred_val < REG_BR_PROB_BASE / 50)
3573 which = ".spnt";
3574 else if (pred_val < REG_BR_PROB_BASE / 2)
3575 which = ".dpnt";
3576 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3577 which = ".dptk";
3578 else
3579 which = ".sptk";
3581 else if (GET_CODE (current_output_insn) == CALL_INSN)
3582 which = ".sptk";
3583 else
3584 which = ".dptk";
3586 fputs (which, file);
3587 return;
3590 case ',':
3591 x = current_insn_predicate;
3592 if (x)
3594 unsigned int regno = REGNO (XEXP (x, 0));
3595 if (GET_CODE (x) == EQ)
3596 regno += 1;
3597 fprintf (file, "(%s) ", reg_names [regno]);
3599 return;
3601 default:
3602 output_operand_lossage ("ia64_print_operand: unknown code");
3603 return;
3606 switch (GET_CODE (x))
3608 /* This happens for the spill/restore instructions. */
3609 case POST_INC:
3610 case POST_DEC:
3611 case POST_MODIFY:
3612 x = XEXP (x, 0);
3613 /* ... fall through ... */
3615 case REG:
3616 fputs (reg_names [REGNO (x)], file);
3617 break;
3619 case MEM:
3621 rtx addr = XEXP (x, 0);
3622 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3623 addr = XEXP (addr, 0);
3624 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3625 break;
3628 default:
3629 output_addr_const (file, x);
3630 break;
3633 return;
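/* A few examples of the operand codes above, on hypothetical operands:

     %C on (lt (reg) (reg))        prints  "gt"     (swapped condition)
     %e on (const_int 5)           prints  "59"     (64 - 5, for rotates)
     %F on CONST0_RTX (DFmode)     prints  "f0"
     %P on a POST_INC DImode MEM   prints  ", 8"
     %, under predicate p6         prints  "(p6) "  */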
3636 /* Calculate the cost of moving data from a register in class FROM to
3637 one in class TO, using MODE. */
3640 ia64_register_move_cost (mode, from, to)
3641 enum machine_mode mode;
3642 enum reg_class from, to;
3644 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3645 if (to == ADDL_REGS)
3646 to = GR_REGS;
3647 if (from == ADDL_REGS)
3648 from = GR_REGS;
3650 /* All costs are symmetric, so reduce cases by putting the
3651 lower number class as the destination. */
3652 if (from < to)
3654 enum reg_class tmp = to;
3655 to = from, from = tmp;
3658 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3659 so that we get secondary memory reloads. Between FR_REGS,
3660 we have to make this at least as expensive as MEMORY_MOVE_COST
3661 to avoid spectacularly poor register class preferencing. */
3662 if (mode == TFmode)
3664 if (to != GR_REGS || from != GR_REGS)
3665 return MEMORY_MOVE_COST (mode, to, 0);
3666 else
3667 return 3;
3670 switch (to)
3672 case PR_REGS:
3673 /* Moving between PR registers takes two insns. */
3674 if (from == PR_REGS)
3675 return 3;
3676 /* Moving between PR and anything but GR is impossible. */
3677 if (from != GR_REGS)
3678 return MEMORY_MOVE_COST (mode, to, 0);
3679 break;
3681 case BR_REGS:
3682 /* Moving between BR and anything but GR is impossible. */
3683 if (from != GR_REGS && from != GR_AND_BR_REGS)
3684 return MEMORY_MOVE_COST (mode, to, 0);
3685 break;
3687 case AR_I_REGS:
3688 case AR_M_REGS:
3689 /* Moving between AR and anything but GR is impossible. */
3690 if (from != GR_REGS)
3691 return MEMORY_MOVE_COST (mode, to, 0);
3692 break;
3694 case GR_REGS:
3695 case FR_REGS:
3696 case GR_AND_FR_REGS:
3697 case GR_AND_BR_REGS:
3698 case ALL_REGS:
3699 break;
3701 default:
3702 abort ();
3705 return 2;
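/* Net effect, for example: GR<->GR and GR<->BR copies cost 2, a PR<->PR
   copy costs 3 (two instructions), and moves that cannot be done without
   going through memory or a GR -- such as BR<->FR -- are priced at
   MEMORY_MOVE_COST so that reload picks a better path.  */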
3708 /* This function returns the register class required for a secondary
3709 register when copying between one of the registers in CLASS, and X,
3710 using MODE. A return value of NO_REGS means that no secondary register
3711 is required. */
3713 enum reg_class
3714 ia64_secondary_reload_class (class, mode, x)
3715 enum reg_class class;
3716 enum machine_mode mode ATTRIBUTE_UNUSED;
3717 rtx x;
3719 int regno = -1;
3721 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3722 regno = true_regnum (x);
3724 switch (class)
3726 case BR_REGS:
3727 case AR_M_REGS:
3728 case AR_I_REGS:
3729 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3730 interaction. We end up with two pseudos with overlapping lifetimes
3731 both of which are equiv to the same constant, and both which need
3732 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3733 changes depending on the path length, which means the qty_first_reg
3734 check in make_regs_eqv can give different answers at different times.
3735 At some point I'll probably need a reload_indi pattern to handle
3736 this.
3738 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3739 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3740 non-general registers for good measure. */
3741 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3742 return GR_REGS;
3744 /* This is needed if a pseudo used as a call_operand gets spilled to a
3745 stack slot. */
3746 if (GET_CODE (x) == MEM)
3747 return GR_REGS;
3748 break;
3750 case FR_REGS:
3751 /* Need to go through general registers to get to other class regs. */
3752 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3753 return GR_REGS;
3755 /* This can happen when a paradoxical subreg is an operand to the
3756 muldi3 pattern. */
3757 /* ??? This shouldn't be necessary after instruction scheduling is
3758 enabled, because paradoxical subregs are not accepted by
3759 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3760 stop the paradoxical subreg stupidity in the *_operand functions
3761 in recog.c. */
3762 if (GET_CODE (x) == MEM
3763 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3764 || GET_MODE (x) == QImode))
3765 return GR_REGS;
3767 /* This can happen because of the ior/and/etc patterns that accept FP
3768 registers as operands. If the third operand is a constant, then it
3769 needs to be reloaded into a FP register. */
3770 if (GET_CODE (x) == CONST_INT)
3771 return GR_REGS;
3773 /* This can happen because of register elimination in a muldi3 insn.
3774 E.g. `26107 * (unsigned long)&u'. */
3775 if (GET_CODE (x) == PLUS)
3776 return GR_REGS;
3777 break;
3779 case PR_REGS:
3780 /* ??? This happens if we cse/gcse a BImode value across a call,
3781 and the function has a nonlocal goto. This is because global
3782 does not allocate call crossing pseudos to hard registers when
3783 current_function_has_nonlocal_goto is true. This is relatively
3784 common for C++ programs that use exceptions. To reproduce,
3785 return NO_REGS and compile libstdc++. */
3786 if (GET_CODE (x) == MEM)
3787 return GR_REGS;
3789 /* This can happen when we take a BImode subreg of a DImode value,
3790 and that DImode value winds up in some non-GR register. */
3791 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3792 return GR_REGS;
3793 break;
3795 case GR_REGS:
3796 /* Since we have no offsettable memory addresses, we need a temporary
3797 to hold the address of the second word. */
3798 if (mode == TImode)
3799 return GR_REGS;
3800 break;
3802 default:
3803 break;
3806 return NO_REGS;
3810 /* Emit text to declare externally defined variables and functions, because
3811 the Intel assembler does not support undefined externals. */
3813 void
3814 ia64_asm_output_external (file, decl, name)
3815 FILE *file;
3816 tree decl;
3817 const char *name;
3819 int save_referenced;
3821 /* GNU as does not need anything here. */
3822 if (TARGET_GNU_AS)
3823 return;
3825 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3826 the linker when we do this, so we need to be careful not to do this for
3827 builtin functions which have no library equivalent. Unfortunately, we
3828 can't tell here whether or not a function will actually be called by
3829 expand_expr, so we pull in library functions even if we may not need
3830 them later. */
3831 if (! strcmp (name, "__builtin_next_arg")
3832 || ! strcmp (name, "alloca")
3833 || ! strcmp (name, "__builtin_constant_p")
3834 || ! strcmp (name, "__builtin_args_info"))
3835 return;
3837 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3838 restore it. */
3839 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3840 if (TREE_CODE (decl) == FUNCTION_DECL)
3842 fprintf (file, "%s", TYPE_ASM_OP);
3843 assemble_name (file, name);
3844 putc (',', file);
3845 fprintf (file, TYPE_OPERAND_FMT, "function");
3846 putc ('\n', file);
3848 ASM_GLOBALIZE_LABEL (file, name);
3849 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3852 /* Parse the -mfixed-range= option string. */
3854 static void
3855 fix_range (const_str)
3856 const char *const_str;
3858 int i, first, last;
3859 char *str, *dash, *comma;
3861 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3862 REG2 are either register names or register numbers. The effect
3863 of this option is to mark the registers in the range from REG1 to
3864 REG2 as ``fixed'' so they won't be used by the compiler. This is
3865 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
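/* A minimal usage sketch (the argument values are hypothetical, not taken
   from any real command line):

        fix_range ("f32-f127,f8-f15");

   marks f32..f127 and f8..f15 as fixed and call-used, one comma-separated
   range per iteration of the loop below.  */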
3867 i = strlen (const_str);
3868 str = (char *) alloca (i + 1);
3869 memcpy (str, const_str, i + 1);
3871 while (1)
3873 dash = strchr (str, '-');
3874 if (!dash)
3876 warning ("value of -mfixed-range must have form REG1-REG2");
3877 return;
3879 *dash = '\0';
3881 comma = strchr (dash + 1, ',');
3882 if (comma)
3883 *comma = '\0';
3885 first = decode_reg_name (str);
3886 if (first < 0)
3888 warning ("unknown register name: %s", str);
3889 return;
3892 last = decode_reg_name (dash + 1);
3893 if (last < 0)
3895 warning ("unknown register name: %s", dash + 1);
3896 return;
3899 *dash = '-';
3901 if (first > last)
3903 warning ("%s-%s is an empty range", str, dash + 1);
3904 return;
3907 for (i = first; i <= last; ++i)
3908 fixed_regs[i] = call_used_regs[i] = 1;
3910 if (!comma)
3911 break;
3913 *comma = ',';
3914 str = comma + 1;
3918 /* Called to register all of our global variables with the garbage
3919 collector. */
3921 static void
3922 ia64_add_gc_roots ()
3924 ggc_add_rtx_root (&ia64_compare_op0, 1);
3925 ggc_add_rtx_root (&ia64_compare_op1, 1);
3928 static void
3929 ia64_init_machine_status (p)
3930 struct function *p;
3932 p->machine =
3933 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3936 static void
3937 ia64_mark_machine_status (p)
3938 struct function *p;
3940 struct machine_function *machine = p->machine;
3942 if (machine)
3944 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3945 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3946 ggc_mark_rtx (machine->ia64_gp_save);
3950 static void
3951 ia64_free_machine_status (p)
3952 struct function *p;
3954 free (p->machine);
3955 p->machine = NULL;
3958 /* Handle TARGET_OPTIONS switches. */
3960 void
3961 ia64_override_options ()
3963 if (TARGET_AUTO_PIC)
3964 target_flags |= MASK_CONST_GP;
3966 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3968 warning ("cannot optimize division for both latency and throughput");
3969 target_flags &= ~MASK_INLINE_DIV_THR;
3972 if (ia64_fixed_range_string)
3973 fix_range (ia64_fixed_range_string);
3975 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3976 flag_schedule_insns_after_reload = 0;
3978 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3980 init_machine_status = ia64_init_machine_status;
3981 mark_machine_status = ia64_mark_machine_status;
3982 free_machine_status = ia64_free_machine_status;
3984 ia64_add_gc_roots ();
3987 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3988 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3989 static enum attr_type ia64_safe_type PARAMS((rtx));
3991 static enum attr_itanium_requires_unit0
3992 ia64_safe_itanium_requires_unit0 (insn)
3993 rtx insn;
3995 if (recog_memoized (insn) >= 0)
3996 return get_attr_itanium_requires_unit0 (insn);
3997 else
3998 return ITANIUM_REQUIRES_UNIT0_NO;
4001 static enum attr_itanium_class
4002 ia64_safe_itanium_class (insn)
4003 rtx insn;
4005 if (recog_memoized (insn) >= 0)
4006 return get_attr_itanium_class (insn);
4007 else
4008 return ITANIUM_CLASS_UNKNOWN;
4011 static enum attr_type
4012 ia64_safe_type (insn)
4013 rtx insn;
4015 if (recog_memoized (insn) >= 0)
4016 return get_attr_type (insn);
4017 else
4018 return TYPE_UNKNOWN;
4021 /* The following collection of routines emit instruction group stop bits as
4022 necessary to avoid dependencies. */
4024 /* Need to track some additional registers as far as serialization is
4025 concerned so we can properly handle br.call and br.ret. We could
4026 make these registers visible to gcc, but since these registers are
4027 never explicitly used in gcc generated code, it seems wasteful to
4028 do so (plus it would make the call and return patterns needlessly
4029 complex). */
4030 #define REG_GP (GR_REG (1))
4031 #define REG_RP (BR_REG (0))
4032 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4033 /* This is used for volatile asms which may require a stop bit immediately
4034 before and after them. */
4035 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4036 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4037 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4039 /* For each register, we keep track of how it has been written in the
4040 current instruction group.
4042 If a register is written unconditionally (no qualifying predicate),
4043 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4045 If a register is written if its qualifying predicate P is true, we
4046 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4047 may be written again by the complement of P (P^1) and when this happens,
4048 WRITE_COUNT gets set to 2.
4050 The result of this is that whenever an insn attempts to write a register
4051 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4053 If a predicate register is written by a floating-point insn, we set
4054 WRITTEN_BY_FP to true.
4056 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4057 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
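/* An illustrative walk-through of the WRITE_COUNT rules above, assuming
   p6 and p7 form a complementary predicate pair (ia64 assembly, shown
   only as a sketch):

        (p6) mov r8 = 1    // write_count 0 -> 1, first_pred = p6
        (p7) mov r8 = 2    // complement of p6: write_count -> 2, no stop
             mov r8 = 3    // write_count is already 2, so a stop bit (;;)
                           // must be emitted before this insn.  */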
4059 struct reg_write_state
4061 unsigned int write_count : 2;
4062 unsigned int first_pred : 16;
4063 unsigned int written_by_fp : 1;
4064 unsigned int written_by_and : 1;
4065 unsigned int written_by_or : 1;
4068 /* Cumulative info for the current instruction group. */
4069 struct reg_write_state rws_sum[NUM_REGS];
4070 /* Info for the current instruction. This gets copied to rws_sum after a
4071 stop bit is emitted. */
4072 struct reg_write_state rws_insn[NUM_REGS];
4074 /* Indicates whether this is the first instruction after a stop bit,
4075 in which case we don't need another stop bit. Without this, we hit
4076 the abort in ia64_variable_issue when scheduling an alloc. */
4077 static int first_instruction;
4079 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4080 RTL for one instruction. */
4081 struct reg_flags
4083 unsigned int is_write : 1; /* Is register being written? */
4084 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4085 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4086 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4087 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4088 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4091 static void rws_update PARAMS ((struct reg_write_state *, int,
4092 struct reg_flags, int));
4093 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4094 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4095 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4096 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4097 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4098 static void init_insn_group_barriers PARAMS ((void));
4099 static int group_barrier_needed_p PARAMS ((rtx));
4100 static int safe_group_barrier_needed_p PARAMS ((rtx));
4102 /* Update *RWS for REGNO, which is being written by the current instruction,
4103 with predicate PRED, and associated register flags in FLAGS. */
4105 static void
4106 rws_update (rws, regno, flags, pred)
4107 struct reg_write_state *rws;
4108 int regno;
4109 struct reg_flags flags;
4110 int pred;
4112 if (pred)
4113 rws[regno].write_count++;
4114 else
4115 rws[regno].write_count = 2;
4116 rws[regno].written_by_fp |= flags.is_fp;
4117 /* ??? Not tracking and/or across differing predicates. */
4118 rws[regno].written_by_and = flags.is_and;
4119 rws[regno].written_by_or = flags.is_or;
4120 rws[regno].first_pred = pred;
4123 /* Handle an access to register REGNO of type FLAGS using predicate register
4124 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4125 a dependency with an earlier instruction in the same group. */
4127 static int
4128 rws_access_regno (regno, flags, pred)
4129 int regno;
4130 struct reg_flags flags;
4131 int pred;
4133 int need_barrier = 0;
4135 if (regno >= NUM_REGS)
4136 abort ();
4138 if (! PR_REGNO_P (regno))
4139 flags.is_and = flags.is_or = 0;
4141 if (flags.is_write)
4143 int write_count;
4145 /* One insn writes same reg multiple times? */
4146 if (rws_insn[regno].write_count > 0)
4147 abort ();
4149 /* Update info for current instruction. */
4150 rws_update (rws_insn, regno, flags, pred);
4151 write_count = rws_sum[regno].write_count;
4153 switch (write_count)
4155 case 0:
4156 /* The register has not been written yet. */
4157 rws_update (rws_sum, regno, flags, pred);
4158 break;
4160 case 1:
4161 /* The register has been written via a predicate. If this is
4162 not a complementary predicate, then we need a barrier. */
4163 /* ??? This assumes that P and P+1 are always complementary
4164 predicates when P is even. */
4165 if (flags.is_and && rws_sum[regno].written_by_and)
4167 else if (flags.is_or && rws_sum[regno].written_by_or)
4169 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4170 need_barrier = 1;
4171 rws_update (rws_sum, regno, flags, pred);
4172 break;
4174 case 2:
4175 /* The register has been unconditionally written already. We
4176 need a barrier. */
4177 if (flags.is_and && rws_sum[regno].written_by_and)
4179 else if (flags.is_or && rws_sum[regno].written_by_or)
4181 else
4182 need_barrier = 1;
4183 rws_sum[regno].written_by_and = flags.is_and;
4184 rws_sum[regno].written_by_or = flags.is_or;
4185 break;
4187 default:
4188 abort ();
4191 else
4193 if (flags.is_branch)
4195 /* Branches have several RAW exceptions that allow us to avoid
4196 barriers. */
4198 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4199 /* RAW dependencies on branch regs are permissible as long
4200 as the writer is a non-branch instruction. Since we
4201 never generate code that uses a branch register written
4202 by a branch instruction, handling this case is
4203 easy. */
4204 return 0;
4206 if (REGNO_REG_CLASS (regno) == PR_REGS
4207 && ! rws_sum[regno].written_by_fp)
4208 /* The predicates of a branch are available within the
4209 same insn group as long as the predicate was written by
4210 something other than a floating-point instruction. */
4211 return 0;
4214 if (flags.is_and && rws_sum[regno].written_by_and)
4215 return 0;
4216 if (flags.is_or && rws_sum[regno].written_by_or)
4217 return 0;
4219 switch (rws_sum[regno].write_count)
4221 case 0:
4222 /* The register has not been written yet. */
4223 break;
4225 case 1:
4226 /* The register has been written via a predicate. If this is
4227 not a complementary predicate, then we need a barrier. */
4228 /* ??? This assumes that P and P+1 are always complementary
4229 predicates when P is even. */
4230 if ((rws_sum[regno].first_pred ^ 1) != pred)
4231 need_barrier = 1;
4232 break;
4234 case 2:
4235 /* The register has been unconditionally written already. We
4236 need a barrier. */
4237 need_barrier = 1;
4238 break;
4240 default:
4241 abort ();
4245 return need_barrier;
4248 static int
4249 rws_access_reg (reg, flags, pred)
4250 rtx reg;
4251 struct reg_flags flags;
4252 int pred;
4254 int regno = REGNO (reg);
4255 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4257 if (n == 1)
4258 return rws_access_regno (regno, flags, pred);
4259 else
4261 int need_barrier = 0;
4262 while (--n >= 0)
4263 need_barrier |= rws_access_regno (regno + n, flags, pred);
4264 return need_barrier;
4268 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4269 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
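/* An illustrative case (RTL shown schematically, not generated here): for
   a conditional move such as

        (set (reg r8) (if_then_else (ne (reg p6) (const_int 0))
                                    (reg r9)
                                    (reg r8)))

   the destination is written only when p6 is true, so the code below
   stores the p6 rtx in *PCOND and p6's register number in *PPRED.  */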
4271 static void
4272 update_set_flags (x, pflags, ppred, pcond)
4273 rtx x;
4274 struct reg_flags *pflags;
4275 int *ppred;
4276 rtx *pcond;
4278 rtx src = SET_SRC (x);
4280 *pcond = 0;
4282 switch (GET_CODE (src))
4284 case CALL:
4285 return;
4287 case IF_THEN_ELSE:
4288 if (SET_DEST (x) == pc_rtx)
4289 /* X is a conditional branch. */
4290 return;
4291 else
4293 int is_complemented = 0;
4295 /* X is a conditional move. */
4296 rtx cond = XEXP (src, 0);
4297 if (GET_CODE (cond) == EQ)
4298 is_complemented = 1;
4299 cond = XEXP (cond, 0);
4300 if (GET_CODE (cond) != REG
4301 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4302 abort ();
4303 *pcond = cond;
4304 if (XEXP (src, 1) == SET_DEST (x)
4305 || XEXP (src, 2) == SET_DEST (x))
4307 /* X is a conditional move that conditionally writes the
4308 destination. */
4310 /* We need another complement in this case. */
4311 if (XEXP (src, 1) == SET_DEST (x))
4312 is_complemented = ! is_complemented;
4314 *ppred = REGNO (cond);
4315 if (is_complemented)
4316 ++*ppred;
4319 /* ??? If this is a conditional write to the dest, then this
4320 instruction does not actually read one source. This probably
4321 doesn't matter, because that source is also the dest. */
4322 /* ??? Multiple writes to predicate registers are allowed
4323 if they are all AND type compares, or if they are all OR
4324 type compares. We do not generate such instructions
4325 currently. */
4327 /* ... fall through ... */
4329 default:
4330 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4331 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4332 /* Set pflags->is_fp to 1 so that we know we're dealing
4333 with a floating point comparison when processing the
4334 destination of the SET. */
4335 pflags->is_fp = 1;
4337 /* Discover if this is a parallel comparison. We only handle
4338 and.orcm and or.andcm at present, since we must retain a
4339 strict inverse on the predicate pair. */
4340 else if (GET_CODE (src) == AND)
4341 pflags->is_and = 1;
4342 else if (GET_CODE (src) == IOR)
4343 pflags->is_or = 1;
4345 break;
4349 /* Subroutine of rtx_needs_barrier; this function determines whether the
4350 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4351 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4352 for this insn. */
4354 static int
4355 set_src_needs_barrier (x, flags, pred, cond)
4356 rtx x;
4357 struct reg_flags flags;
4358 int pred;
4359 rtx cond;
4361 int need_barrier = 0;
4362 rtx dst;
4363 rtx src = SET_SRC (x);
4365 if (GET_CODE (src) == CALL)
4366 /* We don't need to worry about the result registers that
4367 get written by a subroutine call. */
4368 return rtx_needs_barrier (src, flags, pred);
4369 else if (SET_DEST (x) == pc_rtx)
4371 /* X is a conditional branch. */
4372 /* ??? This seems redundant, as the caller sets this bit for
4373 all JUMP_INSNs. */
4374 flags.is_branch = 1;
4375 return rtx_needs_barrier (src, flags, pred);
4378 need_barrier = rtx_needs_barrier (src, flags, pred);
4380 /* This instruction unconditionally uses a predicate register. */
4381 if (cond)
4382 need_barrier |= rws_access_reg (cond, flags, 0);
4384 dst = SET_DEST (x);
4385 if (GET_CODE (dst) == ZERO_EXTRACT)
4387 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4388 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4389 dst = XEXP (dst, 0);
4391 return need_barrier;
4394 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4395 Return 1 if this access creates a dependency with an earlier instruction
4396 in the same group. */
4398 static int
4399 rtx_needs_barrier (x, flags, pred)
4400 rtx x;
4401 struct reg_flags flags;
4402 int pred;
4404 int i, j;
4405 int is_complemented = 0;
4406 int need_barrier = 0;
4407 const char *format_ptr;
4408 struct reg_flags new_flags;
4409 rtx cond = 0;
4411 if (! x)
4412 return 0;
4414 new_flags = flags;
4416 switch (GET_CODE (x))
4418 case SET:
4419 update_set_flags (x, &new_flags, &pred, &cond);
4420 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4421 if (GET_CODE (SET_SRC (x)) != CALL)
4423 new_flags.is_write = 1;
4424 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4426 break;
4428 case CALL:
4429 new_flags.is_write = 0;
4430 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4432 /* Avoid multiple register writes, in case this is a pattern with
4433 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4434 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4436 new_flags.is_write = 1;
4437 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4438 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4439 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4441 break;
4443 case COND_EXEC:
4444 /* X is a predicated instruction. */
4446 cond = COND_EXEC_TEST (x);
4447 if (pred)
4448 abort ();
4449 need_barrier = rtx_needs_barrier (cond, flags, 0);
4451 if (GET_CODE (cond) == EQ)
4452 is_complemented = 1;
4453 cond = XEXP (cond, 0);
4454 if (GET_CODE (cond) != REG
4455 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4456 abort ();
4457 pred = REGNO (cond);
4458 if (is_complemented)
4459 ++pred;
4461 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4462 return need_barrier;
4464 case CLOBBER:
4465 case USE:
4466 /* Clobber & use are for earlier compiler-phases only. */
4467 break;
4469 case ASM_OPERANDS:
4470 case ASM_INPUT:
4471 /* We always emit stop bits for traditional asms. We emit stop bits
4472 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4473 if (GET_CODE (x) != ASM_OPERANDS
4474 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4476 /* Avoid writing the register multiple times if we have multiple
4477 asm outputs. This avoids an abort in rws_access_reg. */
4478 if (! rws_insn[REG_VOLATILE].write_count)
4480 new_flags.is_write = 1;
4481 rws_access_regno (REG_VOLATILE, new_flags, pred);
4483 return 1;
4486 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4487 We cannot just fall through here, since then we would be confused
4488 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
4489 a traditional asm, unlike its normal usage. */
4491 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4492 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4493 need_barrier = 1;
4494 break;
4496 case PARALLEL:
4497 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4499 rtx pat = XVECEXP (x, 0, i);
4500 if (GET_CODE (pat) == SET)
4502 update_set_flags (pat, &new_flags, &pred, &cond);
4503 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4505 else if (GET_CODE (pat) == USE
4506 || GET_CODE (pat) == CALL
4507 || GET_CODE (pat) == ASM_OPERANDS)
4508 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4509 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4510 abort ();
4512 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4514 rtx pat = XVECEXP (x, 0, i);
4515 if (GET_CODE (pat) == SET)
4517 if (GET_CODE (SET_SRC (pat)) != CALL)
4519 new_flags.is_write = 1;
4520 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4521 pred);
4524 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4525 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4527 break;
4529 case SUBREG:
4530 x = SUBREG_REG (x);
4531 /* FALLTHRU */
4532 case REG:
4533 if (REGNO (x) == AR_UNAT_REGNUM)
4535 for (i = 0; i < 64; ++i)
4536 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4538 else
4539 need_barrier = rws_access_reg (x, flags, pred);
4540 break;
4542 case MEM:
4543 /* Find the regs used in memory address computation. */
4544 new_flags.is_write = 0;
4545 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4546 break;
4548 case CONST_INT: case CONST_DOUBLE:
4549 case SYMBOL_REF: case LABEL_REF: case CONST:
4550 break;
4552 /* Operators with side-effects. */
4553 case POST_INC: case POST_DEC:
4554 if (GET_CODE (XEXP (x, 0)) != REG)
4555 abort ();
4557 new_flags.is_write = 0;
4558 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4559 new_flags.is_write = 1;
4560 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4561 break;
4563 case POST_MODIFY:
4564 if (GET_CODE (XEXP (x, 0)) != REG)
4565 abort ();
4567 new_flags.is_write = 0;
4568 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4569 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4570 new_flags.is_write = 1;
4571 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4572 break;
4574 /* Handle common unary and binary ops for efficiency. */
4575 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4576 case MOD: case UDIV: case UMOD: case AND: case IOR:
4577 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4578 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4579 case NE: case EQ: case GE: case GT: case LE:
4580 case LT: case GEU: case GTU: case LEU: case LTU:
4581 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4582 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4583 break;
4585 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4586 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4587 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4588 case SQRT: case FFS:
4589 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4590 break;
4592 case UNSPEC:
4593 switch (XINT (x, 1))
4595 case UNSPEC_GR_SPILL:
4596 case UNSPEC_GR_RESTORE:
4598 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4599 HOST_WIDE_INT bit = (offset >> 3) & 63;
4601 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4602 new_flags.is_write = (XINT (x, 1) == 1);
4603 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4604 new_flags, pred);
4605 break;
4608 case UNSPEC_FR_SPILL:
4609 case UNSPEC_FR_RESTORE:
4610 case UNSPEC_POPCNT:
4611 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4612 break;
4614 case UNSPEC_PRED_REL_MUTEX:
4615 case UNSPEC_PIC_CALL:
4616 case UNSPEC_MF:
4617 case UNSPEC_FETCHADD_ACQ:
4618 case UNSPEC_BSP_VALUE:
4619 case UNSPEC_FLUSHRS:
4620 case UNSPEC_BUNDLE_SELECTOR:
4621 break;
4623 case UNSPEC_ADDP4:
4624 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4625 break;
4627 case UNSPEC_FR_RECIP_APPROX:
4628 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4629 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4630 break;
4632 case UNSPEC_CMPXCHG_ACQ:
4633 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4634 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4635 break;
4637 default:
4638 abort ();
4640 break;
4642 case UNSPEC_VOLATILE:
4643 switch (XINT (x, 1))
4645 case UNSPECV_ALLOC:
4646 /* Alloc must always be the first instruction of a group.
4647 We force this by always returning true. */
4648 /* ??? We might get better scheduling if we explicitly check for
4649 input/local/output register dependencies, and modify the
4650 scheduler so that alloc is always reordered to the start of
4651 the current group. We could then eliminate all of the
4652 first_instruction code. */
4653 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4655 new_flags.is_write = 1;
4656 rws_access_regno (REG_AR_CFM, new_flags, pred);
4657 return 1;
4659 case UNSPECV_SET_BSP:
4660 need_barrier = 1;
4661 break;
4663 case UNSPECV_BLOCKAGE:
4664 case UNSPECV_INSN_GROUP_BARRIER:
4665 case UNSPECV_BREAK:
4666 case UNSPECV_PSAC_ALL:
4667 case UNSPECV_PSAC_NORMAL:
4668 return 0;
4670 default:
4671 abort ();
4673 break;
4675 case RETURN:
4676 new_flags.is_write = 0;
4677 need_barrier = rws_access_regno (REG_RP, flags, pred);
4678 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4680 new_flags.is_write = 1;
4681 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4682 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4683 break;
4685 default:
4686 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4687 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4688 switch (format_ptr[i])
4690 case '0': /* unused field */
4691 case 'i': /* integer */
4692 case 'n': /* note */
4693 case 'w': /* wide integer */
4694 case 's': /* pointer to string */
4695 case 'S': /* optional pointer to string */
4696 break;
4698 case 'e':
4699 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4700 need_barrier = 1;
4701 break;
4703 case 'E':
4704 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4705 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4706 need_barrier = 1;
4707 break;
4709 default:
4710 abort ();
4712 break;
4714 return need_barrier;
4717 /* Clear out the state for group_barrier_needed_p at the start of a
4718 sequence of insns. */
4720 static void
4721 init_insn_group_barriers ()
4723 memset (rws_sum, 0, sizeof (rws_sum));
4724 first_instruction = 1;
4727 /* Given the current state, recorded by previous calls to this function,
4728 determine whether a group barrier (a stop bit) is necessary before INSN.
4729 Return nonzero if so. */
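/* A sketch of the intended calling pattern (compare the barrier-emission
   loops below; this is an illustration, not additional functionality):

        if (group_barrier_needed_p (insn))
          {
            emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
            init_insn_group_barriers ();
          }

   Emitting the barrier resets the tracked state, so the check starts
   afresh for the next group.  */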
4731 static int
4732 group_barrier_needed_p (insn)
4733 rtx insn;
4735 rtx pat;
4736 int need_barrier = 0;
4737 struct reg_flags flags;
4739 memset (&flags, 0, sizeof (flags));
4740 switch (GET_CODE (insn))
4742 case NOTE:
4743 break;
4745 case BARRIER:
4746 /* A barrier doesn't imply an instruction group boundary. */
4747 break;
4749 case CODE_LABEL:
4750 memset (rws_insn, 0, sizeof (rws_insn));
4751 return 1;
4753 case CALL_INSN:
4754 flags.is_branch = 1;
4755 flags.is_sibcall = SIBLING_CALL_P (insn);
4756 memset (rws_insn, 0, sizeof (rws_insn));
4758 /* Don't bundle a call following another call. */
4759 if ((pat = prev_active_insn (insn))
4760 && GET_CODE (pat) == CALL_INSN)
4762 need_barrier = 1;
4763 break;
4766 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4767 break;
4769 case JUMP_INSN:
4770 flags.is_branch = 1;
4772 /* Don't bundle a jump following a call. */
4773 if ((pat = prev_active_insn (insn))
4774 && GET_CODE (pat) == CALL_INSN)
4776 need_barrier = 1;
4777 break;
4779 /* FALLTHRU */
4781 case INSN:
4782 if (GET_CODE (PATTERN (insn)) == USE
4783 || GET_CODE (PATTERN (insn)) == CLOBBER)
4784 /* Don't care about USE and CLOBBER "insns"---those are used to
4785 indicate to the optimizer that it shouldn't get rid of
4786 certain operations. */
4787 break;
4789 pat = PATTERN (insn);
4791 /* Ug. Hack hacks hacked elsewhere. */
4792 switch (recog_memoized (insn))
4794 /* We play dependency tricks with the epilogue in order
4795 to get proper schedules. Undo this for dv analysis. */
4796 case CODE_FOR_epilogue_deallocate_stack:
4797 case CODE_FOR_prologue_allocate_stack:
4798 pat = XVECEXP (pat, 0, 0);
4799 break;
4801 /* The pattern we use for br.cloop confuses the code above.
4802 The second element of the vector is representative. */
4803 case CODE_FOR_doloop_end_internal:
4804 pat = XVECEXP (pat, 0, 1);
4805 break;
4807 /* Doesn't generate code. */
4808 case CODE_FOR_pred_rel_mutex:
4809 case CODE_FOR_prologue_use:
4810 return 0;
4812 default:
4813 break;
4816 memset (rws_insn, 0, sizeof (rws_insn));
4817 need_barrier = rtx_needs_barrier (pat, flags, 0);
4819 /* Check to see if the previous instruction was a volatile
4820 asm. */
4821 if (! need_barrier)
4822 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4823 break;
4825 default:
4826 abort ();
4829 if (first_instruction)
4831 need_barrier = 0;
4832 first_instruction = 0;
4835 return need_barrier;
4838 /* Like group_barrier_needed_p, but do not clobber the current state. */
4840 static int
4841 safe_group_barrier_needed_p (insn)
4842 rtx insn;
4844 struct reg_write_state rws_saved[NUM_REGS];
4845 int saved_first_instruction;
4846 int t;
4848 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4849 saved_first_instruction = first_instruction;
4851 t = group_barrier_needed_p (insn);
4853 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4854 first_instruction = saved_first_instruction;
4856 return t;
4859 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4860 as necessary to eliminate dependencies. This function assumes that
4861 a final instruction scheduling pass has been run which has already
4862 inserted most of the necessary stop bits. This function only inserts
4863 new ones at basic block boundaries, since these are invisible to the
4864 scheduler. */
4866 static void
4867 emit_insn_group_barriers (dump, insns)
4868 FILE *dump;
4869 rtx insns;
4871 rtx insn;
4872 rtx last_label = 0;
4873 int insns_since_last_label = 0;
4875 init_insn_group_barriers ();
4877 for (insn = insns; insn; insn = NEXT_INSN (insn))
4879 if (GET_CODE (insn) == CODE_LABEL)
4881 if (insns_since_last_label)
4882 last_label = insn;
4883 insns_since_last_label = 0;
4885 else if (GET_CODE (insn) == NOTE
4886 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4888 if (insns_since_last_label)
4889 last_label = insn;
4890 insns_since_last_label = 0;
4892 else if (GET_CODE (insn) == INSN
4893 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4894 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
4896 init_insn_group_barriers ();
4897 last_label = 0;
4899 else if (INSN_P (insn))
4901 insns_since_last_label = 1;
4903 if (group_barrier_needed_p (insn))
4905 if (last_label)
4907 if (dump)
4908 fprintf (dump, "Emitting stop before label %d\n",
4909 INSN_UID (last_label));
4910 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4911 insn = last_label;
4913 init_insn_group_barriers ();
4914 last_label = 0;
4921 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4922 This function has to emit all necessary group barriers. */
4924 static void
4925 emit_all_insn_group_barriers (dump, insns)
4926 FILE *dump ATTRIBUTE_UNUSED;
4927 rtx insns;
4929 rtx insn;
4931 init_insn_group_barriers ();
4933 for (insn = insns; insn; insn = NEXT_INSN (insn))
4935 if (GET_CODE (insn) == BARRIER)
4937 rtx last = prev_active_insn (insn);
4939 if (! last)
4940 continue;
4941 if (GET_CODE (last) == JUMP_INSN
4942 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
4943 last = prev_active_insn (last);
4944 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
4945 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
4947 init_insn_group_barriers ();
4949 else if (INSN_P (insn))
4951 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
4952 init_insn_group_barriers ();
4953 else if (group_barrier_needed_p (insn))
4955 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4956 init_insn_group_barriers ();
4957 group_barrier_needed_p (insn);
4963 static int errata_find_address_regs PARAMS ((rtx *, void *));
4964 static void errata_emit_nops PARAMS ((rtx));
4965 static void fixup_errata PARAMS ((void));
4967 /* This structure is used to track some details about the previous insn
4968 groups so we can determine if it may be necessary to insert NOPs to
4969 work around hardware errata. */
4970 static struct group
4972 HARD_REG_SET p_reg_set;
4973 HARD_REG_SET gr_reg_conditionally_set;
4974 } last_group[2];
4976 /* Index into the last_group array. */
4977 static int group_idx;
4979 /* Called through for_each_rtx; determines if a hard register that was
4980 conditionally set in the previous group is used as an address register.
4981 It ensures that for_each_rtx returns 1 in that case. */
4982 static int
4983 errata_find_address_regs (xp, data)
4984 rtx *xp;
4985 void *data ATTRIBUTE_UNUSED;
4987 rtx x = *xp;
4988 if (GET_CODE (x) != MEM)
4989 return 0;
4990 x = XEXP (x, 0);
4991 if (GET_CODE (x) == POST_MODIFY)
4992 x = XEXP (x, 0);
4993 if (GET_CODE (x) == REG)
4995 struct group *prev_group = last_group + (group_idx ^ 1);
4996 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4997 REGNO (x)))
4998 return 1;
4999 return -1;
5001 return 0;
5004 /* Called for each insn; this function keeps track of the state in
5005 last_group and emits additional NOPs if necessary to work around
5006 an Itanium A/B step erratum. */
5007 static void
5008 errata_emit_nops (insn)
5009 rtx insn;
5011 struct group *this_group = last_group + group_idx;
5012 struct group *prev_group = last_group + (group_idx ^ 1);
5013 rtx pat = PATTERN (insn);
5014 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5015 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5016 enum attr_type type;
5017 rtx set = real_pat;
5019 if (GET_CODE (real_pat) == USE
5020 || GET_CODE (real_pat) == CLOBBER
5021 || GET_CODE (real_pat) == ASM_INPUT
5022 || GET_CODE (real_pat) == ADDR_VEC
5023 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5024 || asm_noperands (PATTERN (insn)) >= 0)
5025 return;
5027 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5028 parts of it. */
5030 if (GET_CODE (set) == PARALLEL)
5032 int i;
5033 set = XVECEXP (real_pat, 0, 0);
5034 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5035 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5036 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5038 set = 0;
5039 break;
5043 if (set && GET_CODE (set) != SET)
5044 set = 0;
5046 type = get_attr_type (insn);
5048 if (type == TYPE_F
5049 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5050 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5052 if ((type == TYPE_M || type == TYPE_A) && cond && set
5053 && REG_P (SET_DEST (set))
5054 && GET_CODE (SET_SRC (set)) != PLUS
5055 && GET_CODE (SET_SRC (set)) != MINUS
5056 && (GET_CODE (SET_SRC (set)) != ASHIFT
5057 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5058 && (GET_CODE (SET_SRC (set)) != MEM
5059 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5060 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5062 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5063 || ! REG_P (XEXP (cond, 0)))
5064 abort ();
5066 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5067 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5069 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5071 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5072 emit_insn_before (gen_nop (), insn);
5073 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5074 group_idx = 0;
5075 memset (last_group, 0, sizeof last_group);
5079 /* Emit extra nops if they are required to work around hardware errata. */
5081 static void
5082 fixup_errata ()
5084 rtx insn;
5086 if (! TARGET_B_STEP)
5087 return;
5089 group_idx = 0;
5090 memset (last_group, 0, sizeof last_group);
5092 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5094 if (!INSN_P (insn))
5095 continue;
5097 if (ia64_safe_type (insn) == TYPE_S)
5099 group_idx ^= 1;
5100 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5102 else
5103 errata_emit_nops (insn);
5107 /* Instruction scheduling support. */
5108 /* Describe one bundle. */
5110 struct bundle
5112 /* Zero if there's no possibility of a stop in this bundle other than
5113 at the end, otherwise the position of the optional stop bit. */
5114 int possible_stop;
5115 /* The types of the three slots. */
5116 enum attr_type t[3];
5117 /* The pseudo op to be emitted into the assembler output. */
5118 const char *name;
5121 #define NR_BUNDLES 10
5123 /* A list of all available bundles. */
5125 static const struct bundle bundle[NR_BUNDLES] =
5127 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5128 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5129 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5130 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5131 #if NR_BUNDLES == 10
5132 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5133 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5134 #endif
5135 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5136 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5137 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5138 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5139 it matches an L type insn. Otherwise we'll try to generate L type
5140 nops. */
5141 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5144 /* Describe a packet of instructions. Packets consist of two bundles that
5145 are visible to the hardware in one scheduling window. */
5147 struct ia64_packet
5149 const struct bundle *t1, *t2;
5150 /* Precomputed value of the first split issue in this packet if a cycle
5151 starts at its beginning. */
5152 int first_split;
5153 /* For convenience, the insn types are replicated here so we don't have
5154 to go through T1 and T2 all the time. */
5155 enum attr_type t[6];
5158 /* An array containing all possible packets. */
5159 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5160 static struct ia64_packet packets[NR_PACKETS];
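/* The packets array is filled in by ia64_sched_init below: entry
   b1 * NR_BUNDLES + b2 pairs bundle[b1] with bundle[b2].  For example
   (illustrative, given the bundle[] order above), packets[0] describes
   an .mii/.mii issue window and packets[1] an .mii/.mmi window.  */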
5162 /* Map attr_type to a string with the name. */
5164 static const char *const type_names[] =
5166 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5169 /* Nonzero if we should insert stop bits into the schedule. */
5170 int ia64_final_schedule = 0;
5172 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5173 static rtx ia64_single_set PARAMS ((rtx));
5174 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5175 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5176 static void maybe_rotate PARAMS ((FILE *));
5177 static void finish_last_head PARAMS ((FILE *, int));
5178 static void rotate_one_bundle PARAMS ((FILE *));
5179 static void rotate_two_bundles PARAMS ((FILE *));
5180 static void nop_cycles_until PARAMS ((int, FILE *));
5181 static void cycle_end_fill_slots PARAMS ((FILE *));
5182 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5183 static int get_split PARAMS ((const struct ia64_packet *, int));
5184 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5185 const struct ia64_packet *, int));
5186 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5187 rtx *, enum attr_type *, int));
5188 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5189 static void dump_current_packet PARAMS ((FILE *));
5190 static void schedule_stop PARAMS ((FILE *));
5191 static rtx gen_nop_type PARAMS ((enum attr_type));
5192 static void ia64_emit_nops PARAMS ((void));
5194 /* Map a bundle number to its pseudo-op. */
5196 const char *
5197 get_bundle_name (b)
5198 int b;
5200 return bundle[b].name;
5203 /* Compute the slot which will cause a split issue in packet P if the
5204 current cycle begins at slot BEGIN. */
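/* For example (illustrative, derived from the rules below): with BEGIN == 0,
   an .mmf/.mfi packet splits at slot 3 because MMF always issues alone,
   while an .mii/.mii packet splits at slot 4, where a third I-type insn
   would exceed the two I units available per cycle.  */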
5206 static int
5207 itanium_split_issue (p, begin)
5208 const struct ia64_packet *p;
5209 int begin;
5211 int type_count[TYPE_S];
5212 int i;
5213 int split = 6;
5215 if (begin < 3)
5217 /* Always split before and after MMF. */
5218 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5219 return 3;
5220 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5221 return 3;
5222 /* Always split after MBB and BBB. */
5223 if (p->t[1] == TYPE_B)
5224 return 3;
5225 /* Split after first bundle in MIB BBB combination. */
5226 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5227 return 3;
5230 memset (type_count, 0, sizeof type_count);
5231 for (i = begin; i < split; i++)
5233 enum attr_type t0 = p->t[i];
5234 /* An MLX bundle reserves the same units as an MFI bundle. */
5235 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5236 : t0 == TYPE_X ? TYPE_I
5237 : t0);
5239 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5240 2 integer per cycle. */
5241 int max = (t == TYPE_B ? 3 : 2);
5242 if (type_count[t] == max)
5243 return i;
5245 type_count[t]++;
5247 return split;
5250 /* Return the maximum number of instructions a cpu can issue. */
5252 static int
5253 ia64_issue_rate ()
5255 return 6;
5258 /* Helper function - like single_set, but look inside COND_EXEC. */
5260 static rtx
5261 ia64_single_set (insn)
5262 rtx insn;
5264 rtx x = PATTERN (insn), ret;
5265 if (GET_CODE (x) == COND_EXEC)
5266 x = COND_EXEC_CODE (x);
5267 if (GET_CODE (x) == SET)
5268 return x;
5270 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
5271 Although they are not a classical single set, the second set is there just
5272 to protect the insn from moving past FP-relative stack accesses. */
5273 switch (recog_memoized (insn))
5275 case CODE_FOR_prologue_allocate_stack:
5276 case CODE_FOR_epilogue_deallocate_stack:
5277 ret = XVECEXP (x, 0, 0);
5278 break;
5280 default:
5281 ret = single_set_2 (insn, x);
5282 break;
5285 return ret;
5288 /* Adjust the cost of a scheduling dependency. Return the new cost of
5289 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
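/* Two illustrative outcomes of the rules below (assembly sketches only,
   not emitted by this function):

        cmp.eq p6, p0 = r8, r9
        (p6) br.cond.dptk .L1      // compare feeding a branch: cost 0

        add r10 = r11, r12
        ld8 r13 = [r10]            // IALU result used as a load address:
                                   // charged COST + 1.  */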
5291 static int
5292 ia64_adjust_cost (insn, link, dep_insn, cost)
5293 rtx insn, link, dep_insn;
5294 int cost;
5296 enum attr_type dep_type;
5297 enum attr_itanium_class dep_class;
5298 enum attr_itanium_class insn_class;
5299 rtx dep_set, set, src, addr;
5301 if (GET_CODE (PATTERN (insn)) == CLOBBER
5302 || GET_CODE (PATTERN (insn)) == USE
5303 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5304 || GET_CODE (PATTERN (dep_insn)) == USE
5305 /* @@@ Not accurate for indirect calls. */
5306 || GET_CODE (insn) == CALL_INSN
5307 || ia64_safe_type (insn) == TYPE_S)
5308 return 0;
5310 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5311 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5312 return 0;
5314 dep_type = ia64_safe_type (dep_insn);
5315 dep_class = ia64_safe_itanium_class (dep_insn);
5316 insn_class = ia64_safe_itanium_class (insn);
5318 /* Compares that feed a conditional branch can execute in the same
5319 cycle. */
5320 dep_set = ia64_single_set (dep_insn);
5321 set = ia64_single_set (insn);
5323 if (dep_type != TYPE_F
5324 && dep_set
5325 && GET_CODE (SET_DEST (dep_set)) == REG
5326 && PR_REG (REGNO (SET_DEST (dep_set)))
5327 && GET_CODE (insn) == JUMP_INSN)
5328 return 0;
5330 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5332 /* ??? Can't find any information in the documentation about whether
5333 a sequence
5334 st [rx] = ra
5335 ld rb = [ry]
5336 splits issue. Assume it doesn't. */
5337 return 0;
5340 src = set ? SET_SRC (set) : 0;
5341 addr = 0;
5342 if (set)
5344 if (GET_CODE (SET_DEST (set)) == MEM)
5345 addr = XEXP (SET_DEST (set), 0);
5346 else if (GET_CODE (SET_DEST (set)) == SUBREG
5347 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5348 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5349 else
5351 addr = src;
5352 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5353 addr = XVECEXP (addr, 0, 0);
5354 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5355 addr = XEXP (addr, 0);
5356 if (GET_CODE (addr) == MEM)
5357 addr = XEXP (addr, 0);
5358 else
5359 addr = 0;
5363 if (addr && GET_CODE (addr) == POST_MODIFY)
5364 addr = XEXP (addr, 0);
5366 set = ia64_single_set (dep_insn);
5368 if ((dep_class == ITANIUM_CLASS_IALU
5369 || dep_class == ITANIUM_CLASS_ILOG
5370 || dep_class == ITANIUM_CLASS_LD)
5371 && (insn_class == ITANIUM_CLASS_LD
5372 || insn_class == ITANIUM_CLASS_ST))
5374 if (! addr || ! set)
5375 abort ();
5376 /* This isn't completely correct - an IALU that feeds an address has
5377 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5378 otherwise. Unfortunately there's no good way to describe this. */
5379 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5380 return cost + 1;
5383 if ((dep_class == ITANIUM_CLASS_IALU
5384 || dep_class == ITANIUM_CLASS_ILOG
5385 || dep_class == ITANIUM_CLASS_LD)
5386 && (insn_class == ITANIUM_CLASS_MMMUL
5387 || insn_class == ITANIUM_CLASS_MMSHF
5388 || insn_class == ITANIUM_CLASS_MMSHFI))
5389 return 3;
5391 if (dep_class == ITANIUM_CLASS_FMAC
5392 && (insn_class == ITANIUM_CLASS_FMISC
5393 || insn_class == ITANIUM_CLASS_FCVTFX
5394 || insn_class == ITANIUM_CLASS_XMPY))
5395 return 7;
5397 if ((dep_class == ITANIUM_CLASS_FMAC
5398 || dep_class == ITANIUM_CLASS_FMISC
5399 || dep_class == ITANIUM_CLASS_FCVTFX
5400 || dep_class == ITANIUM_CLASS_XMPY)
5401 && insn_class == ITANIUM_CLASS_STF)
5402 return 8;
5404 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5405 but HP engineers say any non-MM operation. */
5406 if ((dep_class == ITANIUM_CLASS_MMMUL
5407 || dep_class == ITANIUM_CLASS_MMSHF
5408 || dep_class == ITANIUM_CLASS_MMSHFI)
5409 && insn_class != ITANIUM_CLASS_MMMUL
5410 && insn_class != ITANIUM_CLASS_MMSHF
5411 && insn_class != ITANIUM_CLASS_MMSHFI)
5412 return 4;
5414 return cost;
5417 /* Describe the current state of the Itanium pipeline. */
5418 static struct
5420 /* The first slot that is used in the current cycle. */
5421 int first_slot;
5422 /* The next slot to fill. */
5423 int cur;
5424 /* The packet we have selected for the current issue window. */
5425 const struct ia64_packet *packet;
5426 /* The position of the split issue that occurs due to issue width
5427 limitations (6 if there's no split issue). */
5428 int split;
5429 /* Record data about the insns scheduled so far in the same issue
5430 window. The elements up to but not including FIRST_SLOT belong
5431 to the previous cycle, the ones starting with FIRST_SLOT belong
5432 to the current cycle. */
5433 enum attr_type types[6];
5434 rtx insns[6];
5435 int stopbit[6];
5436 /* Nonzero if we decided to schedule a stop bit. */
5437 int last_was_stop;
5438 } sched_data;
5440 /* Temporary arrays; they have enough elements to hold all insns that
5441 can be ready at the same time while scheduling the current block.
5442 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5443 static rtx *sched_ready;
5444 static enum attr_type *sched_types;
5446 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5447 of packet P. */
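/* For instance (an illustrative call, not used anywhere in this file):

        insn_matches_slot (p, TYPE_A, 0, NULL_RTX)

   is nonzero whenever p->t[0] is TYPE_M or TYPE_I, since A-type insns can
   issue on either an M or an I unit; an insn that requires unit 0 is
   additionally rejected unless SLOT is the first slot of its type in the
   current cycle.  */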
5449 static int
5450 insn_matches_slot (p, itype, slot, insn)
5451 const struct ia64_packet *p;
5452 enum attr_type itype;
5453 int slot;
5454 rtx insn;
5456 enum attr_itanium_requires_unit0 u0;
5457 enum attr_type stype = p->t[slot];
5459 if (insn)
5461 u0 = ia64_safe_itanium_requires_unit0 (insn);
5462 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5464 int i;
5465 for (i = sched_data.first_slot; i < slot; i++)
5466 if (p->t[i] == stype
5467 || (stype == TYPE_F && p->t[i] == TYPE_L)
5468 || (stype == TYPE_I && p->t[i] == TYPE_X))
5469 return 0;
5471 if (GET_CODE (insn) == CALL_INSN)
5473 /* Reject calls in multiway branch packets. We want to limit
5474 the number of multiway branches we generate (since the branch
5475 predictor is limited), and this seems to work fairly well.
5476 (If we didn't do this, we'd have to add another test here to
5477 force calls into the third slot of the bundle.) */
5478 if (slot < 3)
5480 if (p->t[1] == TYPE_B)
5481 return 0;
5483 else
5485 if (p->t[4] == TYPE_B)
5486 return 0;
5491 if (itype == stype)
5492 return 1;
5493 if (itype == TYPE_A)
5494 return stype == TYPE_M || stype == TYPE_I;
5495 return 0;
5498 /* Like emit_insn_before, but skip cycle_display notes.
5499 ??? When cycle display notes are implemented, update this. */
5501 static void
5502 ia64_emit_insn_before (insn, before)
5503 rtx insn, before;
5505 emit_insn_before (insn, before);
5508 /* When rotating a bundle out of the issue window, insert a bundle selector
5509 insn in front of it. DUMP is the scheduling dump file or NULL. START
5510 is either 0 or 3, depending on whether we want to emit a bundle selector
5511 for the first bundle or the second bundle in the current issue window.
5513 The selector insns are emitted this late because the selected packet can
5514 be changed until parts of it get rotated out. */
5516 static void
5517 finish_last_head (dump, start)
5518 FILE *dump;
5519 int start;
5521 const struct ia64_packet *p = sched_data.packet;
5522 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5523 int bundle_type = b - bundle;
5524 rtx insn;
5525 int i;
5527 if (! ia64_final_schedule)
5528 return;
5530 for (i = start; sched_data.insns[i] == 0; i++)
5531 if (i == start + 3)
5532 abort ();
5533 insn = sched_data.insns[i];
5535 if (dump)
5536 fprintf (dump, "// Emitting template before %d: %s\n",
5537 INSN_UID (insn), b->name);
5539 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5542 /* We can't schedule more insns this cycle. Fix up the scheduling state
5543 and advance FIRST_SLOT and CUR.
5544 We have to distribute the insns that are currently found between
5545 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5546 far, they are stored successively in the fields starting at FIRST_SLOT;
5547 now they must be moved to the correct slots.
5548 DUMP is the current scheduling dump file, or NULL. */
5550 static void
5551 cycle_end_fill_slots (dump)
5552 FILE *dump;
5554 const struct ia64_packet *packet = sched_data.packet;
5555 int slot, i;
5556 enum attr_type tmp_types[6];
5557 rtx tmp_insns[6];
5559 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5560 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5562 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5564 enum attr_type t = tmp_types[i];
5565 if (t != ia64_safe_type (tmp_insns[i]))
5566 abort ();
5567 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5569 if (slot > sched_data.split)
5570 abort ();
5571 if (dump)
5572 fprintf (dump, "// Packet needs %s, have %s\n",
5573 type_names[packet->t[slot]], type_names[t]);
5574 sched_data.types[slot] = packet->t[slot];
5575 sched_data.insns[slot] = 0;
5576 sched_data.stopbit[slot] = 0;
5578 /* ??? TYPE_L instructions always fill up two slots, but we don't
5579 support TYPE_L nops. */
5580 if (packet->t[slot] == TYPE_L)
5581 abort ();
5583 slot++;
5586 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5587 actual slot type later. */
5588 sched_data.types[slot] = packet->t[slot];
5589 sched_data.insns[slot] = tmp_insns[i];
5590 sched_data.stopbit[slot] = 0;
5591 slot++;
5593 /* TYPE_L instructions always fill up two slots. */
5594 if (t == TYPE_L)
5596 sched_data.types[slot] = packet->t[slot];
5597 sched_data.insns[slot] = 0;
5598 sched_data.stopbit[slot] = 0;
5599 slot++;
5603 /* This isn't right - there's no need to pad out until the forced split;
5604 the CPU will automatically split if an insn isn't ready. */
5605 #if 0
5606 while (slot < sched_data.split)
5608 sched_data.types[slot] = packet->t[slot];
5609 sched_data.insns[slot] = 0;
5610 sched_data.stopbit[slot] = 0;
5611 slot++;
5613 #endif
5615 sched_data.first_slot = sched_data.cur = slot;
5618 /* Bundle rotations, as described in the Itanium optimization manual.
5619 We can rotate either one or both bundles out of the issue window.
5620 DUMP is the current scheduling dump file, or NULL. */
5622 static void
5623 rotate_one_bundle (dump)
5624 FILE *dump;
5626 if (dump)
5627 fprintf (dump, "// Rotating one bundle.\n");
5629 finish_last_head (dump, 0);
5630 if (sched_data.cur > 3)
5632 sched_data.cur -= 3;
5633 sched_data.first_slot -= 3;
5634 memmove (sched_data.types,
5635 sched_data.types + 3,
5636 sched_data.cur * sizeof *sched_data.types);
5637 memmove (sched_data.stopbit,
5638 sched_data.stopbit + 3,
5639 sched_data.cur * sizeof *sched_data.stopbit);
5640 memmove (sched_data.insns,
5641 sched_data.insns + 3,
5642 sched_data.cur * sizeof *sched_data.insns);
5643 sched_data.packet
5644 = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
5646 else
5648 sched_data.cur = 0;
5649 sched_data.first_slot = 0;
5653 static void
5654 rotate_two_bundles (dump)
5655 FILE *dump;
5657 if (dump)
5658 fprintf (dump, "// Rotating two bundles.\n");
5660 if (sched_data.cur == 0)
5661 return;
5663 finish_last_head (dump, 0);
5664 if (sched_data.cur > 3)
5665 finish_last_head (dump, 3);
5666 sched_data.cur = 0;
5667 sched_data.first_slot = 0;
5670 /* We're beginning a new block. Initialize data structures as necessary. */
5672 static void
5673 ia64_sched_init (dump, sched_verbose, max_ready)
5674 FILE *dump ATTRIBUTE_UNUSED;
5675 int sched_verbose ATTRIBUTE_UNUSED;
5676 int max_ready;
5678 static int initialized = 0;
5680 if (! initialized)
5682 int b1, b2, i;
5684 initialized = 1;
5686 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5688 const struct bundle *t1 = bundle + b1;
5689 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5691 const struct bundle *t2 = bundle + b2;
5693 packets[i].t1 = t1;
5694 packets[i].t2 = t2;
5697 for (i = 0; i < NR_PACKETS; i++)
5699 int j;
5700 for (j = 0; j < 3; j++)
5701 packets[i].t[j] = packets[i].t1->t[j];
5702 for (j = 0; j < 3; j++)
5703 packets[i].t[j + 3] = packets[i].t2->t[j];
5704 packets[i].first_split = itanium_split_issue (packets + i, 0);
5709 init_insn_group_barriers ();
5711 memset (&sched_data, 0, sizeof sched_data);
5712 sched_types = (enum attr_type *) xmalloc (max_ready
5713 * sizeof (enum attr_type));
5714 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5717 /* See if the packet P can match the insns we have already scheduled. Return
5718 nonzero if so. In *PSLOT, we store the first slot that is available for
5719 more instructions if we choose this packet.
5720 SPLIT holds the last slot we can use; there's a split issue after it, so
5721 scheduling beyond it would cause us to use more than one cycle. */
5723 static int
5724 packet_matches_p (p, split, pslot)
5725 const struct ia64_packet *p;
5726 int split;
5727 int *pslot;
5729 int filled = sched_data.cur;
5730 int first = sched_data.first_slot;
5731 int i, slot;
5733 /* First, check if the first of the two bundles must be a specific one (due
5734 to stop bits). */
5735 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5736 return 0;
5737 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5738 return 0;
5740 for (i = 0; i < first; i++)
5741 if (! insn_matches_slot (p, sched_data.types[i], i,
5742 sched_data.insns[i]))
5743 return 0;
5744 for (i = slot = first; i < filled; i++)
5746 while (slot < split)
5748 if (insn_matches_slot (p, sched_data.types[i], slot,
5749 sched_data.insns[i]))
5750 break;
5751 slot++;
5753 if (slot == split)
5754 return 0;
5755 slot++;
5758 if (pslot)
5759 *pslot = slot;
5760 return 1;
5763 /* A frontend for itanium_split_issue. For a packet P and a slot
5764 number FIRST that describes the start of the current clock cycle,
5765 return the slot number of the first split issue. This function
5766 uses the cached number found in P if possible. */
5768 static int
5769 get_split (p, first)
5770 const struct ia64_packet *p;
5771 int first;
5773 if (first == 0)
5774 return p->first_split;
5775 return itanium_split_issue (p, first);
5778 /* Given N_READY insns in the array READY, whose types are found in the
5779 corresponding array TYPES, return the insn that is best suited to be
5780 scheduled in slot SLOT of packet P. */
5782 static int
5783 find_best_insn (ready, types, n_ready, p, slot)
5784 rtx *ready;
5785 enum attr_type *types;
5786 int n_ready;
5787 const struct ia64_packet *p;
5788 int slot;
5790 int best = -1;
5791 int best_pri = 0;
5792 while (n_ready-- > 0)
5794 rtx insn = ready[n_ready];
5795 if (! insn)
5796 continue;
5797 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5798 break;
5799 /* If we have equally good insns, one of which has a stricter
5800 slot requirement, prefer the one with the stricter requirement. */
5801 if (best >= 0 && types[n_ready] == TYPE_A)
5802 continue;
5803 if (insn_matches_slot (p, types[n_ready], slot, insn))
5805 best = n_ready;
5806 best_pri = INSN_PRIORITY (ready[best]);
5808 /* If there's no way we could get a stricter requirement, stop
5809 looking now. */
5810 if (types[n_ready] != TYPE_A
5811 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5812 break;
5813 break;
5816 return best;
5819 /* Select the best packet to use given the current scheduler state and the
5820 current ready list.
5821 READY is an array holding N_READY ready insns; TYPES is a corresponding
5822 array that holds their types. Store the best packet in *PPACKET and the
5823 number of insns that can be scheduled in the current cycle in *PBEST. */
5825 static void
5826 find_best_packet (pbest, ppacket, ready, types, n_ready)
5827 int *pbest;
5828 const struct ia64_packet **ppacket;
5829 rtx *ready;
5830 enum attr_type *types;
5831 int n_ready;
5833 int first = sched_data.first_slot;
5834 int best = 0;
5835 int lowest_end = 6;
5836 const struct ia64_packet *best_packet = NULL;
5837 int i;
5839 for (i = 0; i < NR_PACKETS; i++)
5841 const struct ia64_packet *p = packets + i;
5842 int slot;
5843 int split = get_split (p, first);
5844 int win = 0;
5845 int first_slot, last_slot;
5846 int b_nops = 0;
5848 if (! packet_matches_p (p, split, &first_slot))
5849 continue;
5851 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5853 win = 0;
5854 last_slot = 6;
5855 for (slot = first_slot; slot < split; slot++)
5857 int insn_nr;
5859 /* Disallow a degenerate case where the first bundle doesn't
5860 contain anything but NOPs! */
5861 if (first_slot == 0 && win == 0 && slot == 3)
5863 win = -1;
5864 break;
5867 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5868 if (insn_nr >= 0)
5870 sched_ready[insn_nr] = 0;
5871 last_slot = slot;
5872 win++;
5874 else if (p->t[slot] == TYPE_B)
5875 b_nops++;
5877 /* We must disallow MBB/BBB packets if any of their B slots would be
5878 filled with nops. */
5879 if (last_slot < 3)
5881 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5882 win = -1;
5884 else
5886 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5887 win = -1;
5890 if (win > best
5891 || (win == best && last_slot < lowest_end))
5893 best = win;
5894 lowest_end = last_slot;
5895 best_packet = p;
5898 *pbest = best;
5899 *ppacket = best_packet;
5902 /* Reorder the ready list so that the insns that can be issued in this cycle
5903 are found in the correct order at the end of the list.
5904 DUMP is the scheduling dump file, or NULL. READY points to the start,
5905 E_READY to the end of the ready list. MAY_FAIL determines what should be
5906 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5907 otherwise we return 0.
5908 Return 1 if any insns can be scheduled in this cycle. */
5910 static int
5911 itanium_reorder (dump, ready, e_ready, may_fail)
5912 FILE *dump;
5913 rtx *ready;
5914 rtx *e_ready;
5915 int may_fail;
5917 const struct ia64_packet *best_packet;
5918 int n_ready = e_ready - ready;
5919 int first = sched_data.first_slot;
5920 int i, best, best_split, filled;
5922 for (i = 0; i < n_ready; i++)
5923 sched_types[i] = ia64_safe_type (ready[i]);
5925 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5927 if (best == 0)
5929 if (may_fail)
5930 return 0;
5931 abort ();
5934 if (dump)
5936 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5937 best_packet->t1->name,
5938 best_packet->t2 ? best_packet->t2->name : NULL, best);
5941 best_split = itanium_split_issue (best_packet, first);
5942 packet_matches_p (best_packet, best_split, &filled);
5944 for (i = filled; i < best_split; i++)
5946 int insn_nr;
5948 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5949 if (insn_nr >= 0)
5951 rtx insn = ready[insn_nr];
5952 memmove (ready + insn_nr, ready + insn_nr + 1,
5953 (n_ready - insn_nr - 1) * sizeof (rtx));
5954 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5955 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5956 ready[--n_ready] = insn;
5960 sched_data.packet = best_packet;
5961 sched_data.split = best_split;
5962 return 1;
5965 /* Dump information about the current scheduling state to file DUMP. */
5967 static void
5968 dump_current_packet (dump)
5969 FILE *dump;
5971 int i;
5972 fprintf (dump, "// %d slots filled:", sched_data.cur);
5973 for (i = 0; i < sched_data.first_slot; i++)
5975 rtx insn = sched_data.insns[i];
5976 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5977 if (insn)
5978 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5979 if (sched_data.stopbit[i])
5980 fprintf (dump, " ;;");
5982 fprintf (dump, " :::");
5983 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5985 rtx insn = sched_data.insns[i];
5986 enum attr_type t = ia64_safe_type (insn);
5987 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5989 fprintf (dump, "\n");
5992 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5993 NULL. */
5995 static void
5996 schedule_stop (dump)
5997 FILE *dump;
5999 const struct ia64_packet *best = sched_data.packet;
6000 int i;
6001 int best_stop = 6;
6003 if (dump)
6004 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
6006 if (sched_data.cur == 0)
6008 if (dump)
6009 fprintf (dump, "// At start of bundle, so nothing to do.\n");
6011 rotate_two_bundles (NULL);
6012 return;
6015 for (i = -1; i < NR_PACKETS; i++)
6017 /* This is a slight hack to give the current packet the first chance.
6018 This is done to avoid e.g. switching from MIB to MBB bundles. */
6019 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6020 int split = get_split (p, sched_data.first_slot);
6021 const struct bundle *compare;
6022 int next, stoppos;
6024 if (! packet_matches_p (p, split, &next))
6025 continue;
6027 compare = next > 3 ? p->t2 : p->t1;
6029 stoppos = 3;
6030 if (compare->possible_stop)
6031 stoppos = compare->possible_stop;
6032 if (next > 3)
6033 stoppos += 3;
6035 if (stoppos < next || stoppos >= best_stop)
6037 if (compare->possible_stop == 0)
6038 continue;
6039 stoppos = (next > 3 ? 6 : 3);
6041 if (stoppos < next || stoppos >= best_stop)
6042 continue;
6044 if (dump)
6045 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
6046 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6047 stoppos);
6049 best_stop = stoppos;
6050 best = p;
6053 sched_data.packet = best;
6054 cycle_end_fill_slots (dump);
6055 while (sched_data.cur < best_stop)
6057 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6058 sched_data.insns[sched_data.cur] = 0;
6059 sched_data.stopbit[sched_data.cur] = 0;
6060 sched_data.cur++;
6062 sched_data.stopbit[sched_data.cur - 1] = 1;
6063 sched_data.first_slot = best_stop;
6065 if (dump)
6066 dump_current_packet (dump);
6069 /* If necessary, perform one or two rotations on the scheduling state.
6070 This should only be called if we are starting a new cycle. */
6072 static void
6073 maybe_rotate (dump)
6074 FILE *dump;
6076 cycle_end_fill_slots (dump);
6077 if (sched_data.cur == 6)
6078 rotate_two_bundles (dump);
6079 else if (sched_data.cur >= 3)
6080 rotate_one_bundle (dump);
6081 sched_data.first_slot = sched_data.cur;
6084 /* The clock cycle when ia64_sched_reorder was last called. */
6085 static int prev_cycle;
6087 /* The first slot used in the previous cycle. This is the saved
6088 value of sched_data.first_slot. */
6089 static int prev_first;
6091 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6092 pad out the delay between MM (shifts, etc.) and integer operations. */
6094 static void
6095 nop_cycles_until (clock_var, dump)
6096 int clock_var;
6097 FILE *dump;
6099 int prev_clock = prev_cycle;
6100 int cycles_left = clock_var - prev_clock;
6101 bool did_stop = false;
6103 /* Finish the previous cycle; pad it out with NOPs. */
6104 if (sched_data.cur == 3)
6106 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6107 did_stop = true;
6108 maybe_rotate (dump);
6110 else if (sched_data.cur > 0)
6112 int need_stop = 0;
6113 int split = itanium_split_issue (sched_data.packet, prev_first);
6115 if (sched_data.cur < 3 && split > 3)
6117 split = 3;
6118 need_stop = 1;
6121 if (split > sched_data.cur)
6123 int i;
6124 for (i = sched_data.cur; i < split; i++)
6126 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6127 sched_data.types[i] = sched_data.packet->t[i];
6128 sched_data.insns[i] = t;
6129 sched_data.stopbit[i] = 0;
6131 sched_data.cur = split;
6134 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6135 && cycles_left > 1)
6137 int i;
6138 for (i = sched_data.cur; i < 6; i++)
6140 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6141 sched_data.types[i] = sched_data.packet->t[i];
6142 sched_data.insns[i] = t;
6143 sched_data.stopbit[i] = 0;
6145 sched_data.cur = 6;
6146 cycles_left--;
6147 need_stop = 1;
6150 if (need_stop || sched_data.cur == 6)
6152 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6153 did_stop = true;
6155 maybe_rotate (dump);
6158 cycles_left--;
6159 while (cycles_left > 0)
6161 sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6162 sched_emit_insn (gen_nop_type (TYPE_M));
6163 sched_emit_insn (gen_nop_type (TYPE_I));
6164 if (cycles_left > 1)
6166 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6167 cycles_left--;
6169 sched_emit_insn (gen_nop_type (TYPE_I));
6170 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6171 did_stop = true;
6172 cycles_left--;
6175 if (did_stop)
6176 init_insn_group_barriers ();
6179 /* We are about to begin issuing insns for this clock cycle.
6180 Override the default sort algorithm to better slot instructions. */
6182 static int
6183 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6184 reorder_type, clock_var)
6185 FILE *dump ATTRIBUTE_UNUSED;
6186 int sched_verbose ATTRIBUTE_UNUSED;
6187 rtx *ready;
6188 int *pn_ready;
6189 int reorder_type, clock_var;
6191 int n_asms;
6192 int n_ready = *pn_ready;
6193 rtx *e_ready = ready + n_ready;
6194 rtx *insnp;
6196 if (sched_verbose)
6198 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6199 dump_current_packet (dump);
6202 /* Work around the pipeline flush that will occur if the results of
6203 an MM instruction are accessed before the result is ready. Intel
6204 documentation says this only happens with IALU, ISHF, ILOG, LD,
6205 and ST consumers, but experimental evidence shows that *any* non-MM
6206 type instruction will incur the flush. */
6207 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6209 for (insnp = ready; insnp < e_ready; insnp++)
6211 rtx insn = *insnp, link;
6212 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6214 if (t == ITANIUM_CLASS_MMMUL
6215 || t == ITANIUM_CLASS_MMSHF
6216 || t == ITANIUM_CLASS_MMSHFI)
6217 continue;
6219 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6220 if (REG_NOTE_KIND (link) == 0)
6222 rtx other = XEXP (link, 0);
6223 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6224 if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
6226 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6227 goto out;
6232 out:
6234 prev_first = sched_data.first_slot;
6235 prev_cycle = clock_var;
6237 if (reorder_type == 0)
6238 maybe_rotate (sched_verbose ? dump : NULL);
6240 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6241 n_asms = 0;
6242 for (insnp = ready; insnp < e_ready; insnp++)
6243 if (insnp < e_ready)
6245 rtx insn = *insnp;
6246 enum attr_type t = ia64_safe_type (insn);
6247 if (t == TYPE_UNKNOWN)
6249 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6250 || asm_noperands (PATTERN (insn)) >= 0)
6252 rtx lowest = ready[n_asms];
6253 ready[n_asms] = insn;
6254 *insnp = lowest;
6255 n_asms++;
6257 else
6259 rtx highest = ready[n_ready - 1];
6260 ready[n_ready - 1] = insn;
6261 *insnp = highest;
6262 if (ia64_final_schedule && group_barrier_needed_p (insn))
6264 schedule_stop (sched_verbose ? dump : NULL);
6265 sched_data.last_was_stop = 1;
6266 maybe_rotate (sched_verbose ? dump : NULL);
6269 return 1;
6273 if (n_asms < n_ready)
6275 /* Some normal insns to process. Skip the asms. */
6276 ready += n_asms;
6277 n_ready -= n_asms;
6279 else if (n_ready > 0)
6281 /* Only asm insns left. */
6282 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6284 schedule_stop (sched_verbose ? dump : NULL);
6285 sched_data.last_was_stop = 1;
6286 maybe_rotate (sched_verbose ? dump : NULL);
6288 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6289 return 1;
6292 if (ia64_final_schedule)
6294 int nr_need_stop = 0;
6296 for (insnp = ready; insnp < e_ready; insnp++)
6297 if (safe_group_barrier_needed_p (*insnp))
6298 nr_need_stop++;
6300 /* Schedule a stop bit if
6301 - all insns require a stop bit, or
6302 - we are starting a new cycle and _any_ insns require a stop bit.
6303 The reason for the latter is that if our schedule is accurate, then
6304 the additional stop won't decrease performance at this point (since
6305 there's a split issue at this point anyway), but it gives us more
6306 freedom when scheduling the currently ready insns. */
6307 if ((reorder_type == 0 && nr_need_stop)
6308 || (reorder_type == 1 && n_ready == nr_need_stop))
6310 schedule_stop (sched_verbose ? dump : NULL);
6311 sched_data.last_was_stop = 1;
6312 maybe_rotate (sched_verbose ? dump : NULL);
6313 if (reorder_type == 1)
6314 return 0;
6316 else
6318 int deleted = 0;
6319 insnp = e_ready;
6320 /* Move down everything that needs a stop bit, preserving relative
6321 order. */
6322 while (insnp-- > ready + deleted)
6323 while (insnp >= ready + deleted)
6325 rtx insn = *insnp;
6326 if (! safe_group_barrier_needed_p (insn))
6327 break;
6328 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6329 *ready = insn;
6330 deleted++;
6332 n_ready -= deleted;
6333 ready += deleted;
6334 if (deleted != nr_need_stop)
6335 abort ();
6339 return itanium_reorder (sched_verbose ? dump : NULL,
6340 ready, e_ready, reorder_type == 1);
6343 static int
6344 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6345 FILE *dump;
6346 int sched_verbose;
6347 rtx *ready;
6348 int *pn_ready;
6349 int clock_var;
6351 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6352 pn_ready, 0, clock_var);
6355 /* Like ia64_sched_reorder, but called after issuing each insn.
6356 Override the default sort algorithm to better slot instructions. */
6358 static int
6359 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6360 FILE *dump ATTRIBUTE_UNUSED;
6361 int sched_verbose ATTRIBUTE_UNUSED;
6362 rtx *ready;
6363 int *pn_ready;
6364 int clock_var;
6366 if (sched_data.last_was_stop)
6367 return 0;
6369 /* Detect one special case and try to optimize it.
6370 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6371 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6372 if (sched_data.first_slot == 1
6373 && sched_data.stopbit[0]
6374 && ((sched_data.cur == 4
6375 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6376 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6377 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6378 || (sched_data.cur == 3
6379 && (sched_data.types[1] == TYPE_M
6380 || sched_data.types[1] == TYPE_A)
6381 && (sched_data.types[2] != TYPE_M
6382 && sched_data.types[2] != TYPE_I
6383 && sched_data.types[2] != TYPE_A))))
6386 int i, best;
6387 rtx stop = sched_data.insns[1];
6389 /* Search backward for the stop bit that must be there. */
6390 while (1)
6392 int insn_code;
6394 stop = PREV_INSN (stop);
6395 if (GET_CODE (stop) != INSN)
6396 abort ();
6397 insn_code = recog_memoized (stop);
6399 /* Ignore .pred.rel.mutex.
6401 ??? Update this to ignore cycle display notes too
6402 ??? once those are implemented */
6403 if (insn_code == CODE_FOR_pred_rel_mutex
6404 || insn_code == CODE_FOR_prologue_use)
6405 continue;
6407 if (insn_code == CODE_FOR_insn_group_barrier)
6408 break;
6409 abort ();
6412 /* Adjust the stop bit's slot selector. */
6413 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6414 abort ();
6415 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6417 sched_data.stopbit[0] = 0;
6418 sched_data.stopbit[2] = 1;
6420 sched_data.types[5] = sched_data.types[3];
6421 sched_data.types[4] = sched_data.types[2];
6422 sched_data.types[3] = sched_data.types[1];
6423 sched_data.insns[5] = sched_data.insns[3];
6424 sched_data.insns[4] = sched_data.insns[2];
6425 sched_data.insns[3] = sched_data.insns[1];
6426 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6427 sched_data.cur += 2;
6428 sched_data.first_slot = 3;
6429 for (i = 0; i < NR_PACKETS; i++)
6431 const struct ia64_packet *p = packets + i;
6432 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6434 sched_data.packet = p;
6435 break;
6438 rotate_one_bundle (sched_verbose ? dump : NULL);
6440 best = 6;
6441 for (i = 0; i < NR_PACKETS; i++)
6443 const struct ia64_packet *p = packets + i;
6444 int split = get_split (p, sched_data.first_slot);
6445 int next;
6447 /* Disallow multiway branches here. */
6448 if (p->t[1] == TYPE_B)
6449 continue;
6451 if (packet_matches_p (p, split, &next) && next < best)
6453 best = next;
6454 sched_data.packet = p;
6455 sched_data.split = split;
6458 if (best == 6)
6459 abort ();
6462 if (*pn_ready > 0)
6464 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6465 ready, pn_ready, 1,
6466 clock_var);
6467 if (more)
6468 return more;
6469 /* Did we schedule a stop? If so, finish this cycle. */
6470 if (sched_data.cur == sched_data.first_slot)
6471 return 0;
6474 if (sched_verbose)
6475 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6477 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6478 if (sched_verbose)
6479 dump_current_packet (dump);
6480 return 0;
6483 /* We are about to issue INSN. Return the number of insns left on the
6484 ready queue that can be issued this cycle. */
6486 static int
6487 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6488 FILE *dump;
6489 int sched_verbose;
6490 rtx insn;
6491 int can_issue_more ATTRIBUTE_UNUSED;
6493 enum attr_type t = ia64_safe_type (insn);
6495 if (sched_data.last_was_stop)
6497 int t = sched_data.first_slot;
6498 if (t == 0)
6499 t = 3;
6500 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6501 init_insn_group_barriers ();
6502 sched_data.last_was_stop = 0;
6505 if (t == TYPE_UNKNOWN)
6507 if (sched_verbose)
6508 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6509 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6510 || asm_noperands (PATTERN (insn)) >= 0)
6512 /* This must be some kind of asm. Clear the scheduling state. */
6513 rotate_two_bundles (sched_verbose ? dump : NULL);
6514 if (ia64_final_schedule)
6515 group_barrier_needed_p (insn);
6517 return 1;
6520 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6521 important state info. Don't delete this test. */
6522 if (ia64_final_schedule
6523 && group_barrier_needed_p (insn))
6524 abort ();
6526 sched_data.stopbit[sched_data.cur] = 0;
6527 sched_data.insns[sched_data.cur] = insn;
6528 sched_data.types[sched_data.cur] = t;
6530 sched_data.cur++;
6531 if (sched_verbose)
6532 fprintf (dump, "// Scheduling insn %d of type %s\n",
6533 INSN_UID (insn), type_names[t]);
6535 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6537 schedule_stop (sched_verbose ? dump : NULL);
6538 sched_data.last_was_stop = 1;
6541 return 1;
6544 /* Free data allocated by ia64_sched_init. */
6546 static void
6547 ia64_sched_finish (dump, sched_verbose)
6548 FILE *dump;
6549 int sched_verbose;
6551 if (sched_verbose)
6552 fprintf (dump, "// Finishing schedule.\n");
6553 rotate_two_bundles (NULL);
6554 free (sched_types);
6555 free (sched_ready);
6558 /* Emit pseudo-ops for the assembler to describe predicate relations.
6559 At present this assumes that we only consider predicate pairs to
6560 be mutex, and that the assembler can deduce proper values from
6561 straight-line code. */
6563 static void
6564 emit_predicate_relation_info ()
6566 int i;
6568 for (i = n_basic_blocks - 1; i >= 0; --i)
6570 basic_block bb = BASIC_BLOCK (i);
6571 int r;
6572 rtx head = bb->head;
6574 /* We only need such notes at code labels. */
6575 if (GET_CODE (head) != CODE_LABEL)
6576 continue;
6577 if (GET_CODE (NEXT_INSN (head)) == NOTE
6578 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6579 head = NEXT_INSN (head);
6581 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6582 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6584 rtx p = gen_rtx_REG (BImode, r);
6585 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6586 if (head == bb->end)
6587 bb->end = n;
6588 head = n;
6592 /* Look for conditional calls that do not return, and protect predicate
6593 relations around them. Otherwise the assembler will assume the call
6594 returns, and complain about uses of call-clobbered predicates after
6595 the call. */
6596 for (i = n_basic_blocks - 1; i >= 0; --i)
6598 basic_block bb = BASIC_BLOCK (i);
6599 rtx insn = bb->head;
6601 while (1)
6603 if (GET_CODE (insn) == CALL_INSN
6604 && GET_CODE (PATTERN (insn)) == COND_EXEC
6605 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6607 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6608 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6609 if (bb->head == insn)
6610 bb->head = b;
6611 if (bb->end == insn)
6612 bb->end = a;
6615 if (insn == bb->end)
6616 break;
6617 insn = NEXT_INSN (insn);
6622 /* Generate a NOP instruction of type T. We will never generate L type
6623 nops. */
6625 static rtx
6626 gen_nop_type (t)
6627 enum attr_type t;
6629 switch (t)
6631 case TYPE_M:
6632 return gen_nop_m ();
6633 case TYPE_I:
6634 return gen_nop_i ();
6635 case TYPE_B:
6636 return gen_nop_b ();
6637 case TYPE_F:
6638 return gen_nop_f ();
6639 case TYPE_X:
6640 return gen_nop_x ();
6641 default:
6642 abort ();
6646 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6647 here than while scheduling. */
6649 static void
6650 ia64_emit_nops ()
6652 rtx insn;
6653 const struct bundle *b = 0;
6654 int bundle_pos = 0;
6656 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6658 rtx pat;
6659 enum attr_type t;
6660 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6661 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6662 continue;
6663 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
6664 || GET_CODE (insn) == CODE_LABEL)
6666 if (b)
6667 while (bundle_pos < 3)
6669 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6670 bundle_pos++;
6672 if (GET_CODE (insn) != CODE_LABEL)
6673 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6674 else
6675 b = 0;
6676 bundle_pos = 0;
6677 continue;
6679 else if (GET_CODE (pat) == UNSPEC_VOLATILE
6680 && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
6682 int t = INTVAL (XVECEXP (pat, 0, 0));
6683 if (b)
6684 while (bundle_pos < t)
6686 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6687 bundle_pos++;
6689 continue;
6692 if (bundle_pos == 3)
6693 b = 0;
6695 if (b && INSN_P (insn))
6697 t = ia64_safe_type (insn);
6698 if (asm_noperands (PATTERN (insn)) >= 0
6699 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6701 while (bundle_pos < 3)
6703 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6704 bundle_pos++;
6706 continue;
6709 if (t == TYPE_UNKNOWN)
6710 continue;
6711 while (bundle_pos < 3)
6713 if (t == b->t[bundle_pos]
6714 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6715 || b->t[bundle_pos] == TYPE_I)))
6716 break;
6718 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6719 bundle_pos++;
6721 if (bundle_pos < 3)
6722 bundle_pos++;
6727 /* Perform machine dependent operations on the rtl chain INSNS. */
6729 void
6730 ia64_reorg (insns)
6731 rtx insns;
6733 /* We are freeing block_for_insn in the toplev to keep compatibility
6734 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6735 compute_bb_for_insn (get_max_uid ());
6737 /* If optimizing, we'll have split before scheduling. */
6738 if (optimize == 0)
6739 split_all_insns (0);
6741 update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
6742 PROP_DEATH_NOTES);
6744 if (ia64_flag_schedule_insns2)
6746 timevar_push (TV_SCHED2);
6747 ia64_final_schedule = 1;
6748 schedule_ebbs (rtl_dump_file);
6749 ia64_final_schedule = 0;
6750 timevar_pop (TV_SCHED2);
6752 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6753 place as they were during scheduling. */
6754 emit_insn_group_barriers (rtl_dump_file, insns);
6755 ia64_emit_nops ();
6757 else
6758 emit_all_insn_group_barriers (rtl_dump_file, insns);
6760 /* A call must not be the last instruction in a function, so that the
6761 return address is still within the function and unwinding works
6762 properly. Note that IA-64 differs from dwarf2 on this point. */
6763 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6765 rtx insn;
6766 int saw_stop = 0;
6768 insn = get_last_insn ();
6769 if (! INSN_P (insn))
6770 insn = prev_active_insn (insn);
6771 if (GET_CODE (insn) == INSN
6772 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6773 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6775 saw_stop = 1;
6776 insn = prev_active_insn (insn);
6778 if (GET_CODE (insn) == CALL_INSN)
6780 if (! saw_stop)
6781 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6782 emit_insn (gen_break_f ());
6783 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6787 fixup_errata ();
6788 emit_predicate_relation_info ();
6791 /* Return true if REGNO is used by the epilogue. */
6793 int
6794 ia64_epilogue_uses (regno)
6795 int regno;
6797 switch (regno)
6799 case R_GR (1):
6800 /* When a function makes a call through a function descriptor, we
6801 will write a (potentially) new value to "gp". After returning
6802 from such a call, we need to make sure the function restores the
6803 original gp-value, even if the function itself does not use the
6804 gp anymore. */
6805 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
6807 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
6808 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
6809 /* For functions defined with the syscall_linkage attribute, all
6810 input registers are marked as live at all function exits. This
6811 prevents the register allocator from using the input registers,
6812 which in turn makes it possible to restart a system call after
6813 an interrupt without having to save/restore the input registers.
6814 This also prevents kernel data from leaking to application code. */
6815 return lookup_attribute ("syscall_linkage",
6816 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
6818 case R_BR (0):
6819 /* Conditional return patterns can't represent the use of `b0' as
6820 the return address, so we force the value live this way. */
6821 return 1;
6823 case AR_PFS_REGNUM:
6824 /* Likewise for ar.pfs, which is used by br.ret. */
6825 return 1;
6827 default:
6828 return 0;
6832 /* Return true if REGNO is used by the frame unwinder. */
6834 int
6835 ia64_eh_uses (regno)
6836 int regno;
6838 if (! reload_completed)
6839 return 0;
6841 if (current_frame_info.reg_save_b0
6842 && regno == current_frame_info.reg_save_b0)
6843 return 1;
6844 if (current_frame_info.reg_save_pr
6845 && regno == current_frame_info.reg_save_pr)
6846 return 1;
6847 if (current_frame_info.reg_save_ar_pfs
6848 && regno == current_frame_info.reg_save_ar_pfs)
6849 return 1;
6850 if (current_frame_info.reg_save_ar_unat
6851 && regno == current_frame_info.reg_save_ar_unat)
6852 return 1;
6853 if (current_frame_info.reg_save_ar_lc
6854 && regno == current_frame_info.reg_save_ar_lc)
6855 return 1;
6857 return 0;
6860 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6862 We add @ to the name if this goes in small data/bss. We can only put
6863 a variable in small data/bss if it is defined in this module or a module
6864 that we are statically linked with. We can't check the second condition,
6865 but TREE_STATIC gives us the first one. */
6867 /* ??? If we had IPA, we could check the second condition. We could support
6868 programmer added section attributes if the variable is not defined in this
6869 module. */
6871 /* ??? See the v850 port for a cleaner way to do this. */
6873 /* ??? We could also support our own long data here. Generating movl/add/ld8
6874 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6875 code faster because there is one less load. This also includes incomplete
6876 types which can't go in sdata/sbss. */
6878 static bool
6879 ia64_in_small_data_p (exp)
6880 tree exp;
6882 if (TARGET_NO_SDATA)
6883 return false;
6885 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
6887 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
6888 if (strcmp (section, ".sdata") == 0
6889 || strcmp (section, ".sbss") == 0)
6890 return true;
6892 else
6894 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
6896 /* If this is an incomplete type with size 0, then we can't put it
6897 in sdata because it might be too big when completed. */
6898 if (size > 0 && size <= ia64_section_threshold)
6899 return true;
6902 return false;
6905 static void
6906 ia64_encode_section_info (decl, first)
6907 tree decl;
6908 int first ATTRIBUTE_UNUSED;
6910 const char *symbol_str;
6911 bool is_local, is_small;
6912 rtx symbol;
6914 if (TREE_CODE (decl) == FUNCTION_DECL)
6916 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6917 return;
6920 /* Careful not to prod global register variables. */
6921 if (TREE_CODE (decl) != VAR_DECL
6922 || GET_CODE (DECL_RTL (decl)) != MEM
6923 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6924 return;
6926 symbol = XEXP (DECL_RTL (decl), 0);
6927 symbol_str = XSTR (symbol, 0);
6929 /* A variable is considered "local" if it is defined by this module. */
6931 if (MODULE_LOCAL_P (decl))
6932 is_local = true;
6933 /* Otherwise, variables defined outside this object may not be local. */
6934 else if (DECL_EXTERNAL (decl))
6935 is_local = false;
6936 /* Linkonce and weak data are never local. */
6937 else if (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6938 is_local = false;
6939 /* Static variables are always local. */
6940 else if (! TREE_PUBLIC (decl))
6941 is_local = true;
6942 /* If PIC, then assume that any global name can be overridden by
6943 symbols resolved from other modules. */
6944 else if (flag_pic)
6945 is_local = false;
6946 /* Uninitialized COMMON variable may be unified with symbols
6947 resolved from other modules. */
6948 else if (DECL_COMMON (decl)
6949 && (DECL_INITIAL (decl) == NULL
6950 || DECL_INITIAL (decl) == error_mark_node))
6951 is_local = false;
6952 /* Otherwise we're left with initialized (or non-common) global data
6953 which is of necessity defined locally. */
6954 else
6955 is_local = true;
6957 /* Determine if DECL will wind up in .sdata/.sbss. */
6958 is_small = ia64_in_small_data_p (decl);
6960 /* Finally, encode this into the symbol string. */
6961 if (is_local && is_small)
6963 char *newstr;
6964 size_t len;
6966 if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6967 return;
6969 len = strlen (symbol_str) + 1;
6970 newstr = alloca (len + 1);
6971 newstr[0] = SDATA_NAME_FLAG_CHAR;
6972 memcpy (newstr + 1, symbol_str, len);
6974 XSTR (symbol, 0) = ggc_alloc_string (newstr, len);
6977 /* This decl is marked as being in small data/bss but it shouldn't
6978 be; one likely explanation for this is that the decl has been
6979 moved into a different section from the one it was in when
6980 targetm.encode_section_info was first called. Remove the '@'. */
6981 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6982 XSTR (symbol, 0) = ggc_strdup (symbol_str + 1);
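/* As an illustration of the encoding above (using the '@' flag character
   mentioned in the comment block that opens this section): a locally defined
   variable whose symbol string is "foo" and which fits in .sdata/.sbss has
   its name rewritten to "@foo"; the final branch strips the flag again if the
   decl later stops qualifying for small data/bss.  */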
6985 /* Output assembly directives for prologue regions. */
6987 /* The current basic block number. */
6989 static int block_num;
6991 /* True if we need a copy_state command at the start of the next block. */
6993 static int need_copy_state;
6995 /* The function emits unwind directives for the start of an epilogue. */
6997 static void
6998 process_epilogue ()
7000 /* If this isn't the last block of the function, then we need to label the
7001 current state, and copy it back in at the start of the next block. */
7003 if (block_num != n_basic_blocks - 1)
7005 fprintf (asm_out_file, "\t.label_state 1\n");
7006 need_copy_state = 1;
7009 fprintf (asm_out_file, "\t.restore sp\n");
7012 /* This function processes a SET pattern looking for specific patterns
7013 which result in emitting an assembly directive required for unwinding. */
7015 static int
7016 process_set (asm_out_file, pat)
7017 FILE *asm_out_file;
7018 rtx pat;
7020 rtx src = SET_SRC (pat);
7021 rtx dest = SET_DEST (pat);
7022 int src_regno, dest_regno;
7024 /* Look for the ALLOC insn. */
7025 if (GET_CODE (src) == UNSPEC_VOLATILE
7026 && XINT (src, 1) == UNSPECV_ALLOC
7027 && GET_CODE (dest) == REG)
7029 dest_regno = REGNO (dest);
7031 /* If this isn't the final destination for ar.pfs, the alloc
7032 shouldn't have been marked frame related. */
7033 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7034 abort ();
7036 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7037 ia64_dbx_register_number (dest_regno));
7038 return 1;
7041 /* Look for SP = .... */
7042 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7044 if (GET_CODE (src) == PLUS)
7046 rtx op0 = XEXP (src, 0);
7047 rtx op1 = XEXP (src, 1);
7048 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7050 if (INTVAL (op1) < 0)
7052 fputs ("\t.fframe ", asm_out_file);
7053 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
7054 -INTVAL (op1));
7055 fputc ('\n', asm_out_file);
7057 else
7058 process_epilogue ();
7060 else
7061 abort ();
7063 else if (GET_CODE (src) == REG
7064 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7065 process_epilogue ();
7066 else
7067 abort ();
7069 return 1;
7072 /* Register move we need to look at. */
7073 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7075 src_regno = REGNO (src);
7076 dest_regno = REGNO (dest);
7078 switch (src_regno)
7080 case BR_REG (0):
7081 /* Saving return address pointer. */
7082 if (dest_regno != current_frame_info.reg_save_b0)
7083 abort ();
7084 fprintf (asm_out_file, "\t.save rp, r%d\n",
7085 ia64_dbx_register_number (dest_regno));
7086 return 1;
7088 case PR_REG (0):
7089 if (dest_regno != current_frame_info.reg_save_pr)
7090 abort ();
7091 fprintf (asm_out_file, "\t.save pr, r%d\n",
7092 ia64_dbx_register_number (dest_regno));
7093 return 1;
7095 case AR_UNAT_REGNUM:
7096 if (dest_regno != current_frame_info.reg_save_ar_unat)
7097 abort ();
7098 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7099 ia64_dbx_register_number (dest_regno));
7100 return 1;
7102 case AR_LC_REGNUM:
7103 if (dest_regno != current_frame_info.reg_save_ar_lc)
7104 abort ();
7105 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7106 ia64_dbx_register_number (dest_regno));
7107 return 1;
7109 case STACK_POINTER_REGNUM:
7110 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7111 || ! frame_pointer_needed)
7112 abort ();
7113 fprintf (asm_out_file, "\t.vframe r%d\n",
7114 ia64_dbx_register_number (dest_regno));
7115 return 1;
7117 default:
7118 /* Everything else should indicate being stored to memory. */
7119 abort ();
7123 /* Memory store we need to look at. */
7124 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7126 long off;
7127 rtx base;
7128 const char *saveop;
7130 if (GET_CODE (XEXP (dest, 0)) == REG)
7132 base = XEXP (dest, 0);
7133 off = 0;
7135 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7136 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7138 base = XEXP (XEXP (dest, 0), 0);
7139 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7141 else
7142 abort ();
7144 if (base == hard_frame_pointer_rtx)
7146 saveop = ".savepsp";
7147 off = - off;
7149 else if (base == stack_pointer_rtx)
7150 saveop = ".savesp";
7151 else
7152 abort ();
7154 src_regno = REGNO (src);
7155 switch (src_regno)
7157 case BR_REG (0):
7158 if (current_frame_info.reg_save_b0 != 0)
7159 abort ();
7160 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7161 return 1;
7163 case PR_REG (0):
7164 if (current_frame_info.reg_save_pr != 0)
7165 abort ();
7166 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7167 return 1;
7169 case AR_LC_REGNUM:
7170 if (current_frame_info.reg_save_ar_lc != 0)
7171 abort ();
7172 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7173 return 1;
7175 case AR_PFS_REGNUM:
7176 if (current_frame_info.reg_save_ar_pfs != 0)
7177 abort ();
7178 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7179 return 1;
7181 case AR_UNAT_REGNUM:
7182 if (current_frame_info.reg_save_ar_unat != 0)
7183 abort ();
7184 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7185 return 1;
7187 case GR_REG (4):
7188 case GR_REG (5):
7189 case GR_REG (6):
7190 case GR_REG (7):
7191 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7192 1 << (src_regno - GR_REG (4)));
7193 return 1;
7195 case BR_REG (1):
7196 case BR_REG (2):
7197 case BR_REG (3):
7198 case BR_REG (4):
7199 case BR_REG (5):
7200 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7201 1 << (src_regno - BR_REG (1)));
7202 return 1;
7204 case FR_REG (2):
7205 case FR_REG (3):
7206 case FR_REG (4):
7207 case FR_REG (5):
7208 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7209 1 << (src_regno - FR_REG (2)));
7210 return 1;
7212 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7213 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7214 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7215 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7216 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7217 1 << (src_regno - FR_REG (12)));
7218 return 1;
7220 default:
7221 return 0;
7225 return 0;
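/* A sketch of the resulting output (register numbers and frame size are made
   up for illustration): for a frame-related prologue that saves ar.pfs in
   r34, saves b0 (rp) in r33 and then drops sp by 48 bytes, the cases above
   emit, in insn order,

       .save ar.pfs, r34
       .save rp, r33
       .fframe 48

   and the later restore of sp in the epilogue emits ".restore sp" through
   process_epilogue.  */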
7229 /* This function looks at a single insn and emits any directives
7230 required to unwind this insn. */
7231 void
7232 process_for_unwind_directive (asm_out_file, insn)
7233 FILE *asm_out_file;
7234 rtx insn;
7236 if (flag_unwind_tables
7237 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7239 rtx pat;
7241 if (GET_CODE (insn) == NOTE
7242 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7244 block_num = NOTE_BASIC_BLOCK (insn)->index;
7246 /* Restore unwind state from immediately before the epilogue. */
7247 if (need_copy_state)
7249 fprintf (asm_out_file, "\t.body\n");
7250 fprintf (asm_out_file, "\t.copy_state 1\n");
7251 need_copy_state = 0;
7255 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7256 return;
7258 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7259 if (pat)
7260 pat = XEXP (pat, 0);
7261 else
7262 pat = PATTERN (insn);
7264 switch (GET_CODE (pat))
7266 case SET:
7267 process_set (asm_out_file, pat);
7268 break;
7270 case PARALLEL:
7272 int par_index;
7273 int limit = XVECLEN (pat, 0);
7274 for (par_index = 0; par_index < limit; par_index++)
7276 rtx x = XVECEXP (pat, 0, par_index);
7277 if (GET_CODE (x) == SET)
7278 process_set (asm_out_file, x);
7280 break;
7283 default:
7284 abort ();
7290 void
7291 ia64_init_builtins ()
7293 tree psi_type_node = build_pointer_type (integer_type_node);
7294 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7295 tree endlink = void_list_node;
7297 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7298 tree si_ftype_psi_si_si
7299 = build_function_type (integer_type_node,
7300 tree_cons (NULL_TREE, psi_type_node,
7301 tree_cons (NULL_TREE, integer_type_node,
7302 tree_cons (NULL_TREE,
7303 integer_type_node,
7304 endlink))));
7306 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7307 tree di_ftype_pdi_di_di
7308 = build_function_type (long_integer_type_node,
7309 tree_cons (NULL_TREE, pdi_type_node,
7310 tree_cons (NULL_TREE,
7311 long_integer_type_node,
7312 tree_cons (NULL_TREE,
7313 long_integer_type_node,
7314 endlink))));
7315 /* __sync_synchronize */
7316 tree void_ftype_void
7317 = build_function_type (void_type_node, endlink);
7319 /* __sync_lock_test_and_set_si */
7320 tree si_ftype_psi_si
7321 = build_function_type (integer_type_node,
7322 tree_cons (NULL_TREE, psi_type_node,
7323 tree_cons (NULL_TREE, integer_type_node, endlink)));
7325 /* __sync_lock_test_and_set_di */
7326 tree di_ftype_pdi_di
7327 = build_function_type (long_integer_type_node,
7328 tree_cons (NULL_TREE, pdi_type_node,
7329 tree_cons (NULL_TREE, long_integer_type_node,
7330 endlink)));
7332 /* __sync_lock_release_si */
7333 tree void_ftype_psi
7334 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
7335 endlink));
7337 /* __sync_lock_release_di */
7338 tree void_ftype_pdi
7339 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
7340 endlink));
7342 #define def_builtin(name, type, code) \
7343 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
7345 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7346 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7347 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7348 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7349 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7350 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7351 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7352 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7354 def_builtin ("__sync_synchronize", void_ftype_void,
7355 IA64_BUILTIN_SYNCHRONIZE);
7357 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7358 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7359 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7360 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7361 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7362 IA64_BUILTIN_LOCK_RELEASE_SI);
7363 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7364 IA64_BUILTIN_LOCK_RELEASE_DI);
7366 def_builtin ("__builtin_ia64_bsp",
7367 build_function_type (ptr_type_node, endlink),
7368 IA64_BUILTIN_BSP);
7370 def_builtin ("__builtin_ia64_flushrs",
7371 build_function_type (void_type_node, endlink),
7372 IA64_BUILTIN_FLUSHRS);
7374 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7375 IA64_BUILTIN_FETCH_AND_ADD_SI);
7376 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7377 IA64_BUILTIN_FETCH_AND_SUB_SI);
7378 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7379 IA64_BUILTIN_FETCH_AND_OR_SI);
7380 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7381 IA64_BUILTIN_FETCH_AND_AND_SI);
7382 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7383 IA64_BUILTIN_FETCH_AND_XOR_SI);
7384 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7385 IA64_BUILTIN_FETCH_AND_NAND_SI);
7387 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7388 IA64_BUILTIN_ADD_AND_FETCH_SI);
7389 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7390 IA64_BUILTIN_SUB_AND_FETCH_SI);
7391 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7392 IA64_BUILTIN_OR_AND_FETCH_SI);
7393 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7394 IA64_BUILTIN_AND_AND_FETCH_SI);
7395 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7396 IA64_BUILTIN_XOR_AND_FETCH_SI);
7397 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7398 IA64_BUILTIN_NAND_AND_FETCH_SI);
7400 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7401 IA64_BUILTIN_FETCH_AND_ADD_DI);
7402 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7403 IA64_BUILTIN_FETCH_AND_SUB_DI);
7404 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7405 IA64_BUILTIN_FETCH_AND_OR_DI);
7406 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7407 IA64_BUILTIN_FETCH_AND_AND_DI);
7408 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7409 IA64_BUILTIN_FETCH_AND_XOR_DI);
7410 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7411 IA64_BUILTIN_FETCH_AND_NAND_DI);
7413 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7414 IA64_BUILTIN_ADD_AND_FETCH_DI);
7415 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7416 IA64_BUILTIN_SUB_AND_FETCH_DI);
7417 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7418 IA64_BUILTIN_OR_AND_FETCH_DI);
7419 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7420 IA64_BUILTIN_AND_AND_FETCH_DI);
7421 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7422 IA64_BUILTIN_XOR_AND_FETCH_DI);
7423 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7424 IA64_BUILTIN_NAND_AND_FETCH_DI);
7426 #undef def_builtin
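/* A minimal usage sketch for the builtins registered above, as they would be
   called from user code compiled for ia64.  The lock variable and helper
   names below are made up for illustration; the block is kept inside #if 0
   since it is not part of the back end itself.  */
#if 0
static int my_lock;

static void
my_acquire (void)
{
  /* Atomically store 1 and get back the previous value (an xchg4 on the
     lock word); spin until the previous value was 0, i.e. we took the
     lock.  */
  while (__sync_lock_test_and_set_si (&my_lock, 1) != 0)
    continue;
}

static void
my_release (void)
{
  /* Stores zero to the lock word with release semantics.  */
  __sync_lock_release_si (&my_lock);
}

static int
my_next_ticket (int *counter)
{
  /* Returns the value *counter held before the increment.  */
  return __sync_fetch_and_add_si (counter, 1);
}
#endif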
7429 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7432 tmp = [ptr];
7433 do {
7434 ret = tmp;
7435 ar.ccv = tmp;
7436 tmp <op>= value;
7437 cmpxchgsz.acq tmp = [ptr], tmp
7438 } while (tmp != ret)
7441 static rtx
7442 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7443 optab binoptab;
7444 enum machine_mode mode;
7445 tree arglist;
7446 rtx target;
7448 rtx ret, label, tmp, ccv, insn, mem, value;
7449 tree arg0, arg1;
7451 arg0 = TREE_VALUE (arglist);
7452 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7453 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7454 value = expand_expr (arg1, NULL_RTX, mode, 0);
7456 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7457 MEM_VOLATILE_P (mem) = 1;
7459 if (target && register_operand (target, mode))
7460 ret = target;
7461 else
7462 ret = gen_reg_rtx (mode);
7464 emit_insn (gen_mf ());
7466 /* Special case for fetchadd instructions. */
7467 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7469 if (mode == SImode)
7470 insn = gen_fetchadd_acq_si (ret, mem, value);
7471 else
7472 insn = gen_fetchadd_acq_di (ret, mem, value);
7473 emit_insn (insn);
7474 return ret;
7477 tmp = gen_reg_rtx (mode);
7478 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7479 emit_move_insn (tmp, mem);
7481 label = gen_label_rtx ();
7482 emit_label (label);
7483 emit_move_insn (ret, tmp);
7484 emit_move_insn (ccv, tmp);
7486 /* Perform the specific operation. Special case NAND by noticing
7487 one_cmpl_optab instead. */
7488 if (binoptab == one_cmpl_optab)
7490 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7491 binoptab = and_optab;
7493 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7495 if (mode == SImode)
7496 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7497 else
7498 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7499 emit_insn (insn);
7501 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7503 return ret;
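/* The user-visible difference from the op_and_fetch family below is only the
   value handed back: with *p == 5, __sync_fetch_and_add_si (p, 1) returns 5
   (the old value) while __sync_add_and_fetch_si (p, 1) returns 6 (the new
   value); both leave *p == 6.  */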
7506 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7509 tmp = [ptr];
7510 do {
7511 old = tmp;
7512 ar.ccv = tmp;
7513 ret = tmp + value;
7514 cmpxchgsz.acq tmp = [ptr], ret
7515 } while (tmp != old)
7518 static rtx
7519 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7520 optab binoptab;
7521 enum machine_mode mode;
7522 tree arglist;
7523 rtx target;
7525 rtx old, label, tmp, ret, ccv, insn, mem, value;
7526 tree arg0, arg1;
7528 arg0 = TREE_VALUE (arglist);
7529 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7530 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7531 value = expand_expr (arg1, NULL_RTX, mode, 0);
7533 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7534 MEM_VOLATILE_P (mem) = 1;
7536 if (target && ! register_operand (target, mode))
7537 target = NULL_RTX;
7539 emit_insn (gen_mf ());
7540 tmp = gen_reg_rtx (mode);
7541 old = gen_reg_rtx (mode);
7542 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7544 emit_move_insn (tmp, mem);
7546 label = gen_label_rtx ();
7547 emit_label (label);
7548 emit_move_insn (old, tmp);
7549 emit_move_insn (ccv, tmp);
7551 /* Perform the specific operation. Special case NAND by noticing
7552 one_cmpl_optab instead. */
7553 if (binoptab == one_cmpl_optab)
7555 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7556 binoptab = and_optab;
7558 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7560 if (mode == SImode)
7561 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7562 else
7563 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7564 emit_insn (insn);
7566 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7568 return ret;
7571 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7573 ar.ccv = oldval
7575 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7576 return ret
7578 For bool_ it's the same except return ret == oldval.
7581 static rtx
7582 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7583 enum machine_mode mode;
7584 int boolp;
7585 tree arglist;
7586 rtx target;
7588 tree arg0, arg1, arg2;
7589 rtx mem, old, new, ccv, tmp, insn;
7591 arg0 = TREE_VALUE (arglist);
7592 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7593 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7594 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7595 old = expand_expr (arg1, NULL_RTX, mode, 0);
7596 new = expand_expr (arg2, NULL_RTX, mode, 0);
7598 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7599 MEM_VOLATILE_P (mem) = 1;
7601 if (! register_operand (old, mode))
7602 old = copy_to_mode_reg (mode, old);
7603 if (! register_operand (new, mode))
7604 new = copy_to_mode_reg (mode, new);
7606 if (! boolp && target && register_operand (target, mode))
7607 tmp = target;
7608 else
7609 tmp = gen_reg_rtx (mode);
7611 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7612 emit_move_insn (ccv, old);
7613 emit_insn (gen_mf ());
7614 if (mode == SImode)
7615 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7616 else
7617 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7618 emit_insn (insn);
7620 if (boolp)
7622 if (! target)
7623 target = gen_reg_rtx (mode);
7624 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7626 else
7627 return tmp;
7630 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7632 static rtx
7633 ia64_expand_lock_test_and_set (mode, arglist, target)
7634 enum machine_mode mode;
7635 tree arglist;
7636 rtx target;
7638 tree arg0, arg1;
7639 rtx mem, new, ret, insn;
7641 arg0 = TREE_VALUE (arglist);
7642 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7643 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7644 new = expand_expr (arg1, NULL_RTX, mode, 0);
7646 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7647 MEM_VOLATILE_P (mem) = 1;
7648 if (! register_operand (new, mode))
7649 new = copy_to_mode_reg (mode, new);
7651 if (target && register_operand (target, mode))
7652 ret = target;
7653 else
7654 ret = gen_reg_rtx (mode);
7656 if (mode == SImode)
7657 insn = gen_xchgsi (ret, mem, new);
7658 else
7659 insn = gen_xchgdi (ret, mem, new);
7660 emit_insn (insn);
7662 return ret;
7665 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7667 static rtx
7668 ia64_expand_lock_release (mode, arglist, target)
7669 enum machine_mode mode;
7670 tree arglist;
7671 rtx target ATTRIBUTE_UNUSED;
7673 tree arg0;
7674 rtx mem;
7676 arg0 = TREE_VALUE (arglist);
7677 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7679 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7680 MEM_VOLATILE_P (mem) = 1;
7682 emit_move_insn (mem, const0_rtx);
7684 return const0_rtx;
7687 rtx
7688 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7689 tree exp;
7690 rtx target;
7691 rtx subtarget ATTRIBUTE_UNUSED;
7692 enum machine_mode mode ATTRIBUTE_UNUSED;
7693 int ignore ATTRIBUTE_UNUSED;
7695 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7696 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7697 tree arglist = TREE_OPERAND (exp, 1);
7699 switch (fcode)
7701 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7702 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7703 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7704 case IA64_BUILTIN_LOCK_RELEASE_SI:
7705 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7706 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7707 case IA64_BUILTIN_FETCH_AND_OR_SI:
7708 case IA64_BUILTIN_FETCH_AND_AND_SI:
7709 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7710 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7711 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7712 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7713 case IA64_BUILTIN_OR_AND_FETCH_SI:
7714 case IA64_BUILTIN_AND_AND_FETCH_SI:
7715 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7716 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7717 mode = SImode;
7718 break;
7720 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7721 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7722 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7723 case IA64_BUILTIN_LOCK_RELEASE_DI:
7724 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7725 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7726 case IA64_BUILTIN_FETCH_AND_OR_DI:
7727 case IA64_BUILTIN_FETCH_AND_AND_DI:
7728 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7729 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7730 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7731 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7732 case IA64_BUILTIN_OR_AND_FETCH_DI:
7733 case IA64_BUILTIN_AND_AND_FETCH_DI:
7734 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7735 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7736 mode = DImode;
7737 break;
7739 default:
7740 break;
7743 switch (fcode)
7745 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7746 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7747 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7749 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7750 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7751 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
7753 case IA64_BUILTIN_SYNCHRONIZE:
7754 emit_insn (gen_mf ());
7755 return const0_rtx;
7757 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7758 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7759 return ia64_expand_lock_test_and_set (mode, arglist, target);
7761 case IA64_BUILTIN_LOCK_RELEASE_SI:
7762 case IA64_BUILTIN_LOCK_RELEASE_DI:
7763 return ia64_expand_lock_release (mode, arglist, target);
7765 case IA64_BUILTIN_BSP:
7766 if (! target || ! register_operand (target, DImode))
7767 target = gen_reg_rtx (DImode);
7768 emit_insn (gen_bsp_value (target));
7769 return target;
7771 case IA64_BUILTIN_FLUSHRS:
7772 emit_insn (gen_flushrs ());
7773 return const0_rtx;
7775 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7776 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7777 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7779 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7780 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7781 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7783 case IA64_BUILTIN_FETCH_AND_OR_SI:
7784 case IA64_BUILTIN_FETCH_AND_OR_DI:
7785 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
7787 case IA64_BUILTIN_FETCH_AND_AND_SI:
7788 case IA64_BUILTIN_FETCH_AND_AND_DI:
7789 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
7791 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7792 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7793 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
7795 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7796 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7797 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
7799 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7800 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7801 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
7803 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7804 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7805 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
7807 case IA64_BUILTIN_OR_AND_FETCH_SI:
7808 case IA64_BUILTIN_OR_AND_FETCH_DI:
7809 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
7811 case IA64_BUILTIN_AND_AND_FETCH_SI:
7812 case IA64_BUILTIN_AND_AND_FETCH_DI:
7813 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
7815 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7816 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7817 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
7819 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7820 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7821 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
7823 default:
7824 break;
7827 return NULL_RTX;
7830 /* On HP-UX IA64, aggregate parameters are passed in the
7831 most significant bits of the stack slot. */
7833 enum direction
7834 ia64_hpux_function_arg_padding (mode, type)
7835 enum machine_mode mode;
7836 tree type;
7838 /* Exception to normal case for structures/unions/etc. */
7840 if (type && AGGREGATE_TYPE_P (type)
7841 && int_size_in_bytes (type) < UNITS_PER_WORD)
7842 return upward;
7844 /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
7845 hardwired to be true. */
7847 return((mode == BLKmode
7848 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7849 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
7850 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
7851 ? downward : upward);
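/* Two worked examples of the rule above: a 5-byte struct argument is an
   aggregate smaller than UNITS_PER_WORD and so is padded upward, landing in
   the most significant bits of its slot as described above; a plain 32-bit
   integer has GET_MODE_BITSIZE (SImode) < PARM_BOUNDARY and so is padded
   downward.  */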
7854 /* Switch to the section to which we should output X. The only thing
7855 special we do here is to honor small data. */
7857 static void
7858 ia64_select_rtx_section (mode, x, align)
7859 enum machine_mode mode;
7860 rtx x;
7861 unsigned HOST_WIDE_INT align;
7863 if (GET_MODE_SIZE (mode) > 0
7864 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
7865 sdata_section ();
7866 else
7867 default_elf_select_rtx_section (mode, x, align);
7870 /* It is illegal to have relocations in shared segments on AIX.
7871 Pretend flag_pic is always set. */
7873 static void
7874 ia64_aix_select_section (exp, reloc, align)
7875 tree exp;
7876 int reloc;
7877 unsigned HOST_WIDE_INT align;
7879 int save_pic = flag_pic;
7880 flag_pic = 1;
7881 default_elf_select_section (exp, reloc, align);
7882 flag_pic = save_pic;
7885 static void
7886 ia64_aix_unique_section (decl, reloc)
7887 tree decl;
7888 int reloc;
7890 int save_pic = flag_pic;
7891 flag_pic = 1;
7892 default_unique_section (decl, reloc);
7893 flag_pic = save_pic;
7896 static void
7897 ia64_aix_select_rtx_section (mode, x, align)
7898 enum machine_mode mode;
7899 rtx x;
7900 unsigned HOST_WIDE_INT align;
7902 int save_pic = flag_pic;
7903 flag_pic = 1;
7904 ia64_select_rtx_section (mode, x, align);
7905 flag_pic = save_pic;