* flags.h: New variables align_loops, align_loops_log,
[official-gcc.git] / gcc / config / sparc / sparc.c
blobad51856714db0e603400732971a724aab2c973c7
1 /* Subroutines for insn-output.c for Sun SPARC.
2 Copyright (C) 1987, 88, 89, 92-98, 1999 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "tree.h"
27 #include "rtl.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "function.h"
38 #include "expr.h"
39 #include "recog.h"
40 #include "toplev.h"
42 /* 1 if the caller has placed an "unimp" insn immediately after the call.
43 This is used in v8 code when calling a function that returns a structure.
44 v9 doesn't have this. Be careful to have this test be the same as that
45 used on the call. */
47 #define SKIP_CALLERS_UNIMP_P \
48 (!TARGET_ARCH64 && current_function_returns_struct \
49 && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))) \
50 && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl))) \
51 == INTEGER_CST))
53 /* Global variables for machine-dependent things. */
55 /* Size of frame. Need to know this to emit return insns from leaf procedures.
56 ACTUAL_FSIZE is set by compute_frame_size() which is called during the
57 reload pass. This is important as the value is later used in insn
58 scheduling (to see what can go in a delay slot).
59 APPARENT_FSIZE is the size of the stack less the register save area and less
60 the outgoing argument area. It is used when saving call preserved regs. */
61 static int apparent_fsize;
62 static int actual_fsize;
64 /* Save the operands last given to a compare for use when we
65 generate a scc or bcc insn. */
67 rtx sparc_compare_op0, sparc_compare_op1;
69 /* We may need an epilogue if we spill too many registers.
70 If this is non-zero, then we branch here for the epilogue. */
71 static rtx leaf_label;
73 #ifdef LEAF_REGISTERS
75 /* Vector to say how input registers are mapped to output
76 registers. FRAME_POINTER_REGNUM cannot be remapped by
77 this function to eliminate it. You must use -fomit-frame-pointer
78 to get that. */
79 char leaf_reg_remap[] =
80 { 0, 1, 2, 3, 4, 5, 6, 7,
81 -1, -1, -1, -1, -1, -1, 14, -1,
82 -1, -1, -1, -1, -1, -1, -1, -1,
83 8, 9, 10, 11, 12, 13, -1, 15,
85 32, 33, 34, 35, 36, 37, 38, 39,
86 40, 41, 42, 43, 44, 45, 46, 47,
87 48, 49, 50, 51, 52, 53, 54, 55,
88 56, 57, 58, 59, 60, 61, 62, 63,
89 64, 65, 66, 67, 68, 69, 70, 71,
90 72, 73, 74, 75, 76, 77, 78, 79,
91 80, 81, 82, 83, 84, 85, 86, 87,
92 88, 89, 90, 91, 92, 93, 94, 95,
93 96, 97, 98, 99, 100};
95 #endif
97 /* Name of where we pretend to think the frame pointer points.
98 Normally, this is "%fp", but if we are in a leaf procedure,
99 this is "%sp+something". We record "something" separately as it may be
100 too big for reg+constant addressing. */
102 static const char *frame_base_name;
103 static int frame_base_offset;
105 static rtx pic_setup_code PROTO((void));
106 static void sparc_init_modes PROTO((void));
107 static int save_regs PROTO((FILE *, int, int, const char *,
108 int, int, int));
109 static int restore_regs PROTO((FILE *, int, int, const char *, int, int));
110 static void build_big_number PROTO((FILE *, int, const char *));
111 static int function_arg_slotno PROTO((const CUMULATIVE_ARGS *,
112 enum machine_mode, tree, int, int,
113 int *, int *));
115 static int supersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
116 static int hypersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
117 static int ultrasparc_adjust_cost PROTO((rtx, rtx, rtx, int));
119 static void sparc_output_addr_vec PROTO((rtx));
120 static void sparc_output_addr_diff_vec PROTO((rtx));
121 static void sparc_output_deferred_case_vectors PROTO((void));
124 #ifdef DWARF2_DEBUGGING_INFO
125 extern char *dwarf2out_cfi_label ();
126 #endif
128 /* Option handling. */
130 /* Code model option as passed by user. */
131 const char *sparc_cmodel_string;
132 /* Parsed value. */
133 enum cmodel sparc_cmodel;
135 char sparc_hard_reg_printed[8];
137 struct sparc_cpu_select sparc_select[] =
139 /* switch name, tune arch */
140 { (char *)0, "default", 1, 1 },
141 { (char *)0, "-mcpu=", 1, 1 },
142 { (char *)0, "-mtune=", 1, 0 },
143 { 0, 0, 0, 0 }
146 /* CPU type. This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx. */
147 enum processor_type sparc_cpu;
149 /* Validate and override various options, and do some machine dependent
150 initialization. */
152 void
153 sparc_override_options ()
155 static struct code_model {
156 const char *name;
157 int value;
158 } cmodels[] = {
159 { "32", CM_32 },
160 { "medlow", CM_MEDLOW },
161 { "medmid", CM_MEDMID },
162 { "medany", CM_MEDANY },
163 { "embmedany", CM_EMBMEDANY },
164 { 0, 0 }
166 struct code_model *cmodel;
167 /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=. */
168 static struct cpu_default {
169 int cpu;
170 const char *name;
171 } cpu_default[] = {
172 /* There must be one entry here for each TARGET_CPU value. */
173 { TARGET_CPU_sparc, "cypress" },
174 { TARGET_CPU_sparclet, "tsc701" },
175 { TARGET_CPU_sparclite, "f930" },
176 { TARGET_CPU_v8, "v8" },
177 { TARGET_CPU_hypersparc, "hypersparc" },
178 { TARGET_CPU_sparclite86x, "sparclite86x" },
179 { TARGET_CPU_supersparc, "supersparc" },
180 { TARGET_CPU_v9, "v9" },
181 { TARGET_CPU_ultrasparc, "ultrasparc" },
182 { 0, 0 }
184 struct cpu_default *def;
185 /* Table of values for -m{cpu,tune}=. */
186 static struct cpu_table {
187 const char *name;
188 enum processor_type processor;
189 int disable;
190 int enable;
191 } cpu_table[] = {
192 { "v7", PROCESSOR_V7, MASK_ISA, 0 },
193 { "cypress", PROCESSOR_CYPRESS, MASK_ISA, 0 },
194 { "v8", PROCESSOR_V8, MASK_ISA, MASK_V8 },
195 /* TI TMS390Z55 supersparc */
196 { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
197 { "sparclite", PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
198 /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
199 The Fujitsu MB86934 is the recent sparclite chip, with an fpu. */
200 { "f930", PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
201 { "f934", PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
202 { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
203 { "sparclite86x", PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
204 MASK_SPARCLITE },
205 { "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
206 /* TEMIC sparclet */
207 { "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
208 { "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 },
209 /* TI ultrasparc I, II, IIi */
210 { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
211 /* Although insns using %y are deprecated, it is a clear win on current
212 ultrasparcs. */
213 |MASK_DEPRECATED_V8_INSNS },
214 { 0, 0, 0, 0 }
216 struct cpu_table *cpu;
217 struct sparc_cpu_select *sel;
218 int fpu;
220 #ifndef SPARC_BI_ARCH
221 /* Check for unsupported architecture size. */
222 if (! TARGET_64BIT != DEFAULT_ARCH32_P)
224 error ("%s is not supported by this configuration",
225 DEFAULT_ARCH32_P ? "-m64" : "-m32");
227 #endif
229 /* At the moment we don't allow different pointer size and architecture */
230 if (! TARGET_64BIT != ! TARGET_PTR64)
232 error ("-mptr%d not allowed on -m%d",
233 TARGET_PTR64 ? 64 : 32, TARGET_64BIT ? 64 : 32);
234 if (TARGET_64BIT)
235 target_flags |= MASK_PTR64;
236 else
237 target_flags &= ~MASK_PTR64;
240 /* Code model selection. */
241 sparc_cmodel = SPARC_DEFAULT_CMODEL;
243 #ifdef SPARC_BI_ARCH
244 if (TARGET_ARCH32)
245 sparc_cmodel = CM_32;
246 #endif
248 if (sparc_cmodel_string != NULL)
250 if (TARGET_ARCH64)
252 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
253 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
254 break;
255 if (cmodel->name == NULL)
256 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
257 else
258 sparc_cmodel = cmodel->value;
260 else
261 error ("-mcmodel= is not supported on 32 bit systems");
264 fpu = TARGET_FPU; /* save current -mfpu status */
266 /* Set the default CPU. */
267 for (def = &cpu_default[0]; def->name; ++def)
268 if (def->cpu == TARGET_CPU_DEFAULT)
269 break;
270 if (! def->name)
271 abort ();
272 sparc_select[0].string = def->name;
274 for (sel = &sparc_select[0]; sel->name; ++sel)
276 if (sel->string)
278 for (cpu = &cpu_table[0]; cpu->name; ++cpu)
279 if (! strcmp (sel->string, cpu->name))
281 if (sel->set_tune_p)
282 sparc_cpu = cpu->processor;
284 if (sel->set_arch_p)
286 target_flags &= ~cpu->disable;
287 target_flags |= cpu->enable;
289 break;
292 if (! cpu->name)
293 error ("bad value (%s) for %s switch", sel->string, sel->name);
297 /* If -mfpu or -mno-fpu was explicitly used, don't override with
298 the processor default. */
299 if (TARGET_FPU_SET)
300 target_flags = (target_flags & ~MASK_FPU) | fpu;
302 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
303 if (TARGET_V9 && TARGET_ARCH32)
304 target_flags |= MASK_DEPRECATED_V8_INSNS;
306 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
307 if (! TARGET_V9 || TARGET_ARCH64)
308 target_flags &= ~MASK_V8PLUS;
310 /* Don't use stack biasing in 32 bit mode. */
311 if (TARGET_ARCH32)
312 target_flags &= ~MASK_STACK_BIAS;
314 /* Don't allow -mvis if FPU is disabled. */
315 if (! TARGET_FPU)
316 target_flags &= ~MASK_VIS;
318 /* Supply a default value for align_functions. */
319 if (align_functions == 0 && sparc_cpu == PROCESSOR_ULTRASPARC)
320 align_functions = 32;
322 /* Validate PCC_STRUCT_RETURN. */
323 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
324 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
326 /* Do various machine dependent initializations. */
327 sparc_init_modes ();
329 if ((profile_flag || profile_block_flag)
330 && sparc_cmodel != CM_MEDLOW)
332 error ("profiling does not support code models other than medlow");
336 /* Miscellaneous utilities. */
338 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
339 or branch on register contents instructions. */
342 v9_regcmp_p (code)
343 enum rtx_code code;
345 return (code == EQ || code == NE || code == GE || code == LT
346 || code == LE || code == GT);
350 /* Operand constraints. */
352 /* Return non-zero only if OP is a register of mode MODE,
353 or const0_rtx. Don't allow const0_rtx if TARGET_LIVE_G0 because
354 %g0 may contain anything. */
357 reg_or_0_operand (op, mode)
358 rtx op;
359 enum machine_mode mode;
361 if (register_operand (op, mode))
362 return 1;
363 if (TARGET_LIVE_G0)
364 return 0;
365 if (op == const0_rtx)
366 return 1;
367 if (GET_MODE (op) == VOIDmode && GET_CODE (op) == CONST_DOUBLE
368 && CONST_DOUBLE_HIGH (op) == 0
369 && CONST_DOUBLE_LOW (op) == 0)
370 return 1;
371 if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT
372 && GET_CODE (op) == CONST_DOUBLE
373 && fp_zero_operand (op))
374 return 1;
375 return 0;
378 /* Nonzero if OP is a floating point value with value 0.0. */
381 fp_zero_operand (op)
382 rtx op;
384 REAL_VALUE_TYPE r;
386 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
387 return (REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r));
390 /* Nonzero if OP is an integer register. */
393 intreg_operand (op, mode)
394 rtx op;
395 enum machine_mode mode ATTRIBUTE_UNUSED;
397 return (register_operand (op, SImode)
398 || (TARGET_ARCH64 && register_operand (op, DImode)));
401 /* Nonzero if OP is a floating point condition code register. */
404 fcc_reg_operand (op, mode)
405 rtx op;
406 enum machine_mode mode;
408 /* This can happen when recog is called from combine. Op may be a MEM.
409 Fail instead of calling abort in this case. */
410 if (GET_CODE (op) != REG)
411 return 0;
413 if (mode != VOIDmode && mode != GET_MODE (op))
414 return 0;
415 if (mode == VOIDmode
416 && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
417 return 0;
419 #if 0 /* ??? ==> 1 when %fcc0-3 are pseudos first. See gen_compare_reg(). */
420 if (reg_renumber == 0)
421 return REGNO (op) >= FIRST_PSEUDO_REGISTER;
422 return REGNO_OK_FOR_CCFP_P (REGNO (op));
423 #else
424 return (unsigned) REGNO (op) - SPARC_FIRST_V9_FCC_REG < 4;
425 #endif
428 /* Nonzero if OP is an integer or floating point condition code register. */
431 icc_or_fcc_reg_operand (op, mode)
432 rtx op;
433 enum machine_mode mode;
435 if (GET_CODE (op) == REG && REGNO (op) == SPARC_ICC_REG)
437 if (mode != VOIDmode && mode != GET_MODE (op))
438 return 0;
439 if (mode == VOIDmode
440 && GET_MODE (op) != CCmode && GET_MODE (op) != CCXmode)
441 return 0;
442 return 1;
445 return fcc_reg_operand (op, mode);
448 /* Nonzero if OP can appear as the dest of a RESTORE insn. */
450 restore_operand (op, mode)
451 rtx op;
452 enum machine_mode mode;
454 return (GET_CODE (op) == REG && GET_MODE (op) == mode
455 && (REGNO (op) < 8 || (REGNO (op) >= 24 && REGNO (op) < 32)));
458 /* Call insn on SPARC can take a PC-relative constant address, or any regular
459 memory address. */
462 call_operand (op, mode)
463 rtx op;
464 enum machine_mode mode;
466 if (GET_CODE (op) != MEM)
467 abort ();
468 op = XEXP (op, 0);
469 return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
473 call_operand_address (op, mode)
474 rtx op;
475 enum machine_mode mode;
477 return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
480 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
481 reference and a constant. */
484 symbolic_operand (op, mode)
485 register rtx op;
486 enum machine_mode mode;
488 switch (GET_CODE (op))
490 case SYMBOL_REF:
491 case LABEL_REF:
492 return 1;
494 case CONST:
495 op = XEXP (op, 0);
496 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
497 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
498 && GET_CODE (XEXP (op, 1)) == CONST_INT);
500 /* ??? This clause seems to be irrelevant. */
501 case CONST_DOUBLE:
502 return GET_MODE (op) == mode;
504 default:
505 return 0;
509 /* Return truth value of statement that OP is a symbolic memory
510 operand of mode MODE. */
513 symbolic_memory_operand (op, mode)
514 rtx op;
515 enum machine_mode mode ATTRIBUTE_UNUSED;
517 if (GET_CODE (op) == SUBREG)
518 op = SUBREG_REG (op);
519 if (GET_CODE (op) != MEM)
520 return 0;
521 op = XEXP (op, 0);
522 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
523 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
526 /* Return truth value of statement that OP is a LABEL_REF of mode MODE. */
529 label_ref_operand (op, mode)
530 rtx op;
531 enum machine_mode mode;
533 if (GET_CODE (op) != LABEL_REF)
534 return 0;
535 if (GET_MODE (op) != mode)
536 return 0;
537 return 1;
540 /* Return 1 if the operand is an argument used in generating pic references
541 in either the medium/low or medium/anywhere code models of sparc64. */
544 sp64_medium_pic_operand (op, mode)
545 rtx op;
546 enum machine_mode mode ATTRIBUTE_UNUSED;
548 /* Check for (const (minus (symbol_ref:GOT)
549 (const (minus (label) (pc))))). */
550 if (GET_CODE (op) != CONST)
551 return 0;
552 op = XEXP (op, 0);
553 if (GET_CODE (op) != MINUS)
554 return 0;
555 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
556 return 0;
557 /* ??? Ensure symbol is GOT. */
558 if (GET_CODE (XEXP (op, 1)) != CONST)
559 return 0;
560 if (GET_CODE (XEXP (XEXP (op, 1), 0)) != MINUS)
561 return 0;
562 return 1;
565 /* Return 1 if the operand is a data segment reference. This includes
566 the readonly data segment, or in other words anything but the text segment.
567 This is needed in the medium/anywhere code model on v9. These values
568 are accessed with EMBMEDANY_BASE_REG. */
571 data_segment_operand (op, mode)
572 rtx op;
573 enum machine_mode mode ATTRIBUTE_UNUSED;
575 switch (GET_CODE (op))
577 case SYMBOL_REF :
578 return ! SYMBOL_REF_FLAG (op);
579 case PLUS :
580 /* Assume canonical format of symbol + constant.
581 Fall through. */
582 case CONST :
583 return data_segment_operand (XEXP (op, 0));
584 default :
585 return 0;
589 /* Return 1 if the operand is a text segment reference.
590 This is needed in the medium/anywhere code model on v9. */
593 text_segment_operand (op, mode)
594 rtx op;
595 enum machine_mode mode ATTRIBUTE_UNUSED;
597 switch (GET_CODE (op))
599 case LABEL_REF :
600 return 1;
601 case SYMBOL_REF :
602 return SYMBOL_REF_FLAG (op);
603 case PLUS :
604 /* Assume canonical format of symbol + constant.
605 Fall through. */
606 case CONST :
607 return text_segment_operand (XEXP (op, 0));
608 default :
609 return 0;
613 /* Return 1 if the operand is either a register or a memory operand that is
614 not symbolic. */
617 reg_or_nonsymb_mem_operand (op, mode)
618 register rtx op;
619 enum machine_mode mode;
621 if (register_operand (op, mode))
622 return 1;
624 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
625 return 1;
627 return 0;
631 splittable_symbolic_memory_operand (op, mode)
632 rtx op;
633 enum machine_mode mode ATTRIBUTE_UNUSED;
635 if (GET_CODE (op) != MEM)
636 return 0;
637 if (! symbolic_operand (XEXP (op, 0), Pmode))
638 return 0;
639 return 1;
643 splittable_immediate_memory_operand (op, mode)
644 rtx op;
645 enum machine_mode mode ATTRIBUTE_UNUSED;
647 if (GET_CODE (op) != MEM)
648 return 0;
649 if (! immediate_operand (XEXP (op, 0), Pmode))
650 return 0;
651 return 1;
654 /* Return truth value of whether OP is EQ or NE. */
657 eq_or_neq (op, mode)
658 rtx op;
659 enum machine_mode mode ATTRIBUTE_UNUSED;
661 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
664 /* Return 1 if this is a comparison operator, but not an EQ, NE, GEU,
665 or LTU for non-floating-point. We handle those specially. */
668 normal_comp_operator (op, mode)
669 rtx op;
670 enum machine_mode mode ATTRIBUTE_UNUSED;
672 enum rtx_code code = GET_CODE (op);
674 if (GET_RTX_CLASS (code) != '<')
675 return 0;
677 if (GET_MODE (XEXP (op, 0)) == CCFPmode
678 || GET_MODE (XEXP (op, 0)) == CCFPEmode)
679 return 1;
681 return (code != NE && code != EQ && code != GEU && code != LTU);
684 /* Return 1 if this is a comparison operator. This allows the use of
685 MATCH_OPERATOR to recognize all the branch insns. */
688 noov_compare_op (op, mode)
689 register rtx op;
690 enum machine_mode mode ATTRIBUTE_UNUSED;
692 enum rtx_code code = GET_CODE (op);
694 if (GET_RTX_CLASS (code) != '<')
695 return 0;
697 if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode)
698 /* These are the only branches which work with CC_NOOVmode. */
699 return (code == EQ || code == NE || code == GE || code == LT);
700 return 1;
703 /* Nonzero if OP is a comparison operator suitable for use in v9
704 conditional move or branch on register contents instructions. */
707 v9_regcmp_op (op, mode)
708 register rtx op;
709 enum machine_mode mode ATTRIBUTE_UNUSED;
711 enum rtx_code code = GET_CODE (op);
713 if (GET_RTX_CLASS (code) != '<')
714 return 0;
716 return v9_regcmp_p (code);
719 /* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation. */
722 extend_op (op, mode)
723 rtx op;
724 enum machine_mode mode ATTRIBUTE_UNUSED;
726 return GET_CODE (op) == SIGN_EXTEND || GET_CODE (op) == ZERO_EXTEND;
729 /* Return nonzero if OP is an operator of mode MODE which can set
730 the condition codes explicitly. We do not include PLUS and MINUS
731 because these require CC_NOOVmode, which we handle explicitly. */
734 cc_arithop (op, mode)
735 rtx op;
736 enum machine_mode mode ATTRIBUTE_UNUSED;
738 if (GET_CODE (op) == AND
739 || GET_CODE (op) == IOR
740 || GET_CODE (op) == XOR)
741 return 1;
743 return 0;
746 /* Return nonzero if OP is an operator of mode MODE which can bitwise
747 complement its second operand and set the condition codes explicitly. */
750 cc_arithopn (op, mode)
751 rtx op;
752 enum machine_mode mode ATTRIBUTE_UNUSED;
754 /* XOR is not here because combine canonicalizes (xor (not ...) ...)
755 and (xor ... (not ...)) to (not (xor ...)). */
756 return (GET_CODE (op) == AND
757 || GET_CODE (op) == IOR);
760 /* Return true if OP is a register, or is a CONST_INT that can fit in a
761 signed 13 bit immediate field. This is an acceptable SImode operand for
762 most 3 address instructions. */
765 arith_operand (op, mode)
766 rtx op;
767 enum machine_mode mode;
769 int val;
770 if (register_operand (op, mode))
771 return 1;
772 if (GET_CODE (op) != CONST_INT)
773 return 0;
774 val = INTVAL (op) & 0xffffffff;
775 return SPARC_SIMM13_P (val);
778 /* Return true if OP is a constant 4096 */
781 arith_4096_operand (op, mode)
782 rtx op;
783 enum machine_mode mode ATTRIBUTE_UNUSED;
785 int val;
786 if (GET_CODE (op) != CONST_INT)
787 return 0;
788 val = INTVAL (op) & 0xffffffff;
789 return val == 4096;
792 /* Return true if OP is suitable as second operand for add/sub */
795 arith_add_operand (op, mode)
796 rtx op;
797 enum machine_mode mode;
799 return arith_operand (op, mode) || arith_4096_operand (op, mode);
802 /* Return true if OP is a CONST_INT or a CONST_DOUBLE which can fit in the
803 immediate field of OR and XOR instructions. Used for 64-bit
804 constant formation patterns. */
806 const64_operand (op, mode)
807 rtx op;
808 enum machine_mode mode ATTRIBUTE_UNUSED;
810 return ((GET_CODE (op) == CONST_INT
811 && SPARC_SIMM13_P (INTVAL (op)))
812 #if HOST_BITS_PER_WIDE_INT != 64
813 || (GET_CODE (op) == CONST_DOUBLE
814 && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
815 && (CONST_DOUBLE_HIGH (op) ==
816 ((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
817 (HOST_WIDE_INT)0xffffffff : 0)))
818 #endif
822 /* The same, but only for sethi instructions. */
824 const64_high_operand (op, mode)
825 rtx op;
826 enum machine_mode mode ATTRIBUTE_UNUSED;
828 return ((GET_CODE (op) == CONST_INT
829 && (INTVAL (op) & 0xfffffc00) != 0
830 && SPARC_SETHI_P (INTVAL (op))
831 #if HOST_BITS_PER_WIDE_INT != 64
832 /* Must be positive on non-64bit host else the
833 optimizer is fooled into thinking that sethi
834 sign extends, even though it does not. */
835 && INTVAL (op) >= 0
836 #endif
838 || (GET_CODE (op) == CONST_DOUBLE
839 && CONST_DOUBLE_HIGH (op) == 0
840 && (CONST_DOUBLE_LOW (op) & 0xfffffc00) != 0
841 && SPARC_SETHI_P (CONST_DOUBLE_LOW (op))));
844 /* Return true if OP is a register, or is a CONST_INT that can fit in a
845 signed 11 bit immediate field. This is an acceptable SImode operand for
846 the movcc instructions. */
849 arith11_operand (op, mode)
850 rtx op;
851 enum machine_mode mode;
853 return (register_operand (op, mode)
854 || (GET_CODE (op) == CONST_INT && SPARC_SIMM11_P (INTVAL (op))));
857 /* Return true if OP is a register, or is a CONST_INT that can fit in a
858 signed 10 bit immediate field. This is an acceptable SImode operand for
859 the movrcc instructions. */
862 arith10_operand (op, mode)
863 rtx op;
864 enum machine_mode mode;
866 return (register_operand (op, mode)
867 || (GET_CODE (op) == CONST_INT && SPARC_SIMM10_P (INTVAL (op))));
870 /* Return true if OP is a register, is a CONST_INT that fits in a 13 bit
871 immediate field, or is a CONST_DOUBLE whose both parts fit in a 13 bit
872 immediate field.
873 v9: Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
874 can fit in a 13 bit immediate field. This is an acceptable DImode operand
875 for most 3 address instructions. */
878 arith_double_operand (op, mode)
879 rtx op;
880 enum machine_mode mode;
882 return (register_operand (op, mode)
883 || (GET_CODE (op) == CONST_INT && SMALL_INT (op))
884 || (! TARGET_ARCH64
885 && GET_CODE (op) == CONST_DOUBLE
886 && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
887 && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_HIGH (op) + 0x1000) < 0x2000)
888 || (TARGET_ARCH64
889 && GET_CODE (op) == CONST_DOUBLE
890 && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
891 && ((CONST_DOUBLE_HIGH (op) == -1
892 && (CONST_DOUBLE_LOW (op) & 0x1000) == 0x1000)
893 || (CONST_DOUBLE_HIGH (op) == 0
894 && (CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
897 /* Return true if OP is a constant 4096 for DImode on ARCH64 */
900 arith_double_4096_operand (op, mode)
901 rtx op;
902 enum machine_mode mode ATTRIBUTE_UNUSED;
904 return (TARGET_ARCH64 &&
905 ((GET_CODE (op) == CONST_INT && INTVAL (op) == 4096) ||
906 (GET_CODE (op) == CONST_DOUBLE &&
907 CONST_DOUBLE_LOW (op) == 4096 &&
908 CONST_DOUBLE_HIGH (op) == 0)));
911 /* Return true if OP is suitable as second operand for add/sub in DImode */
914 arith_double_add_operand (op, mode)
915 rtx op;
916 enum machine_mode mode;
918 return arith_double_operand (op, mode) || arith_double_4096_operand (op, mode);
921 /* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
922 can fit in an 11 bit immediate field. This is an acceptable DImode
923 operand for the movcc instructions. */
924 /* ??? Replace with arith11_operand? */
927 arith11_double_operand (op, mode)
928 rtx op;
929 enum machine_mode mode;
931 return (register_operand (op, mode)
932 || (GET_CODE (op) == CONST_DOUBLE
933 && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
934 && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
935 && ((CONST_DOUBLE_HIGH (op) == -1
936 && (CONST_DOUBLE_LOW (op) & 0x400) == 0x400)
937 || (CONST_DOUBLE_HIGH (op) == 0
938 && (CONST_DOUBLE_LOW (op) & 0x400) == 0)))
939 || (GET_CODE (op) == CONST_INT
940 && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
941 && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x400) < 0x800));
944 /* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
945 can fit in an 10 bit immediate field. This is an acceptable DImode
946 operand for the movrcc instructions. */
947 /* ??? Replace with arith10_operand? */
950 arith10_double_operand (op, mode)
951 rtx op;
952 enum machine_mode mode;
954 return (register_operand (op, mode)
955 || (GET_CODE (op) == CONST_DOUBLE
956 && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
957 && (unsigned) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
958 && ((CONST_DOUBLE_HIGH (op) == -1
959 && (CONST_DOUBLE_LOW (op) & 0x200) == 0x200)
960 || (CONST_DOUBLE_HIGH (op) == 0
961 && (CONST_DOUBLE_LOW (op) & 0x200) == 0)))
962 || (GET_CODE (op) == CONST_INT
963 && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
964 && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x200) < 0x400));
967 /* Return truth value of whether OP is a integer which fits the
968 range constraining immediate operands in most three-address insns,
969 which have a 13 bit immediate field. */
972 small_int (op, mode)
973 rtx op;
974 enum machine_mode mode ATTRIBUTE_UNUSED;
976 return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
980 small_int_or_double (op, mode)
981 rtx op;
982 enum machine_mode mode ATTRIBUTE_UNUSED;
984 return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
985 || (GET_CODE (op) == CONST_DOUBLE
986 && CONST_DOUBLE_HIGH (op) == 0
987 && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))));
990 /* Recognize operand values for the umul instruction. That instruction sign
991 extends immediate values just like all other sparc instructions, but
992 interprets the extended result as an unsigned number. */
995 uns_small_int (op, mode)
996 rtx op;
997 enum machine_mode mode ATTRIBUTE_UNUSED;
999 #if HOST_BITS_PER_WIDE_INT > 32
1000 /* All allowed constants will fit a CONST_INT. */
1001 return (GET_CODE (op) == CONST_INT
1002 && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
1003 || (INTVAL (op) >= 0xFFFFF000
1004 && INTVAL (op) < 0x100000000)));
1005 #else
1006 return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
1007 || (GET_CODE (op) == CONST_DOUBLE
1008 && CONST_DOUBLE_HIGH (op) == 0
1009 && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
1010 #endif
1014 uns_arith_operand (op, mode)
1015 rtx op;
1016 enum machine_mode mode;
1018 return register_operand (op, mode) || uns_small_int (op, mode);
1021 /* Return truth value of statement that OP is a call-clobbered register. */
1023 clobbered_register (op, mode)
1024 rtx op;
1025 enum machine_mode mode ATTRIBUTE_UNUSED;
1027 return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
1030 /* Return 1 if OP is const0_rtx, used for TARGET_LIVE_G0 insns. */
1033 zero_operand (op, mode)
1034 rtx op;
1035 enum machine_mode mode ATTRIBUTE_UNUSED;
1037 return op == const0_rtx;
1040 /* Return 1 if OP is a valid operand for the source of a move insn. */
1043 input_operand (op, mode)
1044 rtx op;
1045 enum machine_mode mode;
1047 /* If both modes are non-void they must be the same. */
1048 if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
1049 return 0;
1051 /* Only a tiny bit of handling for CONSTANT_P_RTX is necessary. */
1052 if (GET_CODE (op) == CONST && GET_CODE (XEXP (op, 0)) == CONSTANT_P_RTX)
1053 return 1;
1055 /* Allow any one instruction integer constant, and all CONST_INT
1056 variants when we are working in DImode and !arch64. */
1057 if (GET_MODE_CLASS (mode) == MODE_INT
1058 && ((GET_CODE (op) == CONST_INT
1059 && ((SPARC_SETHI_P (INTVAL (op))
1060 && (! TARGET_ARCH64
1061 || (INTVAL (op) >= 0)
1062 || mode == SImode))
1063 || SPARC_SIMM13_P (INTVAL (op))
1064 || (mode == DImode
1065 && ! TARGET_ARCH64)))
1066 || (TARGET_ARCH64
1067 && GET_CODE (op) == CONST_DOUBLE
1068 && ((CONST_DOUBLE_HIGH (op) == 0
1069 && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
1071 #if HOST_BITS_PER_WIDE_INT == 64
1072 (CONST_DOUBLE_HIGH (op) == 0
1073 && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
1074 #else
1075 (SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
1076 && (((CONST_DOUBLE_LOW (op) & 0x80000000) == 0
1077 && CONST_DOUBLE_HIGH (op) == 0)
1078 || (CONST_DOUBLE_HIGH (op) == -1)))
1079 #endif
1080 ))))
1081 return 1;
1083 /* If !arch64 and this is a DImode const, allow it so that
1084 the splits can be generated. */
1085 if (! TARGET_ARCH64
1086 && mode == DImode
1087 && GET_CODE (op) == CONST_DOUBLE)
1088 return 1;
1090 if (register_operand (op, mode))
1091 return 1;
1093 /* If this is a SUBREG, look inside so that we handle
1094 paradoxical ones. */
1095 if (GET_CODE (op) == SUBREG)
1096 op = SUBREG_REG (op);
1098 /* Check for valid MEM forms. */
1099 if (GET_CODE (op) == MEM)
1101 rtx inside = XEXP (op, 0);
1103 if (GET_CODE (inside) == LO_SUM)
1105 /* We can't allow these because all of the splits
1106 (eventually as they trickle down into DFmode
1107 splits) require offsettable memory references. */
1108 if (! TARGET_V9
1109 && GET_MODE (op) == TFmode)
1110 return 0;
1112 return (register_operand (XEXP (inside, 0), Pmode)
1113 && CONSTANT_P (XEXP (inside, 1)));
1115 return memory_address_p (mode, inside);
1118 return 0;
1122 /* We know it can't be done in one insn when we get here,
1123 the movsi expander guarentees this. */
1124 void
1125 sparc_emit_set_const32 (op0, op1)
1126 rtx op0;
1127 rtx op1;
1129 enum machine_mode mode = GET_MODE (op0);
1130 rtx temp;
1132 if (GET_CODE (op1) == CONST_INT)
1134 HOST_WIDE_INT value = INTVAL (op1);
1136 if (SPARC_SETHI_P (value)
1137 || SPARC_SIMM13_P (value))
1138 abort ();
1141 /* Full 2-insn decomposition is needed. */
1142 if (reload_in_progress || reload_completed)
1143 temp = op0;
1144 else
1145 temp = gen_reg_rtx (mode);
1147 if (GET_CODE (op1) == CONST_INT)
1149 /* Emit them as real moves instead of a HIGH/LO_SUM,
1150 this way CSE can see everything and reuse intermediate
1151 values if it wants. */
1152 if (TARGET_ARCH64
1153 && HOST_BITS_PER_WIDE_INT != 64
1154 && (INTVAL (op1) & 0x80000000) != 0)
1156 emit_insn (gen_rtx_SET (VOIDmode,
1157 temp,
1158 gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx,
1159 INTVAL (op1) & 0xfffffc00, 0)));
1161 else
1163 emit_insn (gen_rtx_SET (VOIDmode,
1164 temp,
1165 GEN_INT (INTVAL (op1) & 0xfffffc00)));
1167 emit_insn (gen_rtx_SET (VOIDmode,
1168 op0,
1169 gen_rtx_IOR (mode,
1170 temp,
1171 GEN_INT (INTVAL (op1) & 0x3ff))));
1173 else
1175 /* A symbol, emit in the traditional way. */
1176 emit_insn (gen_rtx_SET (VOIDmode,
1177 temp,
1178 gen_rtx_HIGH (mode,
1179 op1)));
1180 emit_insn (gen_rtx_SET (VOIDmode,
1181 op0,
1182 gen_rtx_LO_SUM (mode,
1183 temp,
1184 op1)));
1190 /* Sparc-v9 code-model support. */
1191 void
1192 sparc_emit_set_symbolic_const64 (op0, op1, temp1)
1193 rtx op0;
1194 rtx op1;
1195 rtx temp1;
1197 switch (sparc_cmodel)
1199 case CM_MEDLOW:
1200 /* The range spanned by all instructions in the object is less
1201 than 2^31 bytes (2GB) and the distance from any instruction
1202 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1203 than 2^31 bytes (2GB).
1205 The executable must be in the low 4TB of the virtual address
1206 space.
1208 sethi %hi(symbol), %temp
1209 or %temp, %lo(symbol), %reg */
1210 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1211 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1212 break;
1214 case CM_MEDMID:
1215 /* The range spanned by all instructions in the object is less
1216 than 2^31 bytes (2GB) and the distance from any instruction
1217 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1218 than 2^31 bytes (2GB).
1220 The executable must be in the low 16TB of the virtual address
1221 space.
1223 sethi %h44(symbol), %temp1
1224 or %temp1, %m44(symbol), %temp2
1225 sllx %temp2, 12, %temp3
1226 or %temp3, %l44(symbol), %reg */
1227 emit_insn (gen_seth44 (op0, op1));
1228 emit_insn (gen_setm44 (op0, op0, op1));
1229 emit_insn (gen_rtx_SET (VOIDmode, temp1,
1230 gen_rtx_ASHIFT (DImode, op0, GEN_INT (12))));
1231 emit_insn (gen_setl44 (op0, temp1, op1));
1232 break;
1234 case CM_MEDANY:
1235 /* The range spanned by all instructions in the object is less
1236 than 2^31 bytes (2GB) and the distance from any instruction
1237 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1238 than 2^31 bytes (2GB).
1240 The executable can be placed anywhere in the virtual address
1241 space.
1243 sethi %hh(symbol), %temp1
1244 sethi %lm(symbol), %temp2
1245 or %temp1, %hm(symbol), %temp3
1246 or %temp2, %lo(symbol), %temp4
1247 sllx %temp3, 32, %temp5
1248 or %temp4, %temp5, %reg */
1250 /* Getting this right wrt. reloading is really tricky.
1251 We _MUST_ have a separate temporary at this point,
1252 if we don't barf immediately instead of generating
1253 incorrect code. */
1254 if (temp1 == op0)
1255 abort ();
1257 emit_insn (gen_sethh (op0, op1));
1258 emit_insn (gen_setlm (temp1, op1));
1259 emit_insn (gen_sethm (op0, op0, op1));
1260 emit_insn (gen_rtx_SET (VOIDmode, op0,
1261 gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
1262 emit_insn (gen_rtx_SET (VOIDmode, op0,
1263 gen_rtx_PLUS (DImode, op0, temp1)));
1264 emit_insn (gen_setlo (op0, op0, op1));
1265 break;
1267 case CM_EMBMEDANY:
1268 /* Old old old backwards compatibility kruft here.
1269 Essentially it is MEDLOW with a fixed 64-bit
1270 virtual base added to all data segment addresses.
1271 Text-segment stuff is computed like MEDANY, we can't
1272 reuse the code above because the relocation knobs
1273 look different.
1275 Data segment: sethi %hi(symbol), %temp1
1276 or %temp1, %lo(symbol), %temp2
1277 add %temp2, EMBMEDANY_BASE_REG, %reg
1279 Text segment: sethi %uhi(symbol), %temp1
1280 sethi %hi(symbol), %temp2
1281 or %temp1, %ulo(symbol), %temp3
1282 or %temp2, %lo(symbol), %temp4
1283 sllx %temp3, 32, %temp5
1284 or %temp4, %temp5, %reg */
1285 if (data_segment_operand (op1, GET_MODE (op1)))
1287 emit_insn (gen_embmedany_sethi (temp1, op1));
1288 emit_insn (gen_embmedany_brsum (op0, temp1));
1289 emit_insn (gen_embmedany_losum (op0, op0, op1));
1291 else
1293 /* Getting this right wrt. reloading is really tricky.
1294 We _MUST_ have a separate temporary at this point,
1295 so we barf immediately instead of generating
1296 incorrect code. */
1297 if (temp1 == op0)
1298 abort ();
1300 emit_insn (gen_embmedany_textuhi (op0, op1));
1301 emit_insn (gen_embmedany_texthi (temp1, op1));
1302 emit_insn (gen_embmedany_textulo (op0, op0, op1));
1303 emit_insn (gen_rtx_SET (VOIDmode, op0,
1304 gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
1305 emit_insn (gen_rtx_SET (VOIDmode, op0,
1306 gen_rtx_PLUS (DImode, op0, temp1)));
1307 emit_insn (gen_embmedany_textlo (op0, op0, op1));
1309 break;
1311 default:
1312 abort();
1316 /* These avoid problems when cross compiling. If we do not
1317 go through all this hair then the optimizer will see
1318 invalid REG_EQUAL notes or in some cases none at all. */
1319 static void sparc_emit_set_safe_HIGH64 PROTO ((rtx, HOST_WIDE_INT));
1320 static rtx gen_safe_SET64 PROTO ((rtx, HOST_WIDE_INT));
1321 static rtx gen_safe_OR64 PROTO ((rtx, HOST_WIDE_INT));
1322 static rtx gen_safe_XOR64 PROTO ((rtx, HOST_WIDE_INT));
1324 #if HOST_BITS_PER_WIDE_INT == 64
1325 #define GEN_HIGHINT64(__x) GEN_INT ((__x) & 0xfffffc00)
1326 #define GEN_INT64(__x) GEN_INT (__x)
1327 #else
1328 #define GEN_HIGHINT64(__x) \
1329 gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
1330 (__x) & 0xfffffc00, 0)
1331 #define GEN_INT64(__x) \
1332 gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
1333 (__x) & 0xffffffff, \
1334 ((__x) & 0x80000000 \
1335 ? 0xffffffff : 0))
1336 #endif
1338 /* The optimizer is not to assume anything about exactly
1339 which bits are set for a HIGH, they are unspecified.
1340 Unfortunately this leads to many missed optimizations
1341 during CSE. We mask out the non-HIGH bits, and matches
1342 a plain movdi, to alleviate this problem. */
1343 static void
1344 sparc_emit_set_safe_HIGH64 (dest, val)
1345 rtx dest;
1346 HOST_WIDE_INT val;
1348 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_HIGHINT64 (val)));
1351 static rtx
1352 gen_safe_SET64 (dest, val)
1353 rtx dest;
1354 HOST_WIDE_INT val;
1356 return gen_rtx_SET (VOIDmode, dest, GEN_INT64 (val));
1359 static rtx
1360 gen_safe_OR64 (src, val)
1361 rtx src;
1362 HOST_WIDE_INT val;
1364 return gen_rtx_IOR (DImode, src, GEN_INT64 (val));
1367 static rtx
1368 gen_safe_XOR64 (src, val)
1369 rtx src;
1370 HOST_WIDE_INT val;
1372 return gen_rtx_XOR (DImode, src, GEN_INT64 (val));
1375 /* Worker routines for 64-bit constant formation on arch64.
1376 One of the key things to be doing in these emissions is
1377 to create as many temp REGs as possible. This makes it
1378 possible for half-built constants to be used later when
1379 such values are similar to something required later on.
1380 Without doing this, the optimizer cannot see such
1381 opportunities. */
1383 static void sparc_emit_set_const64_quick1
1384 PROTO((rtx, rtx, unsigned HOST_WIDE_INT, int));
1386 static void
1387 sparc_emit_set_const64_quick1 (op0, temp, low_bits, is_neg)
1388 rtx op0;
1389 rtx temp;
1390 unsigned HOST_WIDE_INT low_bits;
1391 int is_neg;
1393 unsigned HOST_WIDE_INT high_bits;
1395 if (is_neg)
1396 high_bits = (~low_bits) & 0xffffffff;
1397 else
1398 high_bits = low_bits;
1400 sparc_emit_set_safe_HIGH64 (temp, high_bits);
1401 if (!is_neg)
1403 emit_insn (gen_rtx_SET (VOIDmode, op0,
1404 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1406 else
1408 /* If we are XOR'ing with -1, then we should emit a one's complement
1409 instead. This way the combiner will notice logical operations
1410 such as ANDN later on and substitute. */
1411 if ((low_bits & 0x3ff) == 0x3ff)
1413 emit_insn (gen_rtx_SET (VOIDmode, op0,
1414 gen_rtx_NOT (DImode, temp)));
1416 else
1418 emit_insn (gen_rtx_SET (VOIDmode, op0,
1419 gen_safe_XOR64 (temp,
1420 (-0x400 | (low_bits & 0x3ff)))));
1425 static void sparc_emit_set_const64_quick2
1426 PROTO((rtx, rtx, unsigned HOST_WIDE_INT,
1427 unsigned HOST_WIDE_INT, int));
1429 static void
1430 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_immediate, shift_count)
1431 rtx op0;
1432 rtx temp;
1433 unsigned HOST_WIDE_INT high_bits;
1434 unsigned HOST_WIDE_INT low_immediate;
1435 int shift_count;
1437 rtx temp2 = op0;
1439 if ((high_bits & 0xfffffc00) != 0)
1441 sparc_emit_set_safe_HIGH64 (temp, high_bits);
1442 if ((high_bits & ~0xfffffc00) != 0)
1443 emit_insn (gen_rtx_SET (VOIDmode, op0,
1444 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1445 else
1446 temp2 = temp;
1448 else
1450 emit_insn (gen_safe_SET64 (temp, high_bits));
1451 temp2 = temp;
1454 /* Now shift it up into place. */
1455 emit_insn (gen_rtx_SET (VOIDmode, op0,
1456 gen_rtx_ASHIFT (DImode, temp2,
1457 GEN_INT (shift_count))));
1459 /* If there is a low immediate part piece, finish up by
1460 putting that in as well. */
1461 if (low_immediate != 0)
1462 emit_insn (gen_rtx_SET (VOIDmode, op0,
1463 gen_safe_OR64 (op0, low_immediate)));
1466 static void sparc_emit_set_const64_longway
1467 PROTO((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1469 /* Full 64-bit constant decomposition. Even though this is the
1470 'worst' case, we still optimize a few things away. */
1471 static void
1472 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits)
1473 rtx op0;
1474 rtx temp;
1475 unsigned HOST_WIDE_INT high_bits;
1476 unsigned HOST_WIDE_INT low_bits;
1478 rtx sub_temp;
1480 if (reload_in_progress || reload_completed)
1481 sub_temp = op0;
1482 else
1483 sub_temp = gen_reg_rtx (DImode);
1485 if ((high_bits & 0xfffffc00) != 0)
1487 sparc_emit_set_safe_HIGH64 (temp, high_bits);
1488 if ((high_bits & ~0xfffffc00) != 0)
1489 emit_insn (gen_rtx_SET (VOIDmode,
1490 sub_temp,
1491 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1492 else
1493 sub_temp = temp;
1495 else
1497 emit_insn (gen_safe_SET64 (temp, high_bits));
1498 sub_temp = temp;
1501 if (!reload_in_progress && !reload_completed)
1503 rtx temp2 = gen_reg_rtx (DImode);
1504 rtx temp3 = gen_reg_rtx (DImode);
1505 rtx temp4 = gen_reg_rtx (DImode);
1507 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1508 gen_rtx_ASHIFT (DImode, sub_temp,
1509 GEN_INT (32))));
1511 sparc_emit_set_safe_HIGH64 (temp2, low_bits);
1512 if ((low_bits & ~0xfffffc00) != 0)
1514 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1515 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
1516 emit_insn (gen_rtx_SET (VOIDmode, op0,
1517 gen_rtx_PLUS (DImode, temp4, temp3)));
1519 else
1521 emit_insn (gen_rtx_SET (VOIDmode, op0,
1522 gen_rtx_PLUS (DImode, temp4, temp2)));
1525 else
1527 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
1528 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
1529 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
1530 int to_shift = 12;
1532 /* We are in the middle of reload, so this is really
1533 painful. However we do still make an attempt to
1534 avoid emitting truly stupid code. */
1535 if (low1 != const0_rtx)
1537 emit_insn (gen_rtx_SET (VOIDmode, op0,
1538 gen_rtx_ASHIFT (DImode, sub_temp,
1539 GEN_INT (to_shift))));
1540 emit_insn (gen_rtx_SET (VOIDmode, op0,
1541 gen_rtx_IOR (DImode, op0, low1)));
1542 sub_temp = op0;
1543 to_shift = 12;
1545 else
1547 to_shift += 12;
1549 if (low2 != const0_rtx)
1551 emit_insn (gen_rtx_SET (VOIDmode, op0,
1552 gen_rtx_ASHIFT (DImode, sub_temp,
1553 GEN_INT (to_shift))));
1554 emit_insn (gen_rtx_SET (VOIDmode, op0,
1555 gen_rtx_IOR (DImode, op0, low2)));
1556 sub_temp = op0;
1557 to_shift = 8;
1559 else
1561 to_shift += 8;
1563 emit_insn (gen_rtx_SET (VOIDmode, op0,
1564 gen_rtx_ASHIFT (DImode, sub_temp,
1565 GEN_INT (to_shift))));
1566 if (low3 != const0_rtx)
1567 emit_insn (gen_rtx_SET (VOIDmode, op0,
1568 gen_rtx_IOR (DImode, op0, low3)));
1569 /* phew... */
1573 /* Analyze a 64-bit constant for certain properties. */
1574 static void analyze_64bit_constant
1575 PROTO((unsigned HOST_WIDE_INT,
1576 unsigned HOST_WIDE_INT,
1577 int *, int *, int *));
1579 static void
1580 analyze_64bit_constant (high_bits, low_bits, hbsp, lbsp, abbasp)
1581 unsigned HOST_WIDE_INT high_bits, low_bits;
1582 int *hbsp, *lbsp, *abbasp;
1584 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1585 int i;
1587 lowest_bit_set = highest_bit_set = -1;
1588 i = 0;
1591 if ((lowest_bit_set == -1)
1592 && ((low_bits >> i) & 1))
1593 lowest_bit_set = i;
1594 if ((highest_bit_set == -1)
1595 && ((high_bits >> (32 - i - 1)) & 1))
1596 highest_bit_set = (64 - i - 1);
1598 while (++i < 32
1599 && ((highest_bit_set == -1)
1600 || (lowest_bit_set == -1)));
1601 if (i == 32)
1603 i = 0;
1606 if ((lowest_bit_set == -1)
1607 && ((high_bits >> i) & 1))
1608 lowest_bit_set = i + 32;
1609 if ((highest_bit_set == -1)
1610 && ((low_bits >> (32 - i - 1)) & 1))
1611 highest_bit_set = 32 - i - 1;
1613 while (++i < 32
1614 && ((highest_bit_set == -1)
1615 || (lowest_bit_set == -1)));
1617 /* If there are no bits set this should have gone out
1618 as one instruction! */
1619 if (lowest_bit_set == -1
1620 || highest_bit_set == -1)
1621 abort ();
1622 all_bits_between_are_set = 1;
1623 for (i = lowest_bit_set; i <= highest_bit_set; i++)
1625 if (i < 32)
1627 if ((low_bits & (1 << i)) != 0)
1628 continue;
1630 else
1632 if ((high_bits & (1 << (i - 32))) != 0)
1633 continue;
1635 all_bits_between_are_set = 0;
1636 break;
1638 *hbsp = highest_bit_set;
1639 *lbsp = lowest_bit_set;
1640 *abbasp = all_bits_between_are_set;
1643 static int const64_is_2insns
1644 PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1646 static int
1647 const64_is_2insns (high_bits, low_bits)
1648 unsigned HOST_WIDE_INT high_bits, low_bits;
1650 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1652 if (high_bits == 0
1653 || high_bits == 0xffffffff)
1654 return 1;
1656 analyze_64bit_constant (high_bits, low_bits,
1657 &highest_bit_set, &lowest_bit_set,
1658 &all_bits_between_are_set);
1660 if ((highest_bit_set == 63
1661 || lowest_bit_set == 0)
1662 && all_bits_between_are_set != 0)
1663 return 1;
1665 if ((highest_bit_set - lowest_bit_set) < 21)
1666 return 1;
1668 return 0;
1671 static unsigned HOST_WIDE_INT create_simple_focus_bits
1672 PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1673 int, int));
1675 static unsigned HOST_WIDE_INT
1676 create_simple_focus_bits (high_bits, low_bits, lowest_bit_set, shift)
1677 unsigned HOST_WIDE_INT high_bits, low_bits;
1678 int lowest_bit_set, shift;
1680 HOST_WIDE_INT hi, lo;
1682 if (lowest_bit_set < 32)
1684 lo = (low_bits >> lowest_bit_set) << shift;
1685 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1687 else
1689 lo = 0;
1690 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1692 if (hi & lo)
1693 abort ();
1694 return (hi | lo);
1697 /* Here we are sure to be arch64 and this is an integer constant
1698 being loaded into a register. Emit the most efficient
1699 insn sequence possible. Detection of all the 1-insn cases
1700 has been done already. */
1701 void
1702 sparc_emit_set_const64 (op0, op1)
1703 rtx op0;
1704 rtx op1;
1706 unsigned HOST_WIDE_INT high_bits, low_bits;
1707 int lowest_bit_set, highest_bit_set;
1708 int all_bits_between_are_set;
1709 rtx temp;
1711 /* Sanity check that we know what we are working with. */
1712 if (! TARGET_ARCH64
1713 || GET_CODE (op0) != REG
1714 || (REGNO (op0) >= SPARC_FIRST_FP_REG
1715 && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
1716 abort ();
1718 if (reload_in_progress || reload_completed)
1719 temp = op0;
1720 else
1721 temp = gen_reg_rtx (DImode);
1723 if (GET_CODE (op1) != CONST_DOUBLE
1724 && GET_CODE (op1) != CONST_INT)
1726 sparc_emit_set_symbolic_const64 (op0, op1, temp);
1727 return;
1730 if (GET_CODE (op1) == CONST_DOUBLE)
1732 #if HOST_BITS_PER_WIDE_INT == 64
1733 high_bits = (CONST_DOUBLE_LOW (op1) >> 32) & 0xffffffff;
1734 low_bits = CONST_DOUBLE_LOW (op1) & 0xffffffff;
1735 #else
1736 high_bits = CONST_DOUBLE_HIGH (op1);
1737 low_bits = CONST_DOUBLE_LOW (op1);
1738 #endif
1740 else
1742 #if HOST_BITS_PER_WIDE_INT == 64
1743 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1744 low_bits = (INTVAL (op1) & 0xffffffff);
1745 #else
1746 high_bits = ((INTVAL (op1) < 0) ?
1747 0xffffffff :
1748 0x00000000);
1749 low_bits = INTVAL (op1);
1750 #endif
1753 /* low_bits bits 0 --> 31
1754 high_bits bits 32 --> 63 */
1756 analyze_64bit_constant (high_bits, low_bits,
1757 &highest_bit_set, &lowest_bit_set,
1758 &all_bits_between_are_set);
1760 /* First try for a 2-insn sequence. */
1762 /* These situations are preferred because the optimizer can
1763 * do more things with them:
1764 * 1) mov -1, %reg
1765 * sllx %reg, shift, %reg
1766 * 2) mov -1, %reg
1767 * srlx %reg, shift, %reg
1768 * 3) mov some_small_const, %reg
1769 * sllx %reg, shift, %reg
1771 if (((highest_bit_set == 63
1772 || lowest_bit_set == 0)
1773 && all_bits_between_are_set != 0)
1774 || ((highest_bit_set - lowest_bit_set) < 12))
1776 HOST_WIDE_INT the_const = -1;
1777 int shift = lowest_bit_set;
1779 if ((highest_bit_set != 63
1780 && lowest_bit_set != 0)
1781 || all_bits_between_are_set == 0)
1783 the_const =
1784 create_simple_focus_bits (high_bits, low_bits,
1785 lowest_bit_set, 0);
1787 else if (lowest_bit_set == 0)
1788 shift = -(63 - highest_bit_set);
1790 if (! SPARC_SIMM13_P (the_const))
1791 abort ();
1793 emit_insn (gen_safe_SET64 (temp, the_const));
1794 if (shift > 0)
1795 emit_insn (gen_rtx_SET (VOIDmode,
1796 op0,
1797 gen_rtx_ASHIFT (DImode,
1798 temp,
1799 GEN_INT (shift))));
1800 else if (shift < 0)
1801 emit_insn (gen_rtx_SET (VOIDmode,
1802 op0,
1803 gen_rtx_LSHIFTRT (DImode,
1804 temp,
1805 GEN_INT (-shift))));
1806 else
1807 abort ();
1808 return;
1811 /* Now a range of 22 or less bits set somewhere.
1812 * 1) sethi %hi(focus_bits), %reg
1813 * sllx %reg, shift, %reg
1814 * 2) sethi %hi(focus_bits), %reg
1815 * srlx %reg, shift, %reg
1817 if ((highest_bit_set - lowest_bit_set) < 21)
1819 unsigned HOST_WIDE_INT focus_bits =
1820 create_simple_focus_bits (high_bits, low_bits,
1821 lowest_bit_set, 10);
1823 if (! SPARC_SETHI_P (focus_bits))
1824 abort ();
1826 sparc_emit_set_safe_HIGH64 (temp, focus_bits);
1828 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
1829 if (lowest_bit_set < 10)
1830 emit_insn (gen_rtx_SET (VOIDmode,
1831 op0,
1832 gen_rtx_LSHIFTRT (DImode, temp,
1833 GEN_INT (10 - lowest_bit_set))));
1834 else if (lowest_bit_set > 10)
1835 emit_insn (gen_rtx_SET (VOIDmode,
1836 op0,
1837 gen_rtx_ASHIFT (DImode, temp,
1838 GEN_INT (lowest_bit_set - 10))));
1839 else
1840 abort ();
1841 return;
1844 /* 1) sethi %hi(low_bits), %reg
1845 * or %reg, %lo(low_bits), %reg
1846 * 2) sethi %hi(~low_bits), %reg
1847 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1849 if (high_bits == 0
1850 || high_bits == 0xffffffff)
1852 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1853 (high_bits == 0xffffffff));
1854 return;
1857 /* Now, try 3-insn sequences. */
1859 /* 1) sethi %hi(high_bits), %reg
1860 * or %reg, %lo(high_bits), %reg
1861 * sllx %reg, 32, %reg
1863 if (low_bits == 0)
1865 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1866 return;
1869 /* We may be able to do something quick
1870 when the constant is negated, so try that. */
1871 if (const64_is_2insns ((~high_bits) & 0xffffffff,
1872 (~low_bits) & 0xfffffc00))
1874 /* NOTE: The trailing bits get XOR'd so we need the
1875 non-negated bits, not the negated ones. */
1876 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1878 if ((((~high_bits) & 0xffffffff) == 0
1879 && ((~low_bits) & 0x80000000) == 0)
1880 || (((~high_bits) & 0xffffffff) == 0xffffffff
1881 && ((~low_bits) & 0x80000000) != 0))
1883 int fast_int = (~low_bits & 0xffffffff);
1885 if ((SPARC_SETHI_P (fast_int)
1886 && (~high_bits & 0xffffffff) == 0)
1887 || SPARC_SIMM13_P (fast_int))
1888 emit_insn (gen_safe_SET64 (temp, fast_int));
1889 else
1890 sparc_emit_set_const64 (temp, GEN_INT64 (fast_int));
1892 else
1894 rtx negated_const;
1895 #if HOST_BITS_PER_WIDE_INT == 64
1896 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1897 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
1898 #else
1899 negated_const = gen_rtx_CONST_DOUBLE (DImode, const0_rtx,
1900 (~low_bits) & 0xfffffc00,
1901 (~high_bits) & 0xffffffff);
1902 #endif
1903 sparc_emit_set_const64 (temp, negated_const);
1906 /* If we are XOR'ing with -1, then we should emit a one's complement
1907 instead. This way the combiner will notice logical operations
1908 such as ANDN later on and substitute. */
1909 if (trailing_bits == 0x3ff)
1911 emit_insn (gen_rtx_SET (VOIDmode, op0,
1912 gen_rtx_NOT (DImode, temp)));
1914 else
1916 emit_insn (gen_rtx_SET (VOIDmode,
1917 op0,
1918 gen_safe_XOR64 (temp,
1919 (-0x400 | trailing_bits))));
1921 return;
1924 /* 1) sethi %hi(xxx), %reg
1925 * or %reg, %lo(xxx), %reg
1926 * sllx %reg, yyy, %reg
1928 * ??? This is just a generalized version of the low_bits==0
1929 * thing above, FIXME...
1931 if ((highest_bit_set - lowest_bit_set) < 32)
1933 unsigned HOST_WIDE_INT focus_bits =
1934 create_simple_focus_bits (high_bits, low_bits,
1935 lowest_bit_set, 0);
1937 /* We can't get here in this state. */
1938 if (highest_bit_set < 32
1939 || lowest_bit_set >= 32)
1940 abort ();
1942 /* So what we know is that the set bits straddle the
1943 middle of the 64-bit word. */
1944 sparc_emit_set_const64_quick2 (op0, temp,
1945 focus_bits, 0,
1946 lowest_bit_set);
1947 return;
1950 /* 1) sethi %hi(high_bits), %reg
1951 * or %reg, %lo(high_bits), %reg
1952 * sllx %reg, 32, %reg
1953 * or %reg, low_bits, %reg
1955 if (SPARC_SIMM13_P(low_bits)
1956 && ((int)low_bits > 0))
1958 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
1959 return;
1962 /* The easiest way when all else fails, is full decomposition. */
1963 #if 0
1964 printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
1965 high_bits, low_bits, ~high_bits, ~low_bits);
1966 #endif
1967 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
1970 /* X and Y are two things to compare using CODE. Emit the compare insn and
1971 return the rtx for the cc reg in the proper mode. */
1974 gen_compare_reg (code, x, y)
1975 enum rtx_code code;
1976 rtx x, y;
1978 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
1979 rtx cc_reg;
1981 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
1982 fcc regs (cse can't tell they're really call clobbered regs and will
1983 remove a duplicate comparison even if there is an intervening function
1984 call - it will then try to reload the cc reg via an int reg which is why
1985 we need the movcc patterns). It is possible to provide the movcc
1986 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
1987 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
1988 to tell cse that CCFPE mode registers (even pseudos) are call
1989 clobbered. */
1991 /* ??? This is an experiment. Rather than making changes to cse which may
1992 or may not be easy/clean, we do our own cse. This is possible because
1993 we will generate hard registers. Cse knows they're call clobbered (it
1994 doesn't know the same thing about pseudos). If we guess wrong, no big
1995 deal, but if we win, great! */
1997 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1998 #if 1 /* experiment */
2000 int reg;
2001 /* We cycle through the registers to ensure they're all exercised. */
2002 static int next_fcc_reg = 0;
2003 /* Previous x,y for each fcc reg. */
2004 static rtx prev_args[4][2];
2006 /* Scan prev_args for x,y. */
2007 for (reg = 0; reg < 4; reg++)
2008 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2009 break;
2010 if (reg == 4)
2012 reg = next_fcc_reg;
2013 prev_args[reg][0] = x;
2014 prev_args[reg][1] = y;
2015 next_fcc_reg = (next_fcc_reg + 1) & 3;
2017 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2019 #else
2020 cc_reg = gen_reg_rtx (mode);
2021 #endif /* ! experiment */
2022 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2023 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2024 else
2025 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2027 emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
2028 gen_rtx_COMPARE (mode, x, y)));
2030 return cc_reg;
2033 /* This function is used for v9 only.
2034 CODE is the code for an Scc's comparison.
2035 OPERANDS[0] is the target of the Scc insn.
2036 OPERANDS[1] is the value we compare against const0_rtx (which hasn't
2037 been generated yet).
2039 This function is needed to turn
2041 (set (reg:SI 110)
2042 (gt (reg:CCX 100 %icc)
2043 (const_int 0)))
2044 into
2045 (set (reg:SI 110)
2046 (gt:DI (reg:CCX 100 %icc)
2047 (const_int 0)))
2049 IE: The instruction recognizer needs to see the mode of the comparison to
2050 find the right instruction. We could use "gt:DI" right in the
2051 define_expand, but leaving it out allows us to handle DI, SI, etc.
2053 We refer to the global sparc compare operands sparc_compare_op0 and
2054 sparc_compare_op1. */
2057 gen_v9_scc (compare_code, operands)
2058 enum rtx_code compare_code;
2059 register rtx *operands;
2061 rtx temp, op0, op1;
2063 if (! TARGET_ARCH64
2064 && (GET_MODE (sparc_compare_op0) == DImode
2065 || GET_MODE (operands[0]) == DImode))
2066 return 0;
2068 /* Handle the case where operands[0] == sparc_compare_op0.
2069 We "early clobber" the result. */
2070 if (REGNO (operands[0]) == REGNO (sparc_compare_op0))
2072 op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
2073 emit_move_insn (op0, sparc_compare_op0);
2075 else
2076 op0 = sparc_compare_op0;
2077 /* For consistency in the following. */
2078 op1 = sparc_compare_op1;
2080 /* Try to use the movrCC insns. */
2081 if (TARGET_ARCH64
2082 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
2083 && op1 == const0_rtx
2084 && v9_regcmp_p (compare_code))
2086 /* Special case for op0 != 0. This can be done with one instruction if
2087 operands[0] == sparc_compare_op0. We don't assume they are equal
2088 now though. */
2090 if (compare_code == NE
2091 && GET_MODE (operands[0]) == DImode
2092 && GET_MODE (op0) == DImode)
2094 emit_insn (gen_rtx_SET (VOIDmode, operands[0], op0));
2095 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2096 gen_rtx_IF_THEN_ELSE (DImode,
2097 gen_rtx_fmt_ee (compare_code, DImode,
2098 op0, const0_rtx),
2099 const1_rtx,
2100 operands[0])));
2101 return 1;
2104 emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
2105 if (GET_MODE (op0) != DImode)
2107 temp = gen_reg_rtx (DImode);
2108 convert_move (temp, op0, 0);
2110 else
2111 temp = op0;
2112 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2113 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
2114 gen_rtx_fmt_ee (compare_code, DImode,
2115 temp, const0_rtx),
2116 const1_rtx,
2117 operands[0])));
2118 return 1;
2120 else
2122 operands[1] = gen_compare_reg (compare_code, op0, op1);
2124 switch (GET_MODE (operands[1]))
2126 case CCmode :
2127 case CCXmode :
2128 case CCFPEmode :
2129 case CCFPmode :
2130 break;
2131 default :
2132 abort ();
2134 emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
2135 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2136 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
2137 gen_rtx_fmt_ee (compare_code,
2138 GET_MODE (operands[1]),
2139 operands[1], const0_rtx),
2140 const1_rtx, operands[0])));
2141 return 1;
2145 /* Emit a conditional jump insn for the v9 architecture using comparison code
2146 CODE and jump target LABEL.
2147 This function exists to take advantage of the v9 brxx insns. */
2149 void
2150 emit_v9_brxx_insn (code, op0, label)
2151 enum rtx_code code;
2152 rtx op0, label;
2154 emit_jump_insn (gen_rtx_SET (VOIDmode,
2155 pc_rtx,
2156 gen_rtx_IF_THEN_ELSE (VOIDmode,
2157 gen_rtx_fmt_ee (code, GET_MODE (op0),
2158 op0, const0_rtx),
2159 gen_rtx_LABEL_REF (VOIDmode, label),
2160 pc_rtx)));
2163 /* Return nonzero if a return peephole merging return with
2164 setting of output register is ok. */
2166 leaf_return_peephole_ok ()
2168 return (actual_fsize == 0);
2171 /* Return nonzero if TRIAL can go into the function epilogue's
2172 delay slot. SLOT is the slot we are trying to fill. */
2175 eligible_for_epilogue_delay (trial, slot)
2176 rtx trial;
2177 int slot;
2179 rtx pat, src;
2181 if (slot >= 1)
2182 return 0;
2184 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2185 return 0;
2187 if (get_attr_length (trial) != 1)
2188 return 0;
2190 /* If %g0 is live, there are lots of things we can't handle.
2191 Rather than trying to find them all now, let's punt and only
2192 optimize things as necessary. */
2193 if (TARGET_LIVE_G0)
2194 return 0;
2196 /* In the case of a true leaf function, anything can go into the delay slot.
2197 A delay slot only exists however if the frame size is zero, otherwise
2198 we will put an insn to adjust the stack after the return. */
2199 if (current_function_uses_only_leaf_regs)
2201 if (leaf_return_peephole_ok ())
2202 return ((get_attr_in_uncond_branch_delay (trial)
2203 == IN_BRANCH_DELAY_TRUE));
2204 return 0;
2207 /* If only trivial `restore' insns work, nothing can go in the
2208 delay slot. */
2209 else if (TARGET_BROKEN_SAVERESTORE)
2210 return 0;
2212 pat = PATTERN (trial);
2214 /* Otherwise, only operations which can be done in tandem with
2215 a `restore' insn can go into the delay slot. */
2216 if (GET_CODE (SET_DEST (pat)) != REG
2217 || REGNO (SET_DEST (pat)) >= 32
2218 || REGNO (SET_DEST (pat)) < 24)
2219 return 0;
2221 /* The set of insns matched here must agree precisely with the set of
2222 patterns paired with a RETURN in sparc.md. */
2224 src = SET_SRC (pat);
2226 /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64. */
2227 if (arith_operand (src, GET_MODE (src)))
2229 if (TARGET_ARCH64)
2230 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2231 else
2232 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
2235 /* This matches "*return_di". */
2236 else if (arith_double_operand (src, GET_MODE (src)))
2237 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2239 /* This matches "*return_sf_no_fpu". */
2240 else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
2241 && register_operand (src, SFmode))
2242 return 1;
2244 /* This matches "*return_addsi". */
2245 else if (GET_CODE (src) == PLUS
2246 && arith_operand (XEXP (src, 0), SImode)
2247 && arith_operand (XEXP (src, 1), SImode)
2248 && (register_operand (XEXP (src, 0), SImode)
2249 || register_operand (XEXP (src, 1), SImode)))
2250 return 1;
2252 /* This matches "*return_adddi". */
2253 else if (GET_CODE (src) == PLUS
2254 && arith_double_operand (XEXP (src, 0), DImode)
2255 && arith_double_operand (XEXP (src, 1), DImode)
2256 && (register_operand (XEXP (src, 0), DImode)
2257 || register_operand (XEXP (src, 1), DImode)))
2258 return 1;
2260 return 0;
2263 static int
2264 check_return_regs (x)
2265 rtx x;
2267 switch (GET_CODE (x))
2269 case REG:
2270 return IN_OR_GLOBAL_P (x);
2272 case CONST_INT:
2273 case CONST_DOUBLE:
2274 case CONST:
2275 case SYMBOL_REF:
2276 case LABEL_REF:
2277 return 1;
2279 case SET:
2280 case IOR:
2281 case AND:
2282 case XOR:
2283 case PLUS:
2284 case MINUS:
2285 if (check_return_regs (XEXP (x, 1)) == 0)
2286 return 0;
2287 case NOT:
2288 case NEG:
2289 case MEM:
2290 return check_return_regs (XEXP (x, 0));
2292 default:
2293 return 0;
2298 /* Return 1 if TRIAL references only in and global registers. */
2300 eligible_for_return_delay (trial)
2301 rtx trial;
2303 if (GET_CODE (PATTERN (trial)) != SET)
2304 return 0;
2306 return check_return_regs (PATTERN (trial));
2310 short_branch (uid1, uid2)
2311 int uid1, uid2;
2313 unsigned int delta = insn_addresses[uid1] - insn_addresses[uid2];
2314 if (delta + 1024 < 2048)
2315 return 1;
2316 /* warning ("long branch, distance %d", delta); */
2317 return 0;
2320 /* Return non-zero if REG is not used after INSN.
2321 We assume REG is a reload reg, and therefore does
2322 not live past labels or calls or jumps. */
2324 reg_unused_after (reg, insn)
2325 rtx reg;
2326 rtx insn;
2328 enum rtx_code code, prev_code = UNKNOWN;
2330 while ((insn = NEXT_INSN (insn)))
2332 if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2333 return 1;
2335 code = GET_CODE (insn);
2336 if (GET_CODE (insn) == CODE_LABEL)
2337 return 1;
2339 if (GET_RTX_CLASS (code) == 'i')
2341 rtx set = single_set (insn);
2342 int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2343 if (set && in_src)
2344 return 0;
2345 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2346 return 1;
2347 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2348 return 0;
2350 prev_code = code;
2352 return 1;
2355 /* The table we use to reference PIC data. */
2356 static rtx global_offset_table;
2358 /* The function we use to get at it. */
2359 static rtx get_pc_symbol;
2360 static char get_pc_symbol_name[256];
2362 /* Ensure that we are not using patterns that are not OK with PIC. */
2365 check_pic (i)
2366 int i;
2368 switch (flag_pic)
2370 case 1:
2371 if (GET_CODE (recog_operand[i]) == SYMBOL_REF
2372 || (GET_CODE (recog_operand[i]) == CONST
2373 && ! (GET_CODE (XEXP (recog_operand[i], 0)) == MINUS
2374 && (XEXP (XEXP (recog_operand[i], 0), 0)
2375 == global_offset_table)
2376 && (GET_CODE (XEXP (XEXP (recog_operand[i], 0), 1))
2377 == CONST))))
2378 abort ();
2379 case 2:
2380 default:
2381 return 1;
2385 /* Return true if X is an address which needs a temporary register when
2386 reloaded while generating PIC code. */
2389 pic_address_needs_scratch (x)
2390 rtx x;
2392 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
2393 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2394 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2395 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2396 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2397 return 1;
2399 return 0;
2402 /* Legitimize PIC addresses. If the address is already position-independent,
2403 we return ORIG. Newly generated position-independent addresses go into a
2404 reg. This is REG if non zero, otherwise we allocate register(s) as
2405 necessary. */
2408 legitimize_pic_address (orig, mode, reg)
2409 rtx orig;
2410 enum machine_mode mode ATTRIBUTE_UNUSED;
2411 rtx reg;
2413 if (GET_CODE (orig) == SYMBOL_REF)
2415 rtx pic_ref, address;
2416 rtx insn;
2418 if (reg == 0)
2420 if (reload_in_progress || reload_completed)
2421 abort ();
2422 else
2423 reg = gen_reg_rtx (Pmode);
2426 if (flag_pic == 2)
2428 /* If not during reload, allocate another temp reg here for loading
2429 in the address, so that these instructions can be optimized
2430 properly. */
2431 rtx temp_reg = ((reload_in_progress || reload_completed)
2432 ? reg : gen_reg_rtx (Pmode));
2434 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
2435 won't get confused into thinking that these two instructions
2436 are loading in the true address of the symbol. If in the
2437 future a PIC rtx exists, that should be used instead. */
2438 if (Pmode == SImode)
2440 emit_insn (gen_movsi_high_pic (temp_reg, orig));
2441 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
2443 else
2445 emit_insn (gen_movdi_high_pic (temp_reg, orig));
2446 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
2448 address = temp_reg;
2450 else
2451 address = orig;
2453 pic_ref = gen_rtx_MEM (Pmode,
2454 gen_rtx_PLUS (Pmode,
2455 pic_offset_table_rtx, address));
2456 current_function_uses_pic_offset_table = 1;
2457 RTX_UNCHANGING_P (pic_ref) = 1;
2458 insn = emit_move_insn (reg, pic_ref);
2459 /* Put a REG_EQUAL note on this insn, so that it can be optimized
2460 by loop. */
2461 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
2462 REG_NOTES (insn));
2463 return reg;
2465 else if (GET_CODE (orig) == CONST)
2467 rtx base, offset;
2469 if (GET_CODE (XEXP (orig, 0)) == PLUS
2470 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
2471 return orig;
2473 if (reg == 0)
2475 if (reload_in_progress || reload_completed)
2476 abort ();
2477 else
2478 reg = gen_reg_rtx (Pmode);
2481 if (GET_CODE (XEXP (orig, 0)) == PLUS)
2483 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
2484 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
2485 base == reg ? 0 : reg);
2487 else
2488 abort ();
2490 if (GET_CODE (offset) == CONST_INT)
2492 if (SMALL_INT (offset))
2493 return plus_constant_for_output (base, INTVAL (offset));
2494 else if (! reload_in_progress && ! reload_completed)
2495 offset = force_reg (Pmode, offset);
2496 else
2497 /* If we reach here, then something is seriously wrong. */
2498 abort ();
2500 return gen_rtx_PLUS (Pmode, base, offset);
2502 else if (GET_CODE (orig) == LABEL_REF)
2503 /* ??? Why do we do this? */
2504 /* Now movsi_pic_label_ref uses it, but we ought to be checking that
2505 the register is live instead, in case it is eliminated. */
2506 current_function_uses_pic_offset_table = 1;
2508 return orig;
2511 /* Return the RTX for insns to set the PIC register. */
2513 static rtx
2514 pic_setup_code ()
2516 rtx seq;
2518 start_sequence ();
2519 emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
2520 get_pc_symbol));
2521 seq = gen_sequence ();
2522 end_sequence ();
2524 return seq;
2527 /* Emit special PIC prologues and epilogues. */
2529 void
2530 finalize_pic ()
2532 /* Labels to get the PC in the prologue of this function. */
2533 int orig_flag_pic = flag_pic;
2534 rtx insn;
2536 if (current_function_uses_pic_offset_table == 0)
2537 return;
2539 if (! flag_pic)
2540 abort ();
2542 /* If we havn't emitted the special get_pc helper function, do so now. */
2543 if (get_pc_symbol_name[0] == 0)
2545 int align;
2547 ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
2548 text_section ();
2550 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
2551 if (align > 0)
2552 ASM_OUTPUT_ALIGN (asm_out_file, align);
2553 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
2554 fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
2557 /* Initialize every time through, since we can't easily
2558 know this to be permanent. */
2559 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2560 get_pc_symbol = gen_rtx_SYMBOL_REF (Pmode, get_pc_symbol_name);
2561 flag_pic = 0;
2563 emit_insn_after (pic_setup_code (), get_insns ());
2565 /* Insert the code in each nonlocal goto receiver.
2566 If you make changes here or to the nonlocal_goto_receiver
2567 pattern, make sure the unspec_volatile numbers still
2568 match. */
2569 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2570 if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
2571 && XINT (PATTERN (insn), 1) == 5)
2572 emit_insn_after (pic_setup_code (), insn);
2574 flag_pic = orig_flag_pic;
2576 /* Need to emit this whether or not we obey regdecls,
2577 since setjmp/longjmp can cause life info to screw up.
2578 ??? In the case where we don't obey regdecls, this is not sufficient
2579 since we may not fall out the bottom. */
2580 emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
2583 /* Return 1 if RTX is a MEM which is known to be aligned to at
2584 least an 8 byte boundary. */
2587 mem_min_alignment (mem, desired)
2588 rtx mem;
2589 int desired;
2591 rtx addr, base, offset;
2593 /* If it's not a MEM we can't accept it. */
2594 if (GET_CODE (mem) != MEM)
2595 return 0;
2597 addr = XEXP (mem, 0);
2598 base = offset = NULL_RTX;
2599 if (GET_CODE (addr) == PLUS)
2601 if (GET_CODE (XEXP (addr, 0)) == REG)
2603 base = XEXP (addr, 0);
2605 /* What we are saying here is that if the base
2606 REG is aligned properly, the compiler will make
2607 sure any REG based index upon it will be so
2608 as well. */
2609 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
2610 offset = XEXP (addr, 1);
2611 else
2612 offset = const0_rtx;
2615 else if (GET_CODE (addr) == REG)
2617 base = addr;
2618 offset = const0_rtx;
2621 if (base != NULL_RTX)
2623 int regno = REGNO (base);
2625 if (regno != FRAME_POINTER_REGNUM
2626 && regno != STACK_POINTER_REGNUM)
2628 /* Check if the compiler has recorded some information
2629 about the alignment of the base REG. If reload has
2630 completed, we already matched with proper alignments. */
2631 if (((current_function != 0
2632 && REGNO_POINTER_ALIGN (regno) >= desired)
2633 || reload_completed)
2634 && ((INTVAL (offset) & (desired - 1)) == 0))
2635 return 1;
2637 else
2639 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
2640 return 1;
2643 else if (! TARGET_UNALIGNED_DOUBLES
2644 || CONSTANT_P (addr)
2645 || GET_CODE (addr) == LO_SUM)
2647 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
2648 is true, in which case we can only assume that an access is aligned if
2649 it is to a constant address, or the address involves a LO_SUM. */
2650 return 1;
2653 /* An obviously unaligned address. */
2654 return 0;
2658 /* Vectors to keep interesting information about registers where it can easily
2659 be got. We use to use the actual mode value as the bit number, but there
2660 are more than 32 modes now. Instead we use two tables: one indexed by
2661 hard register number, and one indexed by mode. */
2663 /* The purpose of sparc_mode_class is to shrink the range of modes so that
2664 they all fit (as bit numbers) in a 32 bit word (again). Each real mode is
2665 mapped into one sparc_mode_class mode. */
2667 enum sparc_mode_class {
2668 S_MODE, D_MODE, T_MODE, O_MODE,
2669 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
2670 CC_MODE, CCFP_MODE
2673 /* Modes for single-word and smaller quantities. */
2674 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
2676 /* Modes for double-word and smaller quantities. */
2677 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
2679 /* Modes for quad-word and smaller quantities. */
2680 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
2682 /* Modes for single-float quantities. We must allow any single word or
2683 smaller quantity. This is because the fix/float conversion instructions
2684 take integer inputs/outputs from the float registers. */
2685 #define SF_MODES (S_MODES)
2687 /* Modes for double-float and smaller quantities. */
2688 #define DF_MODES (S_MODES | D_MODES)
2690 #define DF_MODES64 DF_MODES
2692 /* Modes for double-float only quantities. */
2693 #define DF_ONLY_MODES ((1 << (int) DF_MODE) | (1 << (int) D_MODE))
2695 /* Modes for double-float and larger quantities. */
2696 #define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES)
2698 /* Modes for quad-float only quantities. */
2699 #define TF_ONLY_MODES (1 << (int) TF_MODE)
2701 /* Modes for quad-float and smaller quantities. */
2702 #define TF_MODES (DF_MODES | TF_ONLY_MODES)
2704 #define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES)
2706 /* Modes for condition codes. */
2707 #define CC_MODES (1 << (int) CC_MODE)
2708 #define CCFP_MODES (1 << (int) CCFP_MODE)
2710 /* Value is 1 if register/mode pair is acceptable on sparc.
2711 The funny mixture of D and T modes is because integer operations
2712 do not specially operate on tetra quantities, so non-quad-aligned
2713 registers can hold quadword quantities (except %o4 and %i4 because
2714 they cross fixed registers). */
2716 /* This points to either the 32 bit or the 64 bit version. */
2717 int *hard_regno_mode_classes;
2719 static int hard_32bit_mode_classes[] = {
2720 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
2721 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
2722 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
2723 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
2725 TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
2726 TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
2727 TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
2728 TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
2730 /* FP regs f32 to f63. Only the even numbered registers actually exist,
2731 and none can hold SFmode/SImode values. */
2732 DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2733 DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2734 DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2735 DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2737 /* %fcc[0123] */
2738 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
2740 /* %icc */
2741 CC_MODES
2744 static int hard_64bit_mode_classes[] = {
2745 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2746 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2747 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2748 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2750 TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
2751 TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
2752 TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
2753 TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
2755 /* FP regs f32 to f63. Only the even numbered registers actually exist,
2756 and none can hold SFmode/SImode values. */
2757 DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2758 DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2759 DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2760 DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2762 /* %fcc[0123] */
2763 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
2765 /* %icc */
2766 CC_MODES
2769 int sparc_mode_class [NUM_MACHINE_MODES];
2771 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
2773 static void
2774 sparc_init_modes ()
2776 int i;
2778 for (i = 0; i < NUM_MACHINE_MODES; i++)
2780 switch (GET_MODE_CLASS (i))
2782 case MODE_INT:
2783 case MODE_PARTIAL_INT:
2784 case MODE_COMPLEX_INT:
2785 if (GET_MODE_SIZE (i) <= 4)
2786 sparc_mode_class[i] = 1 << (int) S_MODE;
2787 else if (GET_MODE_SIZE (i) == 8)
2788 sparc_mode_class[i] = 1 << (int) D_MODE;
2789 else if (GET_MODE_SIZE (i) == 16)
2790 sparc_mode_class[i] = 1 << (int) T_MODE;
2791 else if (GET_MODE_SIZE (i) == 32)
2792 sparc_mode_class[i] = 1 << (int) O_MODE;
2793 else
2794 sparc_mode_class[i] = 0;
2795 break;
2796 case MODE_FLOAT:
2797 case MODE_COMPLEX_FLOAT:
2798 if (GET_MODE_SIZE (i) <= 4)
2799 sparc_mode_class[i] = 1 << (int) SF_MODE;
2800 else if (GET_MODE_SIZE (i) == 8)
2801 sparc_mode_class[i] = 1 << (int) DF_MODE;
2802 else if (GET_MODE_SIZE (i) == 16)
2803 sparc_mode_class[i] = 1 << (int) TF_MODE;
2804 else if (GET_MODE_SIZE (i) == 32)
2805 sparc_mode_class[i] = 1 << (int) OF_MODE;
2806 else
2807 sparc_mode_class[i] = 0;
2808 break;
2809 case MODE_CC:
2810 default:
2811 /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
2812 we must explicitly check for them here. */
2813 if (i == (int) CCFPmode || i == (int) CCFPEmode)
2814 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
2815 else if (i == (int) CCmode || i == (int) CC_NOOVmode
2816 || i == (int) CCXmode || i == (int) CCX_NOOVmode)
2817 sparc_mode_class[i] = 1 << (int) CC_MODE;
2818 else
2819 sparc_mode_class[i] = 0;
2820 break;
2824 if (TARGET_ARCH64)
2825 hard_regno_mode_classes = hard_64bit_mode_classes;
2826 else
2827 hard_regno_mode_classes = hard_32bit_mode_classes;
2829 /* Initialize the array used by REGNO_REG_CLASS. */
2830 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2832 if (i < 16 && TARGET_V8PLUS)
2833 sparc_regno_reg_class[i] = I64_REGS;
2834 else if (i < 32)
2835 sparc_regno_reg_class[i] = GENERAL_REGS;
2836 else if (i < 64)
2837 sparc_regno_reg_class[i] = FP_REGS;
2838 else if (i < 96)
2839 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
2840 else if (i < 100)
2841 sparc_regno_reg_class[i] = FPCC_REGS;
2842 else
2843 sparc_regno_reg_class[i] = NO_REGS;
2847 /* Save non call used registers from LOW to HIGH at BASE+OFFSET.
2848 N_REGS is the number of 4-byte regs saved thus far. This applies even to
2849 v9 int regs as it simplifies the code. */
2851 static int
2852 save_regs (file, low, high, base, offset, n_regs, real_offset)
2853 FILE *file;
2854 int low, high;
2855 const char *base;
2856 int offset;
2857 int n_regs;
2858 int real_offset;
2860 int i;
2862 if (TARGET_ARCH64 && high <= 32)
2864 for (i = low; i < high; i++)
2866 if (regs_ever_live[i] && ! call_used_regs[i])
2868 fprintf (file, "\tstx\t%s, [%s+%d]\n",
2869 reg_names[i], base, offset + 4 * n_regs);
2870 if (dwarf2out_do_frame ())
2871 dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
2872 n_regs += 2;
2876 else
2878 for (i = low; i < high; i += 2)
2880 if (regs_ever_live[i] && ! call_used_regs[i])
2882 if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2884 fprintf (file, "\tstd\t%s, [%s+%d]\n",
2885 reg_names[i], base, offset + 4 * n_regs);
2886 if (dwarf2out_do_frame ())
2888 char *l = dwarf2out_cfi_label ();
2889 dwarf2out_reg_save (l, i, real_offset + 4 * n_regs);
2890 dwarf2out_reg_save (l, i+1, real_offset + 4 * n_regs + 4);
2892 n_regs += 2;
2894 else
2896 fprintf (file, "\tst\t%s, [%s+%d]\n",
2897 reg_names[i], base, offset + 4 * n_regs);
2898 if (dwarf2out_do_frame ())
2899 dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
2900 n_regs += 2;
2903 else
2905 if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2907 fprintf (file, "\tst\t%s, [%s+%d]\n",
2908 reg_names[i+1], base, offset + 4 * n_regs + 4);
2909 if (dwarf2out_do_frame ())
2910 dwarf2out_reg_save ("", i + 1, real_offset + 4 * n_regs + 4);
2911 n_regs += 2;
2916 return n_regs;
2919 /* Restore non call used registers from LOW to HIGH at BASE+OFFSET.
2921 N_REGS is the number of 4-byte regs saved thus far. This applies even to
2922 v9 int regs as it simplifies the code. */
2924 static int
2925 restore_regs (file, low, high, base, offset, n_regs)
2926 FILE *file;
2927 int low, high;
2928 const char *base;
2929 int offset;
2930 int n_regs;
2932 int i;
2934 if (TARGET_ARCH64 && high <= 32)
2936 for (i = low; i < high; i++)
2938 if (regs_ever_live[i] && ! call_used_regs[i])
2939 fprintf (file, "\tldx\t[%s+%d], %s\n",
2940 base, offset + 4 * n_regs, reg_names[i]),
2941 n_regs += 2;
2944 else
2946 for (i = low; i < high; i += 2)
2948 if (regs_ever_live[i] && ! call_used_regs[i])
2949 if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2950 fprintf (file, "\tldd\t[%s+%d], %s\n",
2951 base, offset + 4 * n_regs, reg_names[i]),
2952 n_regs += 2;
2953 else
2954 fprintf (file, "\tld\t[%s+%d],%s\n",
2955 base, offset + 4 * n_regs, reg_names[i]),
2956 n_regs += 2;
2957 else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2958 fprintf (file, "\tld\t[%s+%d],%s\n",
2959 base, offset + 4 * n_regs + 4, reg_names[i+1]),
2960 n_regs += 2;
2963 return n_regs;
2966 /* Static variables we want to share between prologue and epilogue. */
2968 /* Number of live general or floating point registers needed to be saved
2969 (as 4-byte quantities). This is only done if TARGET_EPILOGUE. */
2970 static int num_gfregs;
2972 /* Compute the frame size required by the function. This function is called
2973 during the reload pass and also by output_function_prologue(). */
2976 compute_frame_size (size, leaf_function)
2977 int size;
2978 int leaf_function;
2980 int n_regs = 0, i;
2981 int outgoing_args_size = (current_function_outgoing_args_size
2982 + REG_PARM_STACK_SPACE (current_function_decl));
2984 if (TARGET_EPILOGUE)
2986 /* N_REGS is the number of 4-byte regs saved thus far. This applies
2987 even to v9 int regs to be consistent with save_regs/restore_regs. */
2989 if (TARGET_ARCH64)
2991 for (i = 0; i < 8; i++)
2992 if (regs_ever_live[i] && ! call_used_regs[i])
2993 n_regs += 2;
2995 else
2997 for (i = 0; i < 8; i += 2)
2998 if ((regs_ever_live[i] && ! call_used_regs[i])
2999 || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
3000 n_regs += 2;
3003 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
3004 if ((regs_ever_live[i] && ! call_used_regs[i])
3005 || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
3006 n_regs += 2;
3009 /* Set up values for use in `function_epilogue'. */
3010 num_gfregs = n_regs;
3012 if (leaf_function && n_regs == 0
3013 && size == 0 && current_function_outgoing_args_size == 0)
3015 actual_fsize = apparent_fsize = 0;
3017 else
3019 /* We subtract STARTING_FRAME_OFFSET, remember it's negative.
3020 The stack bias (if any) is taken out to undo its effects. */
3021 apparent_fsize = (size - STARTING_FRAME_OFFSET + SPARC_STACK_BIAS + 7) & -8;
3022 apparent_fsize += n_regs * 4;
3023 actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
3026 /* Make sure nothing can clobber our register windows.
3027 If a SAVE must be done, or there is a stack-local variable,
3028 the register window area must be allocated.
3029 ??? For v8 we apparently need an additional 8 bytes of reserved space. */
3030 if (leaf_function == 0 || size > 0)
3031 actual_fsize += (16 * UNITS_PER_WORD) + (TARGET_ARCH64 ? 0 : 8);
3033 return SPARC_STACK_ALIGN (actual_fsize);
3036 /* Build a (32 bit) big number in a register. */
3037 /* ??? We may be able to use the set macro here too. */
3039 static void
3040 build_big_number (file, num, reg)
3041 FILE *file;
3042 int num;
3043 const char *reg;
3045 if (num >= 0 || ! TARGET_ARCH64)
3047 fprintf (file, "\tsethi\t%%hi(%d), %s\n", num, reg);
3048 if ((num & 0x3ff) != 0)
3049 fprintf (file, "\tor\t%s, %%lo(%d), %s\n", reg, num, reg);
3051 else /* num < 0 && TARGET_ARCH64 */
3053 /* Sethi does not sign extend, so we must use a little trickery
3054 to use it for negative numbers. Invert the constant before
3055 loading it in, then use xor immediate to invert the loaded bits
3056 (along with the upper 32 bits) to the desired constant. This
3057 works because the sethi and immediate fields overlap. */
3058 int asize = num;
3059 int inv = ~asize;
3060 int low = -0x400 + (asize & 0x3FF);
3062 fprintf (file, "\tsethi\t%%hi(%d), %s\n\txor\t%s, %d, %s\n",
3063 inv, reg, reg, low, reg);
3067 /* Output any necessary .register pseudo-ops. */
3068 void
3069 sparc_output_scratch_registers (file)
3070 FILE *file;
3072 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
3073 int i;
3075 if (TARGET_ARCH32)
3076 return;
3078 /* Check if %g[2367] were used without
3079 .register being printed for them already. */
3080 for (i = 2; i < 8; i++)
3082 if (regs_ever_live [i]
3083 && ! sparc_hard_reg_printed [i])
3085 sparc_hard_reg_printed [i] = 1;
3086 fprintf (file, "\t.register\t%%g%d, #scratch\n", i);
3088 if (i == 3) i = 5;
3090 #endif
3093 /* Output code for the function prologue. */
3095 void
3096 output_function_prologue (file, size, leaf_function)
3097 FILE *file;
3098 int size;
3099 int leaf_function;
3101 sparc_output_scratch_registers (file);
3103 /* Need to use actual_fsize, since we are also allocating
3104 space for our callee (and our own register save area). */
3105 actual_fsize = compute_frame_size (size, leaf_function);
3107 if (leaf_function)
3109 frame_base_name = "%sp";
3110 frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
3112 else
3114 frame_base_name = "%fp";
3115 frame_base_offset = SPARC_STACK_BIAS;
3118 /* This is only for the human reader. */
3119 fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
3121 if (actual_fsize == 0)
3122 /* do nothing. */ ;
3123 else if (! leaf_function && ! TARGET_BROKEN_SAVERESTORE)
3125 if (actual_fsize <= 4096)
3126 fprintf (file, "\tsave\t%%sp, -%d, %%sp\n", actual_fsize);
3127 else if (actual_fsize <= 8192)
3129 fprintf (file, "\tsave\t%%sp, -4096, %%sp\n");
3130 fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
3132 else
3134 build_big_number (file, -actual_fsize, "%g1");
3135 fprintf (file, "\tsave\t%%sp, %%g1, %%sp\n");
3138 else if (! leaf_function && TARGET_BROKEN_SAVERESTORE)
3140 /* We assume the environment will properly handle or otherwise avoid
3141 trouble associated with an interrupt occurring after the `save' or
3142 trap occurring during it. */
3143 fprintf (file, "\tsave\n");
3145 if (actual_fsize <= 4096)
3146 fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize);
3147 else if (actual_fsize <= 8192)
3149 fprintf (file, "\tadd\t%%fp, -4096, %%sp\n");
3150 fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize - 4096);
3152 else
3154 build_big_number (file, -actual_fsize, "%g1");
3155 fprintf (file, "\tadd\t%%fp, %%g1, %%sp\n");
3158 else /* leaf function */
3160 if (actual_fsize <= 4096)
3161 fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize);
3162 else if (actual_fsize <= 8192)
3164 fprintf (file, "\tadd\t%%sp, -4096, %%sp\n");
3165 fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
3167 else
3169 build_big_number (file, -actual_fsize, "%g1");
3170 fprintf (file, "\tadd\t%%sp, %%g1, %%sp\n");
3174 if (dwarf2out_do_frame () && actual_fsize)
3176 char *label = dwarf2out_cfi_label ();
3178 /* The canonical frame address refers to the top of the frame. */
3179 dwarf2out_def_cfa (label, (leaf_function ? STACK_POINTER_REGNUM
3180 : FRAME_POINTER_REGNUM),
3181 frame_base_offset);
3183 if (! leaf_function)
3185 /* Note the register window save. This tells the unwinder that
3186 it needs to restore the window registers from the previous
3187 frame's window save area at 0(cfa). */
3188 dwarf2out_window_save (label);
3190 /* The return address (-8) is now in %i7. */
3191 dwarf2out_return_reg (label, 31);
3195 /* If doing anything with PIC, do it now. */
3196 if (! flag_pic)
3197 fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
3199 /* Call saved registers are saved just above the outgoing argument area. */
3200 if (num_gfregs)
3202 int offset, real_offset, n_regs;
3203 const char *base;
3205 real_offset = -apparent_fsize;
3206 offset = -apparent_fsize + frame_base_offset;
3207 if (offset < -4096 || offset + num_gfregs * 4 > 4096)
3209 /* ??? This might be optimized a little as %g1 might already have a
3210 value close enough that a single add insn will do. */
3211 /* ??? Although, all of this is probably only a temporary fix
3212 because if %g1 can hold a function result, then
3213 output_function_epilogue will lose (the result will get
3214 clobbered). */
3215 build_big_number (file, offset, "%g1");
3216 fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
3217 base = "%g1";
3218 offset = 0;
3220 else
3222 base = frame_base_name;
3225 n_regs = 0;
3226 if (TARGET_EPILOGUE && ! leaf_function)
3227 /* ??? Originally saved regs 0-15 here. */
3228 n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
3229 else if (leaf_function)
3230 /* ??? Originally saved regs 0-31 here. */
3231 n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
3232 if (TARGET_EPILOGUE)
3233 save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs,
3234 real_offset);
3237 leaf_label = 0;
3238 if (leaf_function && actual_fsize != 0)
3240 /* warning ("leaf procedure with frame size %d", actual_fsize); */
3241 if (! TARGET_EPILOGUE)
3242 leaf_label = gen_label_rtx ();
3246 /* Output code for the function epilogue. */
3248 void
3249 output_function_epilogue (file, size, leaf_function)
3250 FILE *file;
3251 int size ATTRIBUTE_UNUSED;
3252 int leaf_function;
3254 const char *ret;
3256 if (leaf_label)
3258 emit_label_after (leaf_label, get_last_insn ());
3259 final_scan_insn (get_last_insn (), file, 0, 0, 1);
3262 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
3263 else if (profile_block_flag == 2)
3265 FUNCTION_BLOCK_PROFILER_EXIT(file);
3267 #endif
3269 else if (current_function_epilogue_delay_list == 0)
3271 /* If code does not drop into the epilogue, we need
3272 do nothing except output pending case vectors. */
3273 rtx insn = get_last_insn ();
3274 if (GET_CODE (insn) == NOTE)
3275 insn = prev_nonnote_insn (insn);
3276 if (insn && GET_CODE (insn) == BARRIER)
3277 goto output_vectors;
3280 /* Restore any call saved registers. */
3281 if (num_gfregs)
3283 int offset, n_regs;
3284 const char *base;
3286 offset = -apparent_fsize + frame_base_offset;
3287 if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
3289 build_big_number (file, offset, "%g1");
3290 fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
3291 base = "%g1";
3292 offset = 0;
3294 else
3296 base = frame_base_name;
3299 n_regs = 0;
3300 if (TARGET_EPILOGUE && ! leaf_function)
3301 /* ??? Originally saved regs 0-15 here. */
3302 n_regs = restore_regs (file, 0, 8, base, offset, 0);
3303 else if (leaf_function)
3304 /* ??? Originally saved regs 0-31 here. */
3305 n_regs = restore_regs (file, 0, 8, base, offset, 0);
3306 if (TARGET_EPILOGUE)
3307 restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
3310 /* Work out how to skip the caller's unimp instruction if required. */
3311 if (leaf_function)
3312 ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%o7+12" : "retl");
3313 else
3314 ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%i7+12" : "ret");
3316 if (TARGET_EPILOGUE || leaf_label)
3318 int old_target_epilogue = TARGET_EPILOGUE;
3319 target_flags &= ~old_target_epilogue;
3321 if (! leaf_function)
3323 /* If we wound up with things in our delay slot, flush them here. */
3324 if (current_function_epilogue_delay_list)
3326 rtx insn = emit_jump_insn_after (gen_rtx_RETURN (VOIDmode),
3327 get_last_insn ());
3328 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode,
3329 gen_rtvec (2,
3330 PATTERN (XEXP (current_function_epilogue_delay_list, 0)),
3331 PATTERN (insn)));
3332 final_scan_insn (insn, file, 1, 0, 1);
3334 else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
3335 fputs ("\treturn\t%i7+8\n\tnop\n", file);
3336 else
3337 fprintf (file, "\t%s\n\trestore\n", ret);
3339 /* All of the following cases are for leaf functions. */
3340 else if (current_function_epilogue_delay_list)
3342 /* eligible_for_epilogue_delay_slot ensures that if this is a
3343 leaf function, then we will only have insn in the delay slot
3344 if the frame size is zero, thus no adjust for the stack is
3345 needed here. */
3346 if (actual_fsize != 0)
3347 abort ();
3348 fprintf (file, "\t%s\n", ret);
3349 final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
3350 file, 1, 0, 1);
3352 /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
3353 avoid generating confusing assembly language output. */
3354 else if (actual_fsize == 0)
3355 fprintf (file, "\t%s\n\tnop\n", ret);
3356 else if (actual_fsize <= 4096)
3357 fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
3358 else if (actual_fsize <= 8192)
3359 fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
3360 ret, actual_fsize - 4096);
3361 else if ((actual_fsize & 0x3ff) == 0)
3362 fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
3363 actual_fsize, ret);
3364 else
3365 fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
3366 actual_fsize, actual_fsize, ret);
3367 target_flags |= old_target_epilogue;
3370 output_vectors:
3371 sparc_output_deferred_case_vectors ();
3374 /* Functions for handling argument passing.
3376 For v8 the first six args are normally in registers and the rest are
3377 pushed. Any arg that starts within the first 6 words is at least
3378 partially passed in a register unless its data type forbids.
3380 For v9, the argument registers are laid out as an array of 16 elements
3381 and arguments are added sequentially. The first 6 int args and up to the
3382 first 16 fp args (depending on size) are passed in regs.
3384 Slot Stack Integral Float Float in structure Double Long Double
3385 ---- ----- -------- ----- ------------------ ------ -----------
3386 15 [SP+248] %f31 %f30,%f31 %d30
3387 14 [SP+240] %f29 %f28,%f29 %d28 %q28
3388 13 [SP+232] %f27 %f26,%f27 %d26
3389 12 [SP+224] %f25 %f24,%f25 %d24 %q24
3390 11 [SP+216] %f23 %f22,%f23 %d22
3391 10 [SP+208] %f21 %f20,%f21 %d20 %q20
3392 9 [SP+200] %f19 %f18,%f19 %d18
3393 8 [SP+192] %f17 %f16,%f17 %d16 %q16
3394 7 [SP+184] %f15 %f14,%f15 %d14
3395 6 [SP+176] %f13 %f12,%f13 %d12 %q12
3396 5 [SP+168] %o5 %f11 %f10,%f11 %d10
3397 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
3398 3 [SP+152] %o3 %f7 %f6,%f7 %d6
3399 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
3400 1 [SP+136] %o1 %f3 %f2,%f3 %d2
3401 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
3403 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
3405 Integral arguments are always passed as 64 bit quantities appropriately
3406 extended.
3408 Passing of floating point values is handled as follows.
3409 If a prototype is in scope:
3410 If the value is in a named argument (i.e. not a stdarg function or a
3411 value not part of the `...') then the value is passed in the appropriate
3412 fp reg.
3413 If the value is part of the `...' and is passed in one of the first 6
3414 slots then the value is passed in the appropriate int reg.
3415 If the value is part of the `...' and is not passed in one of the first 6
3416 slots then the value is passed in memory.
3417 If a prototype is not in scope:
3418 If the value is one of the first 6 arguments the value is passed in the
3419 appropriate integer reg and the appropriate fp reg.
3420 If the value is not one of the first 6 arguments the value is passed in
3421 the appropriate fp reg and in memory.
3424 /* Maximum number of int regs for args. */
3425 #define SPARC_INT_ARG_MAX 6
3426 /* Maximum number of fp regs for args. */
3427 #define SPARC_FP_ARG_MAX 16
3429 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
3431 /* Handle the INIT_CUMULATIVE_ARGS macro.
3432 Initialize a variable CUM of type CUMULATIVE_ARGS
3433 for a call to a function whose data type is FNTYPE.
3434 For a library call, FNTYPE is 0. */
3436 void
3437 init_cumulative_args (cum, fntype, libname, indirect)
3438 CUMULATIVE_ARGS *cum;
3439 tree fntype;
3440 tree libname ATTRIBUTE_UNUSED;
3441 int indirect ATTRIBUTE_UNUSED;
3443 cum->words = 0;
3444 cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
3445 cum->libcall_p = fntype == 0;
3448 /* Compute the slot number to pass an argument in.
3449 Returns the slot number or -1 if passing on the stack.
3451 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3452 the preceding args and about the function being called.
3453 MODE is the argument's machine mode.
3454 TYPE is the data type of the argument (as a tree).
3455 This is null for libcalls where that information may
3456 not be available.
3457 NAMED is nonzero if this argument is a named parameter
3458 (otherwise it is an extra parameter matching an ellipsis).
3459 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
3460 *PREGNO records the register number to use if scalar type.
3461 *PPADDING records the amount of padding needed in words. */
3463 static int
3464 function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
3465 const CUMULATIVE_ARGS *cum;
3466 enum machine_mode mode;
3467 tree type;
3468 int named;
3469 int incoming_p;
3470 int *pregno;
3471 int *ppadding;
3473 int regbase = (incoming_p
3474 ? SPARC_INCOMING_INT_ARG_FIRST
3475 : SPARC_OUTGOING_INT_ARG_FIRST);
3476 int slotno = cum->words;
3477 int regno;
3479 *ppadding = 0;
3481 if (type != 0 && TREE_ADDRESSABLE (type))
3482 return -1;
3483 if (TARGET_ARCH32
3484 && type != 0 && mode == BLKmode
3485 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
3486 return -1;
3488 switch (mode)
3490 case VOIDmode :
3491 /* MODE is VOIDmode when generating the actual call.
3492 See emit_call_1. */
3493 return -1;
3495 case QImode : case CQImode :
3496 case HImode : case CHImode :
3497 case SImode : case CSImode :
3498 case DImode : case CDImode :
3499 if (slotno >= SPARC_INT_ARG_MAX)
3500 return -1;
3501 regno = regbase + slotno;
3502 break;
3504 case SFmode : case SCmode :
3505 case DFmode : case DCmode :
3506 case TFmode : case TCmode :
3507 if (TARGET_ARCH32)
3509 if (slotno >= SPARC_INT_ARG_MAX)
3510 return -1;
3511 regno = regbase + slotno;
3513 else
3515 if ((mode == TFmode || mode == TCmode)
3516 && (slotno & 1) != 0)
3517 slotno++, *ppadding = 1;
3518 if (TARGET_FPU && named)
3520 if (slotno >= SPARC_FP_ARG_MAX)
3521 return -1;
3522 regno = SPARC_FP_ARG_FIRST + slotno * 2;
3523 if (mode == SFmode)
3524 regno++;
3526 else
3528 if (slotno >= SPARC_INT_ARG_MAX)
3529 return -1;
3530 regno = regbase + slotno;
3533 break;
3535 case BLKmode :
3536 /* For sparc64, objects requiring 16 byte alignment get it. */
3537 if (TARGET_ARCH64)
3539 if (type && TYPE_ALIGN (type) == 128 && (slotno & 1) != 0)
3540 slotno++, *ppadding = 1;
3543 if (TARGET_ARCH32
3544 || (type && TREE_CODE (type) == UNION_TYPE))
3546 if (slotno >= SPARC_INT_ARG_MAX)
3547 return -1;
3548 regno = regbase + slotno;
3550 else
3552 tree field;
3553 int intregs_p = 0, fpregs_p = 0;
3554 /* The ABI obviously doesn't specify how packed
3555 structures are passed. These are defined to be passed
3556 in int regs if possible, otherwise memory. */
3557 int packed_p = 0;
3559 /* First see what kinds of registers we need. */
3560 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3562 if (TREE_CODE (field) == FIELD_DECL)
3564 if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3565 && TARGET_FPU)
3566 fpregs_p = 1;
3567 else
3568 intregs_p = 1;
3569 if (DECL_PACKED (field))
3570 packed_p = 1;
3573 if (packed_p || !named)
3574 fpregs_p = 0, intregs_p = 1;
3576 /* If all arg slots are filled, then must pass on stack. */
3577 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
3578 return -1;
3579 /* If there are only int args and all int arg slots are filled,
3580 then must pass on stack. */
3581 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
3582 return -1;
3583 /* Note that even if all int arg slots are filled, fp members may
3584 still be passed in regs if such regs are available.
3585 *PREGNO isn't set because there may be more than one, it's up
3586 to the caller to compute them. */
3587 return slotno;
3589 break;
3591 default :
3592 abort ();
3595 *pregno = regno;
3596 return slotno;
3599 /* Handle recursive register counting for structure field layout. */
3601 struct function_arg_record_value_parms
3603 rtx ret;
3604 int slotno, named, regbase;
3605 int nregs, intoffset;
3608 static void function_arg_record_value_3
3609 PROTO((int, struct function_arg_record_value_parms *));
3610 static void function_arg_record_value_2
3611 PROTO((tree, int, struct function_arg_record_value_parms *));
3612 static rtx function_arg_record_value
3613 PROTO((tree, enum machine_mode, int, int, int));
3615 static void
3616 function_arg_record_value_1 (type, startbitpos, parms)
3617 tree type;
3618 int startbitpos;
3619 struct function_arg_record_value_parms *parms;
3621 tree field;
3623 /* The ABI obviously doesn't specify how packed structures are
3624 passed. These are defined to be passed in int regs if possible,
3625 otherwise memory. */
3626 int packed_p = 0;
3628 /* We need to compute how many registers are needed so we can
3629 allocate the PARALLEL but before we can do that we need to know
3630 whether there are any packed fields. If there are, int regs are
3631 used regardless of whether there are fp values present. */
3632 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3634 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
3636 packed_p = 1;
3637 break;
3641 /* Compute how many registers we need. */
3642 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3644 if (TREE_CODE (field) == FIELD_DECL)
3646 int bitpos = startbitpos;
3647 if (DECL_FIELD_BITPOS (field))
3648 bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
3649 /* ??? FIXME: else assume zero offset. */
3651 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
3653 function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
3655 else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3656 && TARGET_FPU
3657 && ! packed_p
3658 && parms->named)
3660 if (parms->intoffset != -1)
3662 int intslots, this_slotno;
3664 intslots = (bitpos - parms->intoffset + BITS_PER_WORD - 1)
3665 / BITS_PER_WORD;
3666 this_slotno = parms->slotno + parms->intoffset
3667 / BITS_PER_WORD;
3669 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3670 intslots = MAX (intslots, 0);
3671 parms->nregs += intslots;
3672 parms->intoffset = -1;
3675 /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
3676 If it wasn't true we wouldn't be here. */
3677 parms->nregs += 1;
3679 else
3681 if (parms->intoffset == -1)
3682 parms->intoffset = bitpos;
3688 /* Handle recursive structure field register assignment. */
3690 static void
3691 function_arg_record_value_3 (bitpos, parms)
3692 int bitpos;
3693 struct function_arg_record_value_parms *parms;
3695 enum machine_mode mode;
3696 int regno, this_slotno, intslots, intoffset;
3697 rtx reg;
3699 if (parms->intoffset == -1)
3700 return;
3701 intoffset = parms->intoffset;
3702 parms->intoffset = -1;
3704 intslots = (bitpos - intoffset + BITS_PER_WORD - 1) / BITS_PER_WORD;
3705 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
3707 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3708 if (intslots <= 0)
3709 return;
3711 /* If this is the trailing part of a word, only load that much into
3712 the register. Otherwise load the whole register. Note that in
3713 the latter case we may pick up unwanted bits. It's not a problem
3714 at the moment but may wish to revisit. */
3716 if (intoffset % BITS_PER_WORD != 0)
3718 mode = mode_for_size (BITS_PER_WORD - intoffset%BITS_PER_WORD,
3719 MODE_INT, 0);
3721 else
3722 mode = word_mode;
3724 intoffset /= BITS_PER_UNIT;
3727 regno = parms->regbase + this_slotno;
3728 reg = gen_rtx_REG (mode, regno);
3729 XVECEXP (parms->ret, 0, parms->nregs)
3730 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
3732 this_slotno += 1;
3733 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
3734 parms->nregs += 1;
3735 intslots -= 1;
3737 while (intslots > 0);
3740 static void
3741 function_arg_record_value_2 (type, startbitpos, parms)
3742 tree type;
3743 int startbitpos;
3744 struct function_arg_record_value_parms *parms;
3746 tree field;
3747 int packed_p = 0;
3749 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3751 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
3753 packed_p = 1;
3754 break;
3758 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3760 if (TREE_CODE (field) == FIELD_DECL)
3762 int bitpos = startbitpos;
3763 if (DECL_FIELD_BITPOS (field))
3764 bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
3765 /* ??? FIXME: else assume zero offset. */
3767 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
3769 function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
3771 else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3772 && TARGET_FPU
3773 && ! packed_p
3774 && parms->named)
3776 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
3777 rtx reg;
3779 function_arg_record_value_3 (bitpos, parms);
3781 reg = gen_rtx_REG (DECL_MODE (field),
3782 (SPARC_FP_ARG_FIRST + this_slotno * 2
3783 + (DECL_MODE (field) == SFmode
3784 && (bitpos & 32) != 0)));
3785 XVECEXP (parms->ret, 0, parms->nregs)
3786 = gen_rtx_EXPR_LIST (VOIDmode, reg,
3787 GEN_INT (bitpos / BITS_PER_UNIT));
3788 parms->nregs += 1;
3790 else
3792 if (parms->intoffset == -1)
3793 parms->intoffset = bitpos;
3799 static rtx
3800 function_arg_record_value (type, mode, slotno, named, regbase)
3801 tree type;
3802 enum machine_mode mode;
3803 int slotno, named, regbase;
3805 HOST_WIDE_INT typesize = int_size_in_bytes (type);
3806 struct function_arg_record_value_parms parms;
3807 int nregs;
3809 parms.ret = NULL_RTX;
3810 parms.slotno = slotno;
3811 parms.named = named;
3812 parms.regbase = regbase;
3814 /* Compute how many registers we need. */
3815 parms.nregs = 0;
3816 parms.intoffset = 0;
3817 function_arg_record_value_1 (type, 0, &parms);
3819 if (parms.intoffset != -1)
3821 int intslots, this_slotno;
3823 intslots = (typesize*BITS_PER_UNIT - parms.intoffset + BITS_PER_WORD - 1)
3824 / BITS_PER_WORD;
3825 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
3827 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3828 intslots = MAX (intslots, 0);
3830 parms.nregs += intslots;
3832 nregs = parms.nregs;
3834 /* Allocate the vector and handle some annoying special cases. */
3835 if (nregs == 0)
3837 /* ??? Empty structure has no value? Duh? */
3838 if (typesize <= 0)
3840 /* Though there's nothing really to store, return a word register
3841 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
3842 leads to breakage due to the fact that there are zero bytes to
3843 load. */
3844 return gen_rtx_REG (mode, regbase);
3846 else
3848 /* ??? C++ has structures with no fields, and yet a size. Give up
3849 for now and pass everything back in integer registers. */
3850 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3852 if (nregs + slotno > SPARC_INT_ARG_MAX)
3853 nregs = SPARC_INT_ARG_MAX - slotno;
3855 if (nregs == 0)
3856 abort ();
3858 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3860 /* Fill in the entries. */
3861 parms.nregs = 0;
3862 parms.intoffset = 0;
3863 function_arg_record_value_2 (type, 0, &parms);
3864 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
3866 if (parms.nregs != nregs)
3867 abort ();
3869 return parms.ret;
3872 /* Handle the FUNCTION_ARG macro.
3873 Determine where to put an argument to a function.
3874 Value is zero to push the argument on the stack,
3875 or a hard register in which to store the argument.
3877 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3878 the preceding args and about the function being called.
3879 MODE is the argument's machine mode.
3880 TYPE is the data type of the argument (as a tree).
3881 This is null for libcalls where that information may
3882 not be available.
3883 NAMED is nonzero if this argument is a named parameter
3884 (otherwise it is an extra parameter matching an ellipsis).
3885 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. */
3888 function_arg (cum, mode, type, named, incoming_p)
3889 const CUMULATIVE_ARGS *cum;
3890 enum machine_mode mode;
3891 tree type;
3892 int named;
3893 int incoming_p;
3895 int regbase = (incoming_p
3896 ? SPARC_INCOMING_INT_ARG_FIRST
3897 : SPARC_OUTGOING_INT_ARG_FIRST);
3898 int slotno, regno, padding;
3899 rtx reg;
3901 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
3902 &regno, &padding);
3904 if (slotno == -1)
3905 return 0;
3907 if (TARGET_ARCH32)
3909 reg = gen_rtx_REG (mode, regno);
3910 return reg;
3913 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
3914 but also have the slot allocated for them.
3915 If no prototype is in scope fp values in register slots get passed
3916 in two places, either fp regs and int regs or fp regs and memory. */
3917 if ((GET_MODE_CLASS (mode) == MODE_FLOAT
3918 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3919 && SPARC_FP_REG_P (regno))
3921 reg = gen_rtx_REG (mode, regno);
3922 if (cum->prototype_p || cum->libcall_p)
3924 /* "* 2" because fp reg numbers are recorded in 4 byte
3925 quantities. */
3926 #if 0
3927 /* ??? This will cause the value to be passed in the fp reg and
3928 in the stack. When a prototype exists we want to pass the
3929 value in the reg but reserve space on the stack. That's an
3930 optimization, and is deferred [for a bit]. */
3931 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
3932 return gen_rtx_PARALLEL (mode,
3933 gen_rtvec (2,
3934 gen_rtx_EXPR_LIST (VOIDmode,
3935 NULL_RTX, const0_rtx),
3936 gen_rtx_EXPR_LIST (VOIDmode,
3937 reg, const0_rtx)));
3938 else
3939 #else
3940 /* ??? It seems that passing back a register even when past
3941 the area declared by REG_PARM_STACK_SPACE will allocate
3942 space appropriately, and will not copy the data onto the
3943 stack, exactly as we desire.
3945 This is due to locate_and_pad_parm being called in
3946 expand_call whenever reg_parm_stack_space > 0, which
3947 while benefical to our example here, would seem to be
3948 in error from what had been intended. Ho hum... -- r~ */
3949 #endif
3950 return reg;
3952 else
3954 rtx v0, v1;
3956 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
3958 int intreg;
3960 /* On incoming, we don't need to know that the value
3961 is passed in %f0 and %i0, and it confuses other parts
3962 causing needless spillage even on the simplest cases. */
3963 if (incoming_p)
3964 return reg;
3966 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
3967 + (regno - SPARC_FP_ARG_FIRST) / 2);
3969 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
3970 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
3971 const0_rtx);
3972 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
3974 else
3976 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
3977 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
3978 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
3982 else if (type && TREE_CODE (type) == RECORD_TYPE)
3984 /* Structures up to 16 bytes in size are passed in arg slots on the
3985 stack and are promoted to registers where possible. */
3987 if (int_size_in_bytes (type) > 16)
3988 abort (); /* shouldn't get here */
3990 return function_arg_record_value (type, mode, slotno, named, regbase);
3992 else if (type && TREE_CODE (type) == UNION_TYPE)
3994 enum machine_mode mode;
3995 int bytes = int_size_in_bytes (type);
3997 if (bytes > 16)
3998 abort ();
4000 mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
4001 reg = gen_rtx_REG (mode, regno);
4003 else
4005 /* Scalar or complex int. */
4006 reg = gen_rtx_REG (mode, regno);
4009 return reg;
4012 /* Handle the FUNCTION_ARG_PARTIAL_NREGS macro.
4013 For an arg passed partly in registers and partly in memory,
4014 this is the number of registers used.
4015 For args passed entirely in registers or entirely in memory, zero.
4017 Any arg that starts in the first 6 regs but won't entirely fit in them
4018 needs partial registers on v8. On v9, structures with integer
4019 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
4020 values that begin in the last fp reg [where "last fp reg" varies with the
4021 mode] will be split between that reg and memory. */
4024 function_arg_partial_nregs (cum, mode, type, named)
4025 const CUMULATIVE_ARGS *cum;
4026 enum machine_mode mode;
4027 tree type;
4028 int named;
4030 int slotno, regno, padding;
4032 /* We pass 0 for incoming_p here, it doesn't matter. */
4033 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
4035 if (slotno == -1)
4036 return 0;
4038 if (TARGET_ARCH32)
4040 if ((slotno + (mode == BLKmode
4041 ? ROUND_ADVANCE (int_size_in_bytes (type))
4042 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
4043 > NPARM_REGS (SImode))
4044 return NPARM_REGS (SImode) - slotno;
4045 return 0;
4047 else
4049 if (type && AGGREGATE_TYPE_P (type))
4051 int size = int_size_in_bytes (type);
4052 int align = TYPE_ALIGN (type);
4054 if (align == 16)
4055 slotno += slotno & 1;
4056 if (size > 8 && size <= 16
4057 && slotno == SPARC_INT_ARG_MAX - 1)
4058 return 1;
4060 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
4061 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
4062 && ! TARGET_FPU))
4064 if (GET_MODE_ALIGNMENT (mode) == 128)
4066 slotno += slotno & 1;
4067 if (slotno == SPARC_INT_ARG_MAX - 2)
4068 return 1;
4070 else
4072 if (slotno == SPARC_INT_ARG_MAX - 1)
4073 return 1;
4076 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4078 if (GET_MODE_ALIGNMENT (mode) == 128)
4079 slotno += slotno & 1;
4080 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
4081 > SPARC_FP_ARG_MAX)
4082 return 1;
4084 return 0;
4088 /* Handle the FUNCTION_ARG_PASS_BY_REFERENCE macro.
4089 !v9: The SPARC ABI stipulates passing struct arguments (of any size) and
4090 quad-precision floats by invisible reference.
4091 v9: Aggregates greater than 16 bytes are passed by reference.
4092 For Pascal, also pass arrays by reference. */
4095 function_arg_pass_by_reference (cum, mode, type, named)
4096 const CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
4097 enum machine_mode mode;
4098 tree type;
4099 int named ATTRIBUTE_UNUSED;
4101 if (TARGET_ARCH32)
4103 return ((type && AGGREGATE_TYPE_P (type))
4104 || mode == TFmode || mode == TCmode);
4106 else
4108 return ((type && TREE_CODE (type) == ARRAY_TYPE)
4109 /* Consider complex values as aggregates, so care for TCmode. */
4110 || GET_MODE_SIZE (mode) > 16
4111 || (type && AGGREGATE_TYPE_P (type)
4112 && int_size_in_bytes (type) > 16));
4116 /* Handle the FUNCTION_ARG_ADVANCE macro.
4117 Update the data in CUM to advance over an argument
4118 of mode MODE and data type TYPE.
4119 TYPE is null for libcalls where that information may not be available. */
4121 void
4122 function_arg_advance (cum, mode, type, named)
4123 CUMULATIVE_ARGS *cum;
4124 enum machine_mode mode;
4125 tree type;
4126 int named;
4128 int slotno, regno, padding;
4130 /* We pass 0 for incoming_p here, it doesn't matter. */
4131 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
4133 /* If register required leading padding, add it. */
4134 if (slotno != -1)
4135 cum->words += padding;
4137 if (TARGET_ARCH32)
4139 cum->words += (mode != BLKmode
4140 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4141 : ROUND_ADVANCE (int_size_in_bytes (type)));
4143 else
4145 if (type && AGGREGATE_TYPE_P (type))
4147 int size = int_size_in_bytes (type);
4149 if (size <= 8)
4150 ++cum->words;
4151 else if (size <= 16)
4152 cum->words += 2;
4153 else /* passed by reference */
4154 ++cum->words;
4156 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
4158 cum->words += 2;
4160 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4162 cum->words += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
4164 else
4166 cum->words += (mode != BLKmode
4167 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4168 : ROUND_ADVANCE (int_size_in_bytes (type)));
4173 /* Handle the FUNCTION_ARG_PADDING macro.
4174 For the 64 bit ABI structs are always stored left shifted in their
4175 argument slot. */
4177 enum direction
4178 function_arg_padding (mode, type)
4179 enum machine_mode mode;
4180 tree type;
4182 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
4183 return upward;
4185 /* This is the default definition. */
4186 return (! BYTES_BIG_ENDIAN
4187 ? upward
4188 : ((mode == BLKmode
4189 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
4190 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
4191 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
4192 ? downward : upward));
4195 /* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
4196 For v9, function return values are subject to the same rules as arguments,
4197 except that up to 32-bytes may be returned in registers. */
4200 function_value (type, mode, incoming_p)
4201 tree type;
4202 enum machine_mode mode;
4203 int incoming_p;
4205 int regno;
4206 int regbase = (incoming_p
4207 ? SPARC_OUTGOING_INT_ARG_FIRST
4208 : SPARC_INCOMING_INT_ARG_FIRST);
4210 if (TARGET_ARCH64 && type)
4212 if (TREE_CODE (type) == RECORD_TYPE)
4214 /* Structures up to 32 bytes in size are passed in registers,
4215 promoted to fp registers where possible. */
4217 if (int_size_in_bytes (type) > 32)
4218 abort (); /* shouldn't get here */
4220 return function_arg_record_value (type, mode, 0, 1, regbase);
4222 else if (TREE_CODE (type) == UNION_TYPE)
4224 int bytes = int_size_in_bytes (type);
4226 if (bytes > 32)
4227 abort ();
4229 mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
4233 if (TARGET_ARCH64
4234 && GET_MODE_CLASS (mode) == MODE_INT
4235 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
4236 && type && TREE_CODE (type) != UNION_TYPE)
4237 mode = DImode;
4239 if (incoming_p)
4240 regno = BASE_RETURN_VALUE_REG (mode);
4241 else
4242 regno = BASE_OUTGOING_VALUE_REG (mode);
4244 return gen_rtx_REG (mode, regno);
4247 /* Do what is necessary for `va_start'. We look at the current function
4248 to determine if stdarg or varargs is used and return the address of
4249 the first unnamed parameter. */
4252 sparc_builtin_saveregs ()
4254 int first_reg = current_function_args_info.words;
4255 rtx address;
4256 int regno;
4258 for (regno = first_reg; regno < NPARM_REGS (word_mode); regno++)
4259 emit_move_insn (gen_rtx_MEM (word_mode,
4260 gen_rtx_PLUS (Pmode,
4261 frame_pointer_rtx,
4262 GEN_INT (STACK_POINTER_OFFSET
4263 + UNITS_PER_WORD * regno))),
4264 gen_rtx_REG (word_mode,
4265 BASE_INCOMING_ARG_REG (word_mode) + regno));
4267 address = gen_rtx_PLUS (Pmode,
4268 frame_pointer_rtx,
4269 GEN_INT (STACK_POINTER_OFFSET
4270 + UNITS_PER_WORD * first_reg));
4272 if (current_function_check_memory_usage
4273 && first_reg < NPARM_REGS (word_mode))
4274 emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
4275 address, ptr_mode,
4276 GEN_INT (UNITS_PER_WORD
4277 * (NPARM_REGS (word_mode) - first_reg)),
4278 TYPE_MODE (sizetype), GEN_INT (MEMORY_USE_RW),
4279 TYPE_MODE (integer_type_node));
4281 return address;
4284 /* Return the string to output a conditional branch to LABEL, which is
4285 the operand number of the label. OP is the conditional expression.
4286 XEXP (OP, 0) is assumed to be a condition code register (integer or
4287 floating point) and its mode specifies what kind of comparison we made.
4289 REVERSED is non-zero if we should reverse the sense of the comparison.
4291 ANNUL is non-zero if we should generate an annulling branch.
4293 NOOP is non-zero if we have to follow this branch by a noop.
4295 INSN, if set, is the insn. */
4297 char *
4298 output_cbranch (op, label, reversed, annul, noop, insn)
4299 rtx op;
4300 int label;
4301 int reversed, annul, noop;
4302 rtx insn;
4304 static char string[32];
4305 enum rtx_code code = GET_CODE (op);
4306 rtx cc_reg = XEXP (op, 0);
4307 enum machine_mode mode = GET_MODE (cc_reg);
4308 static char v8_labelno[] = "%lX";
4309 static char v9_icc_labelno[] = "%%icc, %lX";
4310 static char v9_xcc_labelno[] = "%%xcc, %lX";
4311 static char v9_fcc_labelno[] = "%%fccX, %lY";
4312 char *labelno;
4313 int labeloff, spaces = 8;
4315 /* ??? !v9: FP branches cannot be preceded by another floating point insn.
4316 Because there is currently no concept of pre-delay slots, we can fix
4317 this only by always emitting a nop before a floating point branch. */
4319 if ((mode == CCFPmode || mode == CCFPEmode) && ! TARGET_V9)
4320 strcpy (string, "nop\n\t");
4321 else
4322 string[0] = '\0';
4324 /* If not floating-point or if EQ or NE, we can just reverse the code. */
4325 if (reversed
4326 && ((mode != CCFPmode && mode != CCFPEmode) || code == EQ || code == NE))
4327 code = reverse_condition (code), reversed = 0;
4329 /* Start by writing the branch condition. */
4330 switch (code)
4332 case NE:
4333 if (mode == CCFPmode || mode == CCFPEmode)
4335 strcat (string, "fbne");
4336 spaces -= 4;
4338 else
4340 strcpy (string, "bne");
4341 spaces -= 3;
4343 break;
4345 case EQ:
4346 if (mode == CCFPmode || mode == CCFPEmode)
4348 strcat (string, "fbe");
4349 spaces -= 3;
4351 else
4353 strcpy (string, "be");
4354 spaces -= 2;
4356 break;
4358 case GE:
4359 if (mode == CCFPmode || mode == CCFPEmode)
4361 if (reversed)
4362 strcat (string, "fbul");
4363 else
4364 strcat (string, "fbge");
4365 spaces -= 4;
4367 else if (mode == CC_NOOVmode)
4369 strcpy (string, "bpos");
4370 spaces -= 4;
4372 else
4374 strcpy (string, "bge");
4375 spaces -= 3;
4377 break;
4379 case GT:
4380 if (mode == CCFPmode || mode == CCFPEmode)
4382 if (reversed)
4384 strcat (string, "fbule");
4385 spaces -= 5;
4387 else
4389 strcat (string, "fbg");
4390 spaces -= 3;
4393 else
4395 strcpy (string, "bg");
4396 spaces -= 2;
4398 break;
4400 case LE:
4401 if (mode == CCFPmode || mode == CCFPEmode)
4403 if (reversed)
4404 strcat (string, "fbug");
4405 else
4406 strcat (string, "fble");
4407 spaces -= 4;
4409 else
4411 strcpy (string, "ble");
4412 spaces -= 3;
4414 break;
4416 case LT:
4417 if (mode == CCFPmode || mode == CCFPEmode)
4419 if (reversed)
4421 strcat (string, "fbuge");
4422 spaces -= 5;
4424 else
4426 strcat (string, "fbl");
4427 spaces -= 3;
4430 else if (mode == CC_NOOVmode)
4432 strcpy (string, "bneg");
4433 spaces -= 4;
4435 else
4437 strcpy (string, "bl");
4438 spaces -= 2;
4440 break;
4442 case GEU:
4443 strcpy (string, "bgeu");
4444 spaces -= 4;
4445 break;
4447 case GTU:
4448 strcpy (string, "bgu");
4449 spaces -= 3;
4450 break;
4452 case LEU:
4453 strcpy (string, "bleu");
4454 spaces -= 4;
4455 break;
4457 case LTU:
4458 strcpy (string, "blu");
4459 spaces -= 3;
4460 break;
4462 default:
4463 abort ();
4466 /* Now add the annulling, the label, and a possible noop. */
4467 if (annul)
4469 strcat (string, ",a");
4470 spaces -= 2;
4473 if (! TARGET_V9)
4475 labeloff = 2;
4476 labelno = v8_labelno;
4478 else
4480 rtx note;
4482 if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
4484 strcat (string,
4485 INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
4486 spaces -= 3;
4489 labeloff = 9;
4490 if (mode == CCFPmode || mode == CCFPEmode)
4492 labeloff = 10;
4493 labelno = v9_fcc_labelno;
4494 /* Set the char indicating the number of the fcc reg to use. */
4495 labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
4497 else if (mode == CCXmode || mode == CCX_NOOVmode)
4498 labelno = v9_xcc_labelno;
4499 else
4500 labelno = v9_icc_labelno;
4502 /* Set the char indicating the number of the operand containing the
4503 label_ref. */
4504 labelno[labeloff] = label + '0';
4505 if (spaces > 0)
4506 strcat (string, "\t");
4507 else
4508 strcat (string, " ");
4509 strcat (string, labelno);
4511 if (noop)
4512 strcat (string, "\n\tnop");
4514 return string;
4517 /* Return the string to output a conditional branch to LABEL, testing
4518 register REG. LABEL is the operand number of the label; REG is the
4519 operand number of the reg. OP is the conditional expression. The mode
4520 of REG says what kind of comparison we made.
4522 REVERSED is non-zero if we should reverse the sense of the comparison.
4524 ANNUL is non-zero if we should generate an annulling branch.
4526 NOOP is non-zero if we have to follow this branch by a noop. */
4528 char *
4529 output_v9branch (op, reg, label, reversed, annul, noop, insn)
4530 rtx op;
4531 int reg, label;
4532 int reversed, annul, noop;
4533 rtx insn;
4535 static char string[20];
4536 enum rtx_code code = GET_CODE (op);
4537 enum machine_mode mode = GET_MODE (XEXP (op, 0));
4538 static char labelno[] = "%X, %lX";
4539 rtx note;
4540 int spaces = 8;
4542 /* If not floating-point or if EQ or NE, we can just reverse the code. */
4543 if (reversed)
4544 code = reverse_condition (code), reversed = 0;
4546 /* Only 64 bit versions of these instructions exist. */
4547 if (mode != DImode)
4548 abort ();
4550 /* Start by writing the branch condition. */
4552 switch (code)
4554 case NE:
4555 strcpy (string, "brnz");
4556 spaces -= 4;
4557 break;
4559 case EQ:
4560 strcpy (string, "brz");
4561 spaces -= 3;
4562 break;
4564 case GE:
4565 strcpy (string, "brgez");
4566 spaces -= 5;
4567 break;
4569 case LT:
4570 strcpy (string, "brlz");
4571 spaces -= 4;
4572 break;
4574 case LE:
4575 strcpy (string, "brlez");
4576 spaces -= 5;
4577 break;
4579 case GT:
4580 strcpy (string, "brgz");
4581 spaces -= 4;
4582 break;
4584 default:
4585 abort ();
4588 /* Now add the annulling, reg, label, and nop. */
4589 if (annul)
4591 strcat (string, ",a");
4592 spaces -= 2;
4595 if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
4597 strcat (string,
4598 INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
4599 spaces -= 3;
4602 labelno[1] = reg + '0';
4603 labelno[6] = label + '0';
4604 if (spaces > 0)
4605 strcat (string, "\t");
4606 else
4607 strcat (string, " ");
4608 strcat (string, labelno);
4610 if (noop)
4611 strcat (string, "\n\tnop");
4613 return string;
4616 /* Renumber registers in delay slot. Replace registers instead of
4617 renumbering because they may be shared.
4619 This does not handle instructions other than move. */
4621 static void
4622 epilogue_renumber (where)
4623 rtx *where;
4625 rtx x = *where;
4626 enum rtx_code code = GET_CODE (x);
4628 switch (code)
4630 case MEM:
4631 *where = x = copy_rtx (x);
4632 epilogue_renumber (&XEXP (x, 0));
4633 return;
4635 case REG:
4637 int regno = REGNO (x);
4638 if (regno > 8 && regno < 24)
4639 abort ();
4640 if (regno >= 24 && regno < 32)
4641 *where = gen_rtx_REG (GET_MODE (x), regno - 16);
4642 return;
4644 case CONST_INT:
4645 case CONST_DOUBLE:
4646 case CONST:
4647 case SYMBOL_REF:
4648 case LABEL_REF:
4649 return;
4651 case IOR:
4652 case AND:
4653 case XOR:
4654 case PLUS:
4655 case MINUS:
4656 epilogue_renumber (&XEXP (x, 1));
4657 case NEG:
4658 case NOT:
4659 epilogue_renumber (&XEXP (x, 0));
4660 return;
4662 default:
4663 debug_rtx (*where);
4664 abort ();
4668 /* Output assembler code to return from a function. */
4670 const char *
4671 output_return (operands)
4672 rtx *operands;
4674 rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;
4676 if (leaf_label)
4678 operands[0] = leaf_label;
4679 return "b%* %l0%(";
4681 else if (current_function_uses_only_leaf_regs)
4683 /* No delay slot in a leaf function. */
4684 if (delay)
4685 abort ();
4687 /* If we didn't allocate a frame pointer for the current function,
4688 the stack pointer might have been adjusted. Output code to
4689 restore it now. */
4691 operands[0] = GEN_INT (actual_fsize);
4693 /* Use sub of negated value in first two cases instead of add to
4694 allow actual_fsize == 4096. */
4696 if (actual_fsize <= 4096)
4698 if (SKIP_CALLERS_UNIMP_P)
4699 return "jmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
4700 else
4701 return "retl\n\tsub\t%%sp, -%0, %%sp";
4703 else if (actual_fsize <= 8192)
4705 operands[0] = GEN_INT (actual_fsize - 4096);
4706 if (SKIP_CALLERS_UNIMP_P)
4707 return "sub\t%%sp, -4096, %%sp\n\tjmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
4708 else
4709 return "sub\t%%sp, -4096, %%sp\n\tretl\n\tsub\t%%sp, -%0, %%sp";
4711 else if (SKIP_CALLERS_UNIMP_P)
4713 if ((actual_fsize & 0x3ff) != 0)
4714 return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
4715 else
4716 return "sethi\t%%hi(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
4718 else
4720 if ((actual_fsize & 0x3ff) != 0)
4721 return "sethi %%hi(%a0),%%g1\n\tor %%g1,%%lo(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
4722 else
4723 return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
4726 else if (TARGET_V9)
4728 if (delay)
4730 epilogue_renumber (&SET_DEST (PATTERN (delay)));
4731 epilogue_renumber (&SET_SRC (PATTERN (delay)));
4733 if (SKIP_CALLERS_UNIMP_P)
4734 return "return\t%%i7+12%#";
4735 else
4736 return "return\t%%i7+8%#";
4738 else
4740 if (delay)
4741 abort ();
4742 if (SKIP_CALLERS_UNIMP_P)
4743 return "jmp\t%%i7+12\n\trestore";
4744 else
4745 return "ret\n\trestore";
4749 /* Leaf functions and non-leaf functions have different needs. */
4751 static int
4752 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
4754 static int
4755 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
4757 static int *reg_alloc_orders[] = {
4758 reg_leaf_alloc_order,
4759 reg_nonleaf_alloc_order};
4761 void
4762 order_regs_for_local_alloc ()
4764 static int last_order_nonleaf = 1;
4766 if (regs_ever_live[15] != last_order_nonleaf)
4768 last_order_nonleaf = !last_order_nonleaf;
4769 bcopy ((char *) reg_alloc_orders[last_order_nonleaf],
4770 (char *) reg_alloc_order, FIRST_PSEUDO_REGISTER * sizeof (int));
4774 /* Return 1 if REG and MEM are legitimate enough to allow the various
4775 mem<-->reg splits to be run. */
4778 sparc_splitdi_legitimate (reg, mem)
4779 rtx reg;
4780 rtx mem;
4782 /* Punt if we are here by mistake. */
4783 if (! reload_completed)
4784 abort ();
4786 /* We must have an offsettable memory reference. */
4787 if (! offsettable_memref_p (mem))
4788 return 0;
4790 /* If we have legitimate args for ldd/std, we do not want
4791 the split to happen. */
4792 if ((REGNO (reg) % 2) == 0
4793 && mem_min_alignment (mem, 8))
4794 return 0;
4796 /* Success. */
4797 return 1;
4800 /* Return 1 if x and y are some kind of REG and they refer to
4801 different hard registers. This test is guarenteed to be
4802 run after reload. */
4805 sparc_absnegfloat_split_legitimate (x, y)
4806 rtx x, y;
4808 if (GET_CODE (x) == SUBREG)
4809 x = alter_subreg (x);
4810 if (GET_CODE (x) != REG)
4811 return 0;
4812 if (GET_CODE (y) == SUBREG)
4813 y = alter_subreg (y);
4814 if (GET_CODE (y) != REG)
4815 return 0;
4816 if (REGNO (x) == REGNO (y))
4817 return 0;
4818 return 1;
4821 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
4822 This makes them candidates for using ldd and std insns.
4824 Note reg1 and reg2 *must* be hard registers. */
4827 registers_ok_for_ldd_peep (reg1, reg2)
4828 rtx reg1, reg2;
4830 /* We might have been passed a SUBREG. */
4831 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
4832 return 0;
4834 if (REGNO (reg1) % 2 != 0)
4835 return 0;
4837 /* Integer ldd is deprecated in SPARC V9 */
4838 if (TARGET_V9 && REGNO (reg1) < 32)
4839 return 0;
4841 return (REGNO (reg1) == REGNO (reg2) - 1);
4844 /* Return 1 if addr1 and addr2 are suitable for use in an ldd or
4845 std insn.
4847 This can only happen when addr1 and addr2 are consecutive memory
4848 locations (addr1 + 4 == addr2). addr1 must also be aligned on a
4849 64 bit boundary (addr1 % 8 == 0).
4851 We know %sp and %fp are kept aligned on a 64 bit boundary. Other
4852 registers are assumed to *never* be properly aligned and are
4853 rejected.
4855 Knowing %sp and %fp are kept aligned on a 64 bit boundary, we
4856 need only check that the offset for addr1 % 8 == 0. */
4859 addrs_ok_for_ldd_peep (addr1, addr2)
4860 rtx addr1, addr2;
4862 int reg1, offset1;
4864 /* Extract a register number and offset (if used) from the first addr. */
4865 if (GET_CODE (addr1) == PLUS)
4867 /* If not a REG, return zero. */
4868 if (GET_CODE (XEXP (addr1, 0)) != REG)
4869 return 0;
4870 else
4872 reg1 = REGNO (XEXP (addr1, 0));
4873 /* The offset must be constant! */
4874 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
4875 return 0;
4876 offset1 = INTVAL (XEXP (addr1, 1));
4879 else if (GET_CODE (addr1) != REG)
4880 return 0;
4881 else
4883 reg1 = REGNO (addr1);
4884 /* This was a simple (mem (reg)) expression. Offset is 0. */
4885 offset1 = 0;
4888 /* Make sure the second address is a (mem (plus (reg) (const_int). */
4889 if (GET_CODE (addr2) != PLUS)
4890 return 0;
4892 if (GET_CODE (XEXP (addr2, 0)) != REG
4893 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
4894 return 0;
4896 /* Only %fp and %sp are allowed. Additionally both addresses must
4897 use the same register. */
4898 if (reg1 != FRAME_POINTER_REGNUM && reg1 != STACK_POINTER_REGNUM)
4899 return 0;
4901 if (reg1 != REGNO (XEXP (addr2, 0)))
4902 return 0;
4904 /* The first offset must be evenly divisible by 8 to ensure the
4905 address is 64 bit aligned. */
4906 if (offset1 % 8 != 0)
4907 return 0;
4909 /* The offset for the second addr must be 4 more than the first addr. */
4910 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
4911 return 0;
4913 /* All the tests passed. addr1 and addr2 are valid for ldd and std
4914 instructions. */
4915 return 1;
4918 /* Return 1 if reg is a pseudo, or is the first register in
4919 a hard register pair. This makes it a candidate for use in
4920 ldd and std insns. */
4923 register_ok_for_ldd (reg)
4924 rtx reg;
4926 /* We might have been passed a SUBREG. */
4927 if (GET_CODE (reg) != REG)
4928 return 0;
4930 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
4931 return (REGNO (reg) % 2 == 0);
4932 else
4933 return 1;
4936 /* Print operand X (an rtx) in assembler syntax to file FILE.
4937 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4938 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4940 void
4941 print_operand (file, x, code)
4942 FILE *file;
4943 rtx x;
4944 int code;
4946 switch (code)
4948 case '#':
4949 /* Output a 'nop' if there's nothing for the delay slot. */
4950 if (dbr_sequence_length () == 0)
4951 fputs ("\n\t nop", file);
4952 return;
4953 case '*':
4954 /* Output an annul flag if there's nothing for the delay slot and we
4955 are optimizing. This is always used with '(' below. */
4956 /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
4957 this is a dbx bug. So, we only do this when optimizing. */
4958 /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
4959 Always emit a nop in case the next instruction is a branch. */
4960 if (dbr_sequence_length () == 0
4961 && (optimize && (int)sparc_cpu < PROCESSOR_V9))
4962 fputs (",a", file);
4963 return;
4964 case '(':
4965 /* Output a 'nop' if there's nothing for the delay slot and we are
4966 not optimizing. This is always used with '*' above. */
4967 if (dbr_sequence_length () == 0
4968 && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
4969 fputs ("\n\t nop", file);
4970 return;
4971 case '_':
4972 /* Output the Embedded Medium/Anywhere code model base register. */
4973 fputs (EMBMEDANY_BASE_REG, file);
4974 return;
4975 case '@':
4976 /* Print out what we are using as the frame pointer. This might
4977 be %fp, or might be %sp+offset. */
4978 /* ??? What if offset is too big? Perhaps the caller knows it isn't? */
4979 fprintf (file, "%s+%d", frame_base_name, frame_base_offset);
4980 return;
4981 case 'Y':
4982 /* Adjust the operand to take into account a RESTORE operation. */
4983 if (GET_CODE (x) == CONST_INT)
4984 break;
4985 else if (GET_CODE (x) != REG)
4986 output_operand_lossage ("Invalid %%Y operand");
4987 else if (REGNO (x) < 8)
4988 fputs (reg_names[REGNO (x)], file);
4989 else if (REGNO (x) >= 24 && REGNO (x) < 32)
4990 fputs (reg_names[REGNO (x)-16], file);
4991 else
4992 output_operand_lossage ("Invalid %%Y operand");
4993 return;
4994 case 'L':
4995 /* Print out the low order register name of a register pair. */
4996 if (WORDS_BIG_ENDIAN)
4997 fputs (reg_names[REGNO (x)+1], file);
4998 else
4999 fputs (reg_names[REGNO (x)], file);
5000 return;
5001 case 'H':
5002 /* Print out the high order register name of a register pair. */
5003 if (WORDS_BIG_ENDIAN)
5004 fputs (reg_names[REGNO (x)], file);
5005 else
5006 fputs (reg_names[REGNO (x)+1], file);
5007 return;
5008 case 'R':
5009 /* Print out the second register name of a register pair or quad.
5010 I.e., R (%o0) => %o1. */
5011 fputs (reg_names[REGNO (x)+1], file);
5012 return;
5013 case 'S':
5014 /* Print out the third register name of a register quad.
5015 I.e., S (%o0) => %o2. */
5016 fputs (reg_names[REGNO (x)+2], file);
5017 return;
5018 case 'T':
5019 /* Print out the fourth register name of a register quad.
5020 I.e., T (%o0) => %o3. */
5021 fputs (reg_names[REGNO (x)+3], file);
5022 return;
5023 case 'x':
5024 /* Print a condition code register. */
5025 if (REGNO (x) == SPARC_ICC_REG)
5027 /* We don't handle CC[X]_NOOVmode because they're not supposed
5028 to occur here. */
5029 if (GET_MODE (x) == CCmode)
5030 fputs ("%icc", file);
5031 else if (GET_MODE (x) == CCXmode)
5032 fputs ("%xcc", file);
5033 else
5034 abort ();
5036 else
5037 /* %fccN register */
5038 fputs (reg_names[REGNO (x)], file);
5039 return;
5040 case 'm':
5041 /* Print the operand's address only. */
5042 output_address (XEXP (x, 0));
5043 return;
5044 case 'r':
5045 /* In this case we need a register. Use %g0 if the
5046 operand is const0_rtx. */
5047 if (x == const0_rtx
5048 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
5050 fputs ("%g0", file);
5051 return;
5053 else
5054 break;
5056 case 'A':
5057 switch (GET_CODE (x))
5059 case IOR: fputs ("or", file); break;
5060 case AND: fputs ("and", file); break;
5061 case XOR: fputs ("xor", file); break;
5062 default: output_operand_lossage ("Invalid %%A operand");
5064 return;
5066 case 'B':
5067 switch (GET_CODE (x))
5069 case IOR: fputs ("orn", file); break;
5070 case AND: fputs ("andn", file); break;
5071 case XOR: fputs ("xnor", file); break;
5072 default: output_operand_lossage ("Invalid %%B operand");
5074 return;
5076 /* These are used by the conditional move instructions. */
5077 case 'c' :
5078 case 'C':
5080 enum rtx_code rc = (code == 'c'
5081 ? reverse_condition (GET_CODE (x))
5082 : GET_CODE (x));
5083 switch (rc)
5085 case NE: fputs ("ne", file); break;
5086 case EQ: fputs ("e", file); break;
5087 case GE: fputs ("ge", file); break;
5088 case GT: fputs ("g", file); break;
5089 case LE: fputs ("le", file); break;
5090 case LT: fputs ("l", file); break;
5091 case GEU: fputs ("geu", file); break;
5092 case GTU: fputs ("gu", file); break;
5093 case LEU: fputs ("leu", file); break;
5094 case LTU: fputs ("lu", file); break;
5095 default: output_operand_lossage (code == 'c'
5096 ? "Invalid %%c operand"
5097 : "Invalid %%C operand");
5099 return;
5102 /* These are used by the movr instruction pattern. */
5103 case 'd':
5104 case 'D':
5106 enum rtx_code rc = (code == 'd'
5107 ? reverse_condition (GET_CODE (x))
5108 : GET_CODE (x));
5109 switch (rc)
5111 case NE: fputs ("ne", file); break;
5112 case EQ: fputs ("e", file); break;
5113 case GE: fputs ("gez", file); break;
5114 case LT: fputs ("lz", file); break;
5115 case LE: fputs ("lez", file); break;
5116 case GT: fputs ("gz", file); break;
5117 default: output_operand_lossage (code == 'd'
5118 ? "Invalid %%d operand"
5119 : "Invalid %%D operand");
5121 return;
5124 case 'b':
5126 /* Print a sign-extended character. */
5127 int i = INTVAL (x) & 0xff;
5128 if (i & 0x80)
5129 i |= 0xffffff00;
5130 fprintf (file, "%d", i);
5131 return;
5134 case 'f':
5135 /* Operand must be a MEM; write its address. */
5136 if (GET_CODE (x) != MEM)
5137 output_operand_lossage ("Invalid %%f operand");
5138 output_address (XEXP (x, 0));
5139 return;
5141 case 0:
5142 /* Do nothing special. */
5143 break;
5145 default:
5146 /* Undocumented flag. */
5147 output_operand_lossage ("invalid operand output code");
5150 if (GET_CODE (x) == REG)
5151 fputs (reg_names[REGNO (x)], file);
5152 else if (GET_CODE (x) == MEM)
5154 fputc ('[', file);
5155 /* Poor Sun assembler doesn't understand absolute addressing. */
5156 if (CONSTANT_P (XEXP (x, 0))
5157 && ! TARGET_LIVE_G0)
5158 fputs ("%g0+", file);
5159 output_address (XEXP (x, 0));
5160 fputc (']', file);
5162 else if (GET_CODE (x) == HIGH)
5164 fputs ("%hi(", file);
5165 output_addr_const (file, XEXP (x, 0));
5166 fputc (')', file);
5168 else if (GET_CODE (x) == LO_SUM)
5170 print_operand (file, XEXP (x, 0), 0);
5171 if (TARGET_CM_MEDMID)
5172 fputs ("+%l44(", file);
5173 else
5174 fputs ("+%lo(", file);
5175 output_addr_const (file, XEXP (x, 1));
5176 fputc (')', file);
5178 else if (GET_CODE (x) == CONST_DOUBLE
5179 && (GET_MODE (x) == VOIDmode
5180 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
5182 if (CONST_DOUBLE_HIGH (x) == 0)
5183 fprintf (file, "%u", CONST_DOUBLE_LOW (x));
5184 else if (CONST_DOUBLE_HIGH (x) == -1
5185 && CONST_DOUBLE_LOW (x) < 0)
5186 fprintf (file, "%d", CONST_DOUBLE_LOW (x));
5187 else
5188 output_operand_lossage ("long long constant not a valid immediate operand");
5190 else if (GET_CODE (x) == CONST_DOUBLE)
5191 output_operand_lossage ("floating point constant not a valid immediate operand");
5192 else { output_addr_const (file, x); }
5195 /* This function outputs assembler code for VALUE to FILE, where VALUE is
5196 a 64 bit (DImode) value. */
5198 /* ??? If there is a 64 bit counterpart to .word that the assembler
5199 understands, then using that would simply this code greatly. */
5200 /* ??? We only output .xword's for symbols and only then in environments
5201 where the assembler can handle them. */
5203 void
5204 output_double_int (file, value)
5205 FILE *file;
5206 rtx value;
5208 if (GET_CODE (value) == CONST_INT)
5210 /* ??? This has endianness issues. */
5211 #if HOST_BITS_PER_WIDE_INT == 64
5212 HOST_WIDE_INT xword = INTVAL (value);
5213 HOST_WIDE_INT high, low;
5215 high = (xword >> 32) & 0xffffffff;
5216 low = xword & 0xffffffff;
5217 ASM_OUTPUT_INT (file, GEN_INT (high));
5218 ASM_OUTPUT_INT (file, GEN_INT (low));
5219 #else
5220 if (INTVAL (value) < 0)
5221 ASM_OUTPUT_INT (file, constm1_rtx);
5222 else
5223 ASM_OUTPUT_INT (file, const0_rtx);
5224 ASM_OUTPUT_INT (file, value);
5225 #endif
5227 else if (GET_CODE (value) == CONST_DOUBLE)
5229 ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_HIGH (value)));
5230 ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_LOW (value)));
5232 else if (GET_CODE (value) == SYMBOL_REF
5233 || GET_CODE (value) == CONST
5234 || GET_CODE (value) == PLUS
5235 || (TARGET_ARCH64 &&
5236 (GET_CODE (value) == LABEL_REF
5237 || GET_CODE (value) == CODE_LABEL
5238 || GET_CODE (value) == MINUS)))
5240 if (! TARGET_V9)
5242 ASM_OUTPUT_INT (file, const0_rtx);
5243 ASM_OUTPUT_INT (file, value);
5245 else
5247 fprintf (file, "\t%s\t", ASM_LONGLONG);
5248 output_addr_const (file, value);
5249 fprintf (file, "\n");
5252 else
5253 abort ();
5256 /* Return the value of a code used in the .proc pseudo-op that says
5257 what kind of result this function returns. For non-C types, we pick
5258 the closest C type. */
5260 #ifndef CHAR_TYPE_SIZE
5261 #define CHAR_TYPE_SIZE BITS_PER_UNIT
5262 #endif
5264 #ifndef SHORT_TYPE_SIZE
5265 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
5266 #endif
5268 #ifndef INT_TYPE_SIZE
5269 #define INT_TYPE_SIZE BITS_PER_WORD
5270 #endif
5272 #ifndef LONG_TYPE_SIZE
5273 #define LONG_TYPE_SIZE BITS_PER_WORD
5274 #endif
5276 #ifndef LONG_LONG_TYPE_SIZE
5277 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
5278 #endif
5280 #ifndef FLOAT_TYPE_SIZE
5281 #define FLOAT_TYPE_SIZE BITS_PER_WORD
5282 #endif
5284 #ifndef DOUBLE_TYPE_SIZE
5285 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
5286 #endif
5288 #ifndef LONG_DOUBLE_TYPE_SIZE
5289 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
5290 #endif
5292 unsigned long
5293 sparc_type_code (type)
5294 register tree type;
5296 register unsigned long qualifiers = 0;
5297 register unsigned shift;
5299 /* Only the first 30 bits of the qualifier are valid. We must refrain from
5300 setting more, since some assemblers will give an error for this. Also,
5301 we must be careful to avoid shifts of 32 bits or more to avoid getting
5302 unpredictable results. */
5304 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
5306 switch (TREE_CODE (type))
5308 case ERROR_MARK:
5309 return qualifiers;
5311 case ARRAY_TYPE:
5312 qualifiers |= (3 << shift);
5313 break;
5315 case FUNCTION_TYPE:
5316 case METHOD_TYPE:
5317 qualifiers |= (2 << shift);
5318 break;
5320 case POINTER_TYPE:
5321 case REFERENCE_TYPE:
5322 case OFFSET_TYPE:
5323 qualifiers |= (1 << shift);
5324 break;
5326 case RECORD_TYPE:
5327 return (qualifiers | 8);
5329 case UNION_TYPE:
5330 case QUAL_UNION_TYPE:
5331 return (qualifiers | 9);
5333 case ENUMERAL_TYPE:
5334 return (qualifiers | 10);
5336 case VOID_TYPE:
5337 return (qualifiers | 16);
5339 case INTEGER_TYPE:
5340 /* If this is a range type, consider it to be the underlying
5341 type. */
5342 if (TREE_TYPE (type) != 0)
5343 break;
5345 /* Carefully distinguish all the standard types of C,
5346 without messing up if the language is not C. We do this by
5347 testing TYPE_PRECISION and TREE_UNSIGNED. The old code used to
5348 look at both the names and the above fields, but that's redundant.
5349 Any type whose size is between two C types will be considered
5350 to be the wider of the two types. Also, we do not have a
5351 special code to use for "long long", so anything wider than
5352 long is treated the same. Note that we can't distinguish
5353 between "int" and "long" in this code if they are the same
5354 size, but that's fine, since neither can the assembler. */
5356 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
5357 return (qualifiers | (TREE_UNSIGNED (type) ? 12 : 2));
5359 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
5360 return (qualifiers | (TREE_UNSIGNED (type) ? 13 : 3));
5362 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
5363 return (qualifiers | (TREE_UNSIGNED (type) ? 14 : 4));
5365 else
5366 return (qualifiers | (TREE_UNSIGNED (type) ? 15 : 5));
5368 case REAL_TYPE:
5369 /* If this is a range type, consider it to be the underlying
5370 type. */
5371 if (TREE_TYPE (type) != 0)
5372 break;
5374 /* Carefully distinguish all the standard types of C,
5375 without messing up if the language is not C. */
5377 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
5378 return (qualifiers | 6);
5380 else
5381 return (qualifiers | 7);
5383 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
5384 /* ??? We need to distinguish between double and float complex types,
5385 but I don't know how yet because I can't reach this code from
5386 existing front-ends. */
5387 return (qualifiers | 7); /* Who knows? */
5389 case CHAR_TYPE: /* GNU Pascal CHAR type. Not used in C. */
5390 case BOOLEAN_TYPE: /* GNU Fortran BOOLEAN type. */
5391 case FILE_TYPE: /* GNU Pascal FILE type. */
5392 case SET_TYPE: /* GNU Pascal SET type. */
5393 case LANG_TYPE: /* ? */
5394 return qualifiers;
5396 default:
5397 abort (); /* Not a type! */
5401 return qualifiers;
5404 /* Nested function support. */
5406 /* Emit RTL insns to initialize the variable parts of a trampoline.
5407 FNADDR is an RTX for the address of the function's pure code.
5408 CXT is an RTX for the static chain value for the function.
5410 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
5411 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
5412 (to store insns). This is a bit excessive. Perhaps a different
5413 mechanism would be better here.
5415 Emit enough FLUSH insns to synchronize the data and instruction caches. */
5417 void
5418 sparc_initialize_trampoline (tramp, fnaddr, cxt)
5419 rtx tramp, fnaddr, cxt;
5421 /* SPARC 32 bit trampoline:
5423 sethi %hi(fn), %g1
5424 sethi %hi(static), %g2
5425 jmp %g1+%lo(fn)
5426 or %g2, %lo(static), %g2
5428 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
5429 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
5431 #ifdef TRANSFER_FROM_TRAMPOLINE
5432 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
5433 0, VOIDmode, 1, tramp, Pmode);
5434 #endif
5436 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
5437 expand_binop (SImode, ior_optab,
5438 expand_shift (RSHIFT_EXPR, SImode, fnaddr,
5439 size_int (10), 0, 1),
5440 GEN_INT (0x03000000),
5441 NULL_RTX, 1, OPTAB_DIRECT));
5443 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
5444 expand_binop (SImode, ior_optab,
5445 expand_shift (RSHIFT_EXPR, SImode, cxt,
5446 size_int (10), 0, 1),
5447 GEN_INT (0x05000000),
5448 NULL_RTX, 1, OPTAB_DIRECT));
5450 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
5451 expand_binop (SImode, ior_optab,
5452 expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
5453 GEN_INT (0x81c06000),
5454 NULL_RTX, 1, OPTAB_DIRECT));
5456 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
5457 expand_binop (SImode, ior_optab,
5458 expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
5459 GEN_INT (0x8410a000),
5460 NULL_RTX, 1, OPTAB_DIRECT));
5462 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
5463 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
5464 aligned on a 16 byte boundary so one flush clears it all. */
5465 if (sparc_cpu != PROCESSOR_ULTRASPARC)
5466 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
5467 plus_constant (tramp, 8)))));
5470 /* The 64 bit version is simpler because it makes more sense to load the
5471 values as "immediate" data out of the trampoline. It's also easier since
5472 we can read the PC without clobbering a register. */
5474 void
5475 sparc64_initialize_trampoline (tramp, fnaddr, cxt)
5476 rtx tramp, fnaddr, cxt;
5478 #ifdef TRANSFER_FROM_TRAMPOLINE
5479 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
5480 0, VOIDmode, 1, tramp, Pmode);
5481 #endif
5484 rd %pc, %g1
5485 ldx [%g1+24], %g5
5486 jmp %g5
5487 ldx [%g1+16], %g5
5488 +16 bytes data
5491 emit_move_insn (gen_rtx_MEM (SImode, tramp),
5492 GEN_INT (0x83414000));
5493 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
5494 GEN_INT (0xca586018));
5495 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
5496 GEN_INT (0x81c14000));
5497 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
5498 GEN_INT (0xca586010));
5499 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
5500 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
5501 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));
5503 if (sparc_cpu != PROCESSOR_ULTRASPARC)
5504 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
5507 /* Subroutines to support a flat (single) register window calling
5508 convention. */
5510 /* Single-register window sparc stack frames look like:
5512 Before call After call
5513 +-----------------------+ +-----------------------+
5514 high | | | |
5515 mem | caller's temps. | | caller's temps. |
5516 | | | |
5517 +-----------------------+ +-----------------------+
5518 | | | |
5519 | arguments on stack. | | arguments on stack. |
5520 | | | |
5521 +-----------------------+FP+92->+-----------------------+
5522 | 6 words to save | | 6 words to save |
5523 | arguments passed | | arguments passed |
5524 | in registers, even | | in registers, even |
5525 | if not passed. | | if not passed. |
5526 SP+68->+-----------------------+FP+68->+-----------------------+
5527 | 1 word struct addr | | 1 word struct addr |
5528 +-----------------------+FP+64->+-----------------------+
5529 | | | |
5530 | 16 word reg save area | | 16 word reg save area |
5531 | | | |
5532 SP->+-----------------------+ FP->+-----------------------+
5533 | 4 word area for |
5534 | fp/alu reg moves |
5535 FP-16->+-----------------------+
5537 | local variables |
5539 +-----------------------+
5541 | fp register save |
5543 +-----------------------+
5545 | gp register save |
5547 +-----------------------+
5549 | alloca allocations |
5551 +-----------------------+
5553 | arguments on stack |
5555 SP+92->+-----------------------+
5556 | 6 words to save |
5557 | arguments passed |
5558 | in registers, even |
5559 low | if not passed. |
5560 memory SP+68->+-----------------------+
5561 | 1 word struct addr |
5562 SP+64->+-----------------------+
5564 I 16 word reg save area |
5566 SP->+-----------------------+ */
5568 /* Structure to be filled in by sparc_flat_compute_frame_size with register
5569 save masks, and offsets for the current function. */
5571 struct sparc_frame_info
5573 unsigned long total_size; /* # bytes that the entire frame takes up. */
5574 unsigned long var_size; /* # bytes that variables take up. */
5575 unsigned long args_size; /* # bytes that outgoing arguments take up. */
5576 unsigned long extra_size; /* # bytes of extra gunk. */
5577 unsigned int gp_reg_size; /* # bytes needed to store gp regs. */
5578 unsigned int fp_reg_size; /* # bytes needed to store fp regs. */
5579 unsigned long gmask; /* Mask of saved gp registers. */
5580 unsigned long fmask; /* Mask of saved fp registers. */
5581 unsigned long reg_offset; /* Offset from new sp to store regs. */
5582 int initialized; /* Nonzero if frame size already calculated. */
5585 /* Current frame information calculated by sparc_flat_compute_frame_size. */
5586 struct sparc_frame_info current_frame_info;
5588 /* Zero structure to initialize current_frame_info. */
5589 struct sparc_frame_info zero_frame_info;
5591 /* Tell prologue and epilogue if register REGNO should be saved / restored. */
5593 #define RETURN_ADDR_REGNUM 15
5594 #define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
5595 #define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
5597 #define MUST_SAVE_REGISTER(regno) \
5598 ((regs_ever_live[regno] && !call_used_regs[regno]) \
5599 || (regno == FRAME_POINTER_REGNUM && frame_pointer_needed) \
5600 || (regno == RETURN_ADDR_REGNUM && regs_ever_live[RETURN_ADDR_REGNUM]))
5602 /* Return the bytes needed to compute the frame pointer from the current
5603 stack pointer. */
5605 unsigned long
5606 sparc_flat_compute_frame_size (size)
5607 int size; /* # of var. bytes allocated. */
5609 int regno;
5610 unsigned long total_size; /* # bytes that the entire frame takes up. */
5611 unsigned long var_size; /* # bytes that variables take up. */
5612 unsigned long args_size; /* # bytes that outgoing arguments take up. */
5613 unsigned long extra_size; /* # extra bytes. */
5614 unsigned int gp_reg_size; /* # bytes needed to store gp regs. */
5615 unsigned int fp_reg_size; /* # bytes needed to store fp regs. */
5616 unsigned long gmask; /* Mask of saved gp registers. */
5617 unsigned long fmask; /* Mask of saved fp registers. */
5618 unsigned long reg_offset; /* Offset to register save area. */
5619 int need_aligned_p; /* 1 if need the save area 8 byte aligned. */
5621 /* This is the size of the 16 word reg save area, 1 word struct addr
5622 area, and 4 word fp/alu register copy area. */
5623 extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
5624 var_size = size;
5625 gp_reg_size = 0;
5626 fp_reg_size = 0;
5627 gmask = 0;
5628 fmask = 0;
5629 reg_offset = 0;
5630 need_aligned_p = 0;
5632 args_size = 0;
5633 if (!leaf_function_p ())
5635 /* Also include the size needed for the 6 parameter registers. */
5636 args_size = current_function_outgoing_args_size + 24;
5638 total_size = var_size + args_size;
5640 /* Calculate space needed for gp registers. */
5641 for (regno = 1; regno <= 31; regno++)
5643 if (MUST_SAVE_REGISTER (regno))
5645 /* If we need to save two regs in a row, ensure there's room to bump
5646 up the address to align it to a doubleword boundary. */
5647 if ((regno & 0x1) == 0 && MUST_SAVE_REGISTER (regno+1))
5649 if (gp_reg_size % 8 != 0)
5650 gp_reg_size += 4;
5651 gp_reg_size += 2 * UNITS_PER_WORD;
5652 gmask |= 3 << regno;
5653 regno++;
5654 need_aligned_p = 1;
5656 else
5658 gp_reg_size += UNITS_PER_WORD;
5659 gmask |= 1 << regno;
5664 /* Calculate space needed for fp registers. */
5665 for (regno = 32; regno <= 63; regno++)
5667 if (regs_ever_live[regno] && !call_used_regs[regno])
5669 fp_reg_size += UNITS_PER_WORD;
5670 fmask |= 1 << (regno - 32);
5674 if (gmask || fmask)
5676 int n;
5677 reg_offset = FIRST_PARM_OFFSET(0) + args_size;
5678 /* Ensure save area is 8 byte aligned if we need it. */
5679 n = reg_offset % 8;
5680 if (need_aligned_p && n != 0)
5682 total_size += 8 - n;
5683 reg_offset += 8 - n;
5685 total_size += gp_reg_size + fp_reg_size;
5688 /* If we must allocate a stack frame at all, we must also allocate
5689 room for register window spillage, so as to be binary compatible
5690 with libraries and operating systems that do not use -mflat. */
5691 if (total_size > 0)
5692 total_size += extra_size;
5693 else
5694 extra_size = 0;
5696 total_size = SPARC_STACK_ALIGN (total_size);
5698 /* Save other computed information. */
5699 current_frame_info.total_size = total_size;
5700 current_frame_info.var_size = var_size;
5701 current_frame_info.args_size = args_size;
5702 current_frame_info.extra_size = extra_size;
5703 current_frame_info.gp_reg_size = gp_reg_size;
5704 current_frame_info.fp_reg_size = fp_reg_size;
5705 current_frame_info.gmask = gmask;
5706 current_frame_info.fmask = fmask;
5707 current_frame_info.reg_offset = reg_offset;
5708 current_frame_info.initialized = reload_completed;
5710 /* Ok, we're done. */
5711 return total_size;
5714 /* Save/restore registers in GMASK and FMASK at register BASE_REG plus offset
5715 OFFSET.
5717 BASE_REG must be 8 byte aligned. This allows us to test OFFSET for
5718 appropriate alignment and use DOUBLEWORD_OP when we can. We assume
5719 [BASE_REG+OFFSET] will always be a valid address.
5721 WORD_OP is either "st" for save, "ld" for restore.
5722 DOUBLEWORD_OP is either "std" for save, "ldd" for restore. */
5724 void
5725 sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
5726 doubleword_op, base_offset)
5727 FILE *file;
5728 char *base_reg;
5729 unsigned int offset;
5730 unsigned long gmask;
5731 unsigned long fmask;
5732 char *word_op;
5733 char *doubleword_op;
5734 unsigned long base_offset;
5736 int regno;
5738 if (gmask == 0 && fmask == 0)
5739 return;
5741 /* Save registers starting from high to low. We've already saved the
5742 previous frame pointer and previous return address for the debugger's
5743 sake. The debugger allows us to not need a nop in the epilog if at least
5744 one register is reloaded in addition to return address. */
5746 if (gmask)
5748 for (regno = 1; regno <= 31; regno++)
5750 if ((gmask & (1L << regno)) != 0)
5752 if ((regno & 0x1) == 0 && ((gmask & (1L << (regno+1))) != 0))
5754 /* We can save two registers in a row. If we're not at a
5755 double word boundary, move to one.
5756 sparc_flat_compute_frame_size ensures there's room to do
5757 this. */
5758 if (offset % 8 != 0)
5759 offset += UNITS_PER_WORD;
5761 if (word_op[0] == 's')
5763 fprintf (file, "\t%s\t%s, [%s+%d]\n",
5764 doubleword_op, reg_names[regno],
5765 base_reg, offset);
5766 if (dwarf2out_do_frame ())
5768 char *l = dwarf2out_cfi_label ();
5769 dwarf2out_reg_save (l, regno, offset + base_offset);
5770 dwarf2out_reg_save
5771 (l, regno+1, offset+base_offset + UNITS_PER_WORD);
5774 else
5775 fprintf (file, "\t%s\t[%s+%d], %s\n",
5776 doubleword_op, base_reg, offset,
5777 reg_names[regno]);
5779 offset += 2 * UNITS_PER_WORD;
5780 regno++;
5782 else
5784 if (word_op[0] == 's')
5786 fprintf (file, "\t%s\t%s, [%s+%d]\n",
5787 word_op, reg_names[regno],
5788 base_reg, offset);
5789 if (dwarf2out_do_frame ())
5790 dwarf2out_reg_save ("", regno, offset + base_offset);
5792 else
5793 fprintf (file, "\t%s\t[%s+%d], %s\n",
5794 word_op, base_reg, offset, reg_names[regno]);
5796 offset += UNITS_PER_WORD;
5802 if (fmask)
5804 for (regno = 32; regno <= 63; regno++)
5806 if ((fmask & (1L << (regno - 32))) != 0)
5808 if (word_op[0] == 's')
5810 fprintf (file, "\t%s\t%s, [%s+%d]\n",
5811 word_op, reg_names[regno],
5812 base_reg, offset);
5813 if (dwarf2out_do_frame ())
5814 dwarf2out_reg_save ("", regno, offset + base_offset);
5816 else
5817 fprintf (file, "\t%s\t[%s+%d], %s\n",
5818 word_op, base_reg, offset, reg_names[regno]);
5820 offset += UNITS_PER_WORD;
5826 /* Set up the stack and frame (if desired) for the function. */
5828 void
5829 sparc_flat_output_function_prologue (file, size)
5830 FILE *file;
5831 int size;
5833 char *sp_str = reg_names[STACK_POINTER_REGNUM];
5834 unsigned long gmask = current_frame_info.gmask;
5836 sparc_output_scratch_registers (file);
5838 /* This is only for the human reader. */
5839 fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
5840 fprintf (file, "\t%s# vars= %ld, regs= %d/%d, args= %d, extra= %ld\n",
5841 ASM_COMMENT_START,
5842 current_frame_info.var_size,
5843 current_frame_info.gp_reg_size / 4,
5844 current_frame_info.fp_reg_size / 4,
5845 current_function_outgoing_args_size,
5846 current_frame_info.extra_size);
5848 size = SPARC_STACK_ALIGN (size);
5849 size = (! current_frame_info.initialized
5850 ? sparc_flat_compute_frame_size (size)
5851 : current_frame_info.total_size);
5853 /* These cases shouldn't happen. Catch them now. */
5854 if (size == 0 && (gmask || current_frame_info.fmask))
5855 abort ();
5857 /* Allocate our stack frame by decrementing %sp.
5858 At present, the only algorithm gdb can use to determine if this is a
5859 flat frame is if we always set %i7 if we set %sp. This can be optimized
5860 in the future by putting in some sort of debugging information that says
5861 this is a `flat' function. However, there is still the case of debugging
5862 code without such debugging information (including cases where most fns
5863 have such info, but there is one that doesn't). So, always do this now
5864 so we don't get a lot of code out there that gdb can't handle.
5865 If the frame pointer isn't needn't then that's ok - gdb won't be able to
5866 distinguish us from a non-flat function but there won't (and shouldn't)
5867 be any differences anyway. The return pc is saved (if necessary) right
5868 after %i7 so gdb won't have to look too far to find it. */
5869 if (size > 0)
5871 unsigned int reg_offset = current_frame_info.reg_offset;
5872 char *fp_str = reg_names[FRAME_POINTER_REGNUM];
5873 const char *t1_str = "%g1";
5875 /* Things get a little tricky if local variables take up more than ~4096
5876 bytes and outgoing arguments take up more than ~4096 bytes. When that
5877 happens, the register save area can't be accessed from either end of
5878 the frame. Handle this by decrementing %sp to the start of the gp
5879 register save area, save the regs, update %i7, and then set %sp to its
5880 final value. Given that we only have one scratch register to play
5881 with it is the cheapest solution, and it helps gdb out as it won't
5882 slow down recognition of flat functions.
5883 Don't change the order of insns emitted here without checking with
5884 the gdb folk first. */
5886 /* Is the entire register save area offsettable from %sp? */
5887 if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
5889 if (size <= 4096)
5891 fprintf (file, "\tadd\t%s, %d, %s\n",
5892 sp_str, -size, sp_str);
5893 if (gmask & FRAME_POINTER_MASK)
5895 fprintf (file, "\tst\t%s, [%s+%d]\n",
5896 fp_str, sp_str, reg_offset);
5897 fprintf (file, "\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
5898 sp_str, -size, fp_str, ASM_COMMENT_START);
5899 reg_offset += 4;
5902 else
5904 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
5905 size, t1_str, sp_str, t1_str, sp_str);
5906 if (gmask & FRAME_POINTER_MASK)
5908 fprintf (file, "\tst\t%s, [%s+%d]\n",
5909 fp_str, sp_str, reg_offset);
5910 fprintf (file, "\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
5911 sp_str, t1_str, fp_str, ASM_COMMENT_START);
5912 reg_offset += 4;
5915 if (dwarf2out_do_frame ())
5917 char *l = dwarf2out_cfi_label ();
5918 if (gmask & FRAME_POINTER_MASK)
5920 dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
5921 reg_offset - 4 - size);
5922 dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
5924 else
5925 dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size);
5927 if (gmask & RETURN_ADDR_MASK)
5929 fprintf (file, "\tst\t%s, [%s+%d]\n",
5930 reg_names[RETURN_ADDR_REGNUM], sp_str, reg_offset);
5931 if (dwarf2out_do_frame ())
5932 dwarf2out_return_save ("", reg_offset - size);
5933 reg_offset += 4;
5935 sparc_flat_save_restore (file, sp_str, reg_offset,
5936 gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
5937 current_frame_info.fmask,
5938 "st", "std", -size);
5940 else
5942 /* Subtract %sp in two steps, but make sure there is always a
5943 64 byte register save area, and %sp is properly aligned. */
5944 /* Amount to decrement %sp by, the first time. */
5945 unsigned int size1 = ((size - reg_offset + 64) + 15) & -16;
5946 /* Offset to register save area from %sp. */
5947 unsigned int offset = size1 - (size - reg_offset);
5949 if (size1 <= 4096)
5951 fprintf (file, "\tadd\t%s, %d, %s\n",
5952 sp_str, -size1, sp_str);
5953 if (gmask & FRAME_POINTER_MASK)
5955 fprintf (file, "\tst\t%s, [%s+%d]\n\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
5956 fp_str, sp_str, offset, sp_str, -size1, fp_str,
5957 ASM_COMMENT_START);
5958 offset += 4;
5961 else
5963 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
5964 size1, t1_str, sp_str, t1_str, sp_str);
5965 if (gmask & FRAME_POINTER_MASK)
5967 fprintf (file, "\tst\t%s, [%s+%d]\n\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
5968 fp_str, sp_str, offset, sp_str, t1_str, fp_str,
5969 ASM_COMMENT_START);
5970 offset += 4;
5973 if (dwarf2out_do_frame ())
5975 char *l = dwarf2out_cfi_label ();
5976 if (gmask & FRAME_POINTER_MASK)
5978 dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
5979 offset - 4 - size1);
5980 dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
5982 else
5983 dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size1);
5985 if (gmask & RETURN_ADDR_MASK)
5987 fprintf (file, "\tst\t%s, [%s+%d]\n",
5988 reg_names[RETURN_ADDR_REGNUM], sp_str, offset);
5989 if (dwarf2out_do_frame ())
5990 /* offset - size1 == reg_offset - size
5991 if reg_offset were updated above like offset. */
5992 dwarf2out_return_save ("", offset - size1);
5993 offset += 4;
5995 sparc_flat_save_restore (file, sp_str, offset,
5996 gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
5997 current_frame_info.fmask,
5998 "st", "std", -size1);
5999 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
6000 size - size1, t1_str, sp_str, t1_str, sp_str);
6001 if (dwarf2out_do_frame ())
6002 if (! (gmask & FRAME_POINTER_MASK))
6003 dwarf2out_def_cfa ("", STACK_POINTER_REGNUM, size);
6007 fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
6010 /* Do any necessary cleanup after a function to restore stack, frame,
6011 and regs. */
6013 void
6014 sparc_flat_output_function_epilogue (file, size)
6015 FILE *file;
6016 int size;
6018 rtx epilogue_delay = current_function_epilogue_delay_list;
6019 int noepilogue = FALSE;
6021 /* This is only for the human reader. */
6022 fprintf (file, "\t%s#EPILOGUE#\n", ASM_COMMENT_START);
6024 /* The epilogue does not depend on any registers, but the stack
6025 registers, so we assume that if we have 1 pending nop, it can be
6026 ignored, and 2 it must be filled (2 nops occur for integer
6027 multiply and divide). */
6029 size = SPARC_STACK_ALIGN (size);
6030 size = (!current_frame_info.initialized
6031 ? sparc_flat_compute_frame_size (size)
6032 : current_frame_info.total_size);
6034 if (size == 0 && epilogue_delay == 0)
6036 rtx insn = get_last_insn ();
6038 /* If the last insn was a BARRIER, we don't have to write any code
6039 because a jump (aka return) was put there. */
6040 if (GET_CODE (insn) == NOTE)
6041 insn = prev_nonnote_insn (insn);
6042 if (insn && GET_CODE (insn) == BARRIER)
6043 noepilogue = TRUE;
6046 if (!noepilogue)
6048 unsigned int reg_offset = current_frame_info.reg_offset;
6049 unsigned int size1;
6050 char *sp_str = reg_names[STACK_POINTER_REGNUM];
6051 char *fp_str = reg_names[FRAME_POINTER_REGNUM];
6052 const char *t1_str = "%g1";
6054 /* In the reload sequence, we don't need to fill the load delay
6055 slots for most of the loads, also see if we can fill the final
6056 delay slot if not otherwise filled by the reload sequence. */
6058 if (size > 4095)
6059 fprintf (file, "\tset\t%d, %s\n", size, t1_str);
6061 if (frame_pointer_needed)
6063 if (size > 4095)
6064 fprintf (file,"\tsub\t%s, %s, %s\t\t%s# sp not trusted here\n",
6065 fp_str, t1_str, sp_str, ASM_COMMENT_START);
6066 else
6067 fprintf (file,"\tsub\t%s, %d, %s\t\t%s# sp not trusted here\n",
6068 fp_str, size, sp_str, ASM_COMMENT_START);
6071 /* Is the entire register save area offsettable from %sp? */
6072 if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
6074 size1 = 0;
6076 else
6078 /* Restore %sp in two steps, but make sure there is always a
6079 64 byte register save area, and %sp is properly aligned. */
6080 /* Amount to increment %sp by, the first time. */
6081 size1 = ((reg_offset - 64 - 16) + 15) & -16;
6082 /* Offset to register save area from %sp. */
6083 reg_offset = size1 - reg_offset;
6085 fprintf (file, "\tset\t%d, %s\n\tadd\t%s, %s, %s\n",
6086 size1, t1_str, sp_str, t1_str, sp_str);
6089 /* We must restore the frame pointer and return address reg first
6090 because they are treated specially by the prologue output code. */
6091 if (current_frame_info.gmask & FRAME_POINTER_MASK)
6093 fprintf (file, "\tld\t[%s+%d], %s\n",
6094 sp_str, reg_offset, fp_str);
6095 reg_offset += 4;
6097 if (current_frame_info.gmask & RETURN_ADDR_MASK)
6099 fprintf (file, "\tld\t[%s+%d], %s\n",
6100 sp_str, reg_offset, reg_names[RETURN_ADDR_REGNUM]);
6101 reg_offset += 4;
6104 /* Restore any remaining saved registers. */
6105 sparc_flat_save_restore (file, sp_str, reg_offset,
6106 current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
6107 current_frame_info.fmask,
6108 "ld", "ldd", 0);
6110 /* If we had to increment %sp in two steps, record it so the second
6111 restoration in the epilogue finishes up. */
6112 if (size1 > 0)
6114 size -= size1;
6115 if (size > 4095)
6116 fprintf (file, "\tset\t%d, %s\n",
6117 size, t1_str);
6120 if (current_function_returns_struct)
6121 fprintf (file, "\tjmp\t%%o7+12\n");
6122 else
6123 fprintf (file, "\tretl\n");
6125 /* If the only register saved is the return address, we need a
6126 nop, unless we have an instruction to put into it. Otherwise
6127 we don't since reloading multiple registers doesn't reference
6128 the register being loaded. */
6130 if (epilogue_delay)
6132 if (size)
6133 abort ();
6134 final_scan_insn (XEXP (epilogue_delay, 0), file, 1, -2, 1);
6137 else if (size > 4095)
6138 fprintf (file, "\tadd\t%s, %s, %s\n", sp_str, t1_str, sp_str);
6140 else if (size > 0)
6141 fprintf (file, "\tadd\t%s, %d, %s\n", sp_str, size, sp_str);
6143 else
6144 fprintf (file, "\tnop\n");
6147 /* Reset state info for each function. */
6148 current_frame_info = zero_frame_info;
6150 sparc_output_deferred_case_vectors ();
6153 /* Define the number of delay slots needed for the function epilogue.
6155 On the sparc, we need a slot if either no stack has been allocated,
6156 or the only register saved is the return register. */
6159 sparc_flat_epilogue_delay_slots ()
6161 if (!current_frame_info.initialized)
6162 (void) sparc_flat_compute_frame_size (get_frame_size ());
6164 if (current_frame_info.total_size == 0)
6165 return 1;
6167 return 0;
6170 /* Return true is TRIAL is a valid insn for the epilogue delay slot.
6171 Any single length instruction which doesn't reference the stack or frame
6172 pointer is OK. */
6175 sparc_flat_eligible_for_epilogue_delay (trial, slot)
6176 rtx trial;
6177 int slot ATTRIBUTE_UNUSED;
6179 rtx pat = PATTERN (trial);
6181 if (get_attr_length (trial) != 1)
6182 return 0;
6184 /* If %g0 is live, there are lots of things we can't handle.
6185 Rather than trying to find them all now, let's punt and only
6186 optimize things as necessary. */
6187 if (TARGET_LIVE_G0)
6188 return 0;
6190 if (! reg_mentioned_p (stack_pointer_rtx, pat)
6191 && ! reg_mentioned_p (frame_pointer_rtx, pat))
6192 return 1;
6194 return 0;
6197 /* Adjust the cost of a scheduling dependency. Return the new cost of
6198 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
6200 static int
6201 supersparc_adjust_cost (insn, link, dep_insn, cost)
6202 rtx insn;
6203 rtx link;
6204 rtx dep_insn;
6205 int cost;
6207 enum attr_type insn_type;
6209 if (! recog_memoized (insn))
6210 return 0;
6212 insn_type = get_attr_type (insn);
6214 if (REG_NOTE_KIND (link) == 0)
6216 /* Data dependency; DEP_INSN writes a register that INSN reads some
6217 cycles later. */
6219 /* if a load, then the dependence must be on the memory address;
6220 add an extra "cycle". Note that the cost could be two cycles
6221 if the reg was written late in an instruction group; we ca not tell
6222 here. */
6223 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
6224 return cost + 3;
6226 /* Get the delay only if the address of the store is the dependence. */
6227 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
6229 rtx pat = PATTERN(insn);
6230 rtx dep_pat = PATTERN (dep_insn);
6232 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6233 return cost; /* This should not happen! */
6235 /* The dependency between the two instructions was on the data that
6236 is being stored. Assume that this implies that the address of the
6237 store is not dependent. */
6238 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6239 return cost;
6241 return cost + 3; /* An approximation. */
6244 /* A shift instruction cannot receive its data from an instruction
6245 in the same cycle; add a one cycle penalty. */
6246 if (insn_type == TYPE_SHIFT)
6247 return cost + 3; /* Split before cascade into shift. */
6249 else
6251 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
6252 INSN writes some cycles later. */
6254 /* These are only significant for the fpu unit; writing a fp reg before
6255 the fpu has finished with it stalls the processor. */
6257 /* Reusing an integer register causes no problems. */
6258 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6259 return 0;
6262 return cost;
6265 static int
6266 hypersparc_adjust_cost (insn, link, dep_insn, cost)
6267 rtx insn;
6268 rtx link;
6269 rtx dep_insn;
6270 int cost;
6272 enum attr_type insn_type, dep_type;
6273 rtx pat = PATTERN(insn);
6274 rtx dep_pat = PATTERN (dep_insn);
6276 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6277 return cost;
6279 insn_type = get_attr_type (insn);
6280 dep_type = get_attr_type (dep_insn);
6282 switch (REG_NOTE_KIND (link))
6284 case 0:
6285 /* Data dependency; DEP_INSN writes a register that INSN reads some
6286 cycles later. */
6288 switch (insn_type)
6290 case TYPE_STORE:
6291 case TYPE_FPSTORE:
6292 /* Get the delay iff the address of the store is the dependence. */
6293 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6294 return cost;
6296 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6297 return cost;
6298 return cost + 3;
6300 case TYPE_LOAD:
6301 case TYPE_SLOAD:
6302 case TYPE_FPLOAD:
6303 /* If a load, then the dependence must be on the memory address. If
6304 the addresses aren't equal, then it might be a false dependency */
6305 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
6307 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
6308 || GET_CODE (SET_DEST (dep_pat)) != MEM
6309 || GET_CODE (SET_SRC (pat)) != MEM
6310 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
6311 XEXP (SET_SRC (pat), 0)))
6312 return cost + 2;
6314 return cost + 8;
6316 break;
6318 case TYPE_BRANCH:
6319 /* Compare to branch latency is 0. There is no benefit from
6320 separating compare and branch. */
6321 if (dep_type == TYPE_COMPARE)
6322 return 0;
6323 /* Floating point compare to branch latency is less than
6324 compare to conditional move. */
6325 if (dep_type == TYPE_FPCMP)
6326 return cost - 1;
6327 break;
6328 default:
6329 break;
6331 break;
6333 case REG_DEP_ANTI:
6334 /* Anti-dependencies only penalize the fpu unit. */
6335 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6336 return 0;
6337 break;
6339 default:
6340 break;
6343 return cost;
6346 static int
6347 ultrasparc_adjust_cost (insn, link, dep_insn, cost)
6348 rtx insn;
6349 rtx link;
6350 rtx dep_insn;
6351 int cost;
6353 enum attr_type insn_type, dep_type;
6354 rtx pat = PATTERN(insn);
6355 rtx dep_pat = PATTERN (dep_insn);
6357 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6358 return cost;
6360 insn_type = get_attr_type (insn);
6361 dep_type = get_attr_type (dep_insn);
6363 /* Nothing issues in parallel with integer multiplies, so
6364 mark as zero cost since the scheduler can not do anything
6365 about it. */
6366 if (insn_type == TYPE_IMUL)
6367 return 0;
6369 #define SLOW_FP(dep_type) \
6370 (dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
6372 switch (REG_NOTE_KIND (link))
6374 case 0:
6375 /* Data dependency; DEP_INSN writes a register that INSN reads some
6376 cycles later. */
6378 if (dep_type == TYPE_CMOVE)
6380 /* Instructions that read the result of conditional moves cannot
6381 be in the same group or the following group. */
6382 return cost + 1;
6385 switch (insn_type)
6387 /* UltraSPARC can dual issue a store and an instruction setting
6388 the value stored, except for divide and square root. */
6389 case TYPE_FPSTORE:
6390 if (! SLOW_FP (dep_type))
6391 return 0;
6392 return cost;
6394 case TYPE_STORE:
6395 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6396 return cost;
6398 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6399 /* The dependency between the two instructions is on the data
6400 that is being stored. Assume that the address of the store
6401 is not also dependent. */
6402 return 0;
6403 return cost;
6405 case TYPE_LOAD:
6406 case TYPE_SLOAD:
6407 case TYPE_FPLOAD:
6408 /* A load does not return data until at least 11 cycles after
6409 a store to the same location. 3 cycles are accounted for
6410 in the load latency; add the other 8 here. */
6411 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
6413 /* If the addresses are not equal this may be a false
6414 dependency because pointer aliasing could not be
6415 determined. Add only 2 cycles in that case. 2 is
6416 an arbitrary compromise between 8, which would cause
6417 the scheduler to generate worse code elsewhere to
6418 compensate for a dependency which might not really
6419 exist, and 0. */
6420 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
6421 || GET_CODE (SET_SRC (pat)) != MEM
6422 || GET_CODE (SET_DEST (dep_pat)) != MEM
6423 || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
6424 XEXP (SET_DEST (dep_pat), 0)))
6425 return cost + 2;
6427 return cost + 8;
6429 return cost;
6431 case TYPE_BRANCH:
6432 /* Compare to branch latency is 0. There is no benefit from
6433 separating compare and branch. */
6434 if (dep_type == TYPE_COMPARE)
6435 return 0;
6436 /* Floating point compare to branch latency is less than
6437 compare to conditional move. */
6438 if (dep_type == TYPE_FPCMP)
6439 return cost - 1;
6440 return cost;
6442 case TYPE_FPCMOVE:
6443 /* FMOVR class instructions can not issue in the same cycle
6444 or the cycle after an instruction which writes any
6445 integer register. Model this as cost 2 for dependent
6446 instructions. */
6447 if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
6448 || dep_type == TYPE_BINARY)
6449 && cost < 2)
6450 return 2;
6451 /* Otherwise check as for integer conditional moves. */
6453 case TYPE_CMOVE:
6454 /* Conditional moves involving integer registers wait until
6455 3 cycles after loads return data. The interlock applies
6456 to all loads, not just dependent loads, but that is hard
6457 to model. */
6458 if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
6459 return cost + 3;
6460 return cost;
6462 default:
6463 break;
6465 break;
6467 case REG_DEP_ANTI:
6468 /* Divide and square root lock destination registers for full latency. */
6469 if (! SLOW_FP (dep_type))
6470 return 0;
6471 break;
6473 case REG_DEP_OUTPUT:
6474 /* IEU and FPU instruction that have the same destination
6475 register cannot be grouped together. */
6476 return cost + 1;
6478 default:
6479 break;
6482 /* Other costs not accounted for:
6483 - Single precision floating point loads lock the other half of
6484 the even/odd register pair.
6485 - Several hazards associated with ldd/std are ignored because these
6486 instructions are rarely generated for V9.
6487 - The floating point pipeline can not have both a single and double
6488 precision operation active at the same time. Format conversions
6489 and graphics instructions are given honorary double precision status.
6490 - call and jmpl are always the first instruction in a group. */
6492 return cost;
6494 #undef SLOW_FP
6498 sparc_adjust_cost(insn, link, dep, cost)
6499 rtx insn;
6500 rtx link;
6501 rtx dep;
6502 int cost;
6504 switch (sparc_cpu)
6506 case PROCESSOR_SUPERSPARC:
6507 cost = supersparc_adjust_cost (insn, link, dep, cost);
6508 break;
6509 case PROCESSOR_HYPERSPARC:
6510 case PROCESSOR_SPARCLITE86X:
6511 cost = hypersparc_adjust_cost (insn, link, dep, cost);
6512 break;
6513 case PROCESSOR_ULTRASPARC:
6514 cost = ultrasparc_adjust_cost (insn, link, dep, cost);
6515 break;
6516 default:
6517 break;
6519 return cost;
6522 /* This describes the state of the UltraSPARC pipeline during
6523 instruction scheduling. */
6525 #define TMASK(__x) ((unsigned)1 << ((int)(__x)))
6526 #define UMASK(__x) ((unsigned)1 << ((int)(__x)))
6528 enum ultra_code { NONE=0, /* no insn at all */
6529 IEU0, /* shifts and conditional moves */
6530 IEU1, /* condition code setting insns, calls+jumps */
6531 IEUN, /* all other single cycle ieu insns */
6532 LSU, /* loads and stores */
6533 CTI, /* branches */
6534 FPM, /* FPU pipeline 1, multiplies and divides */
6535 FPA, /* FPU pipeline 2, all other operations */
6536 SINGLE, /* single issue instructions */
6537 NUM_ULTRA_CODES };
6539 static const char *ultra_code_names[NUM_ULTRA_CODES] = {
6540 "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
6541 "FPM", "FPA", "SINGLE" };
6543 struct ultrasparc_pipeline_state {
6544 /* The insns in this group. */
6545 rtx group[4];
6547 /* The code for each insn. */
6548 enum ultra_code codes[4];
6550 /* Which insns in this group have been committed by the
6551 scheduler. This is how we determine how many more
6552 can issue this cycle. */
6553 char commit[4];
6555 /* How many insns in this group. */
6556 char group_size;
6558 /* Mask of free slots still in this group. */
6559 char free_slot_mask;
6561 /* The slotter uses the following to determine what other
6562 insn types can still make their way into this group. */
6563 char contents [NUM_ULTRA_CODES];
6564 char num_ieu_insns;
6567 #define ULTRA_NUM_HIST 8
6568 static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
6569 static int ultra_cur_hist;
6570 static int ultra_cycles_elapsed;
6572 #define ultra_pipe (ultra_pipe_hist[ultra_cur_hist])
6574 /* Given TYPE_MASK compute the ultra_code it has. */
6575 static enum ultra_code
6576 ultra_code_from_mask (type_mask)
6577 int type_mask;
6579 if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
6580 return IEU0;
6581 else if (type_mask & (TMASK (TYPE_COMPARE) |
6582 TMASK (TYPE_CALL) |
6583 TMASK (TYPE_UNCOND_BRANCH)))
6584 return IEU1;
6585 else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
6586 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY)))
6587 return IEUN;
6588 else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
6589 TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
6590 TMASK (TYPE_FPSTORE)))
6591 return LSU;
6592 else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
6593 TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRT)))
6594 return FPM;
6595 else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
6596 TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
6597 return FPA;
6598 else if (type_mask & TMASK (TYPE_BRANCH))
6599 return CTI;
6601 return SINGLE;
6604 /* Check INSN (a conditional move) and make sure that it's
6605 results are available at this cycle. Return 1 if the
6606 results are in fact ready. */
6607 static int
6608 ultra_cmove_results_ready_p (insn)
6609 rtx insn;
6611 struct ultrasparc_pipeline_state *up;
6612 int entry, slot;
6614 /* If this got dispatched in the previous
6615 group, the results are not ready. */
6616 entry = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
6617 up = &ultra_pipe_hist[entry];
6618 slot = 4;
6619 while (--slot >= 0)
6620 if (up->group[slot] == insn)
6621 return 0;
6623 return 1;
6626 /* Walk backwards in pipeline history looking for FPU
6627 operations which use a mode different than FPMODE and
6628 will create a stall if an insn using FPMODE were to be
6629 dispatched this cycle. */
6630 static int
6631 ultra_fpmode_conflict_exists (fpmode)
6632 enum machine_mode fpmode;
6634 int hist_ent;
6635 int hist_lim;
6637 hist_ent = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
6638 if (ultra_cycles_elapsed < 4)
6639 hist_lim = ultra_cycles_elapsed;
6640 else
6641 hist_lim = 4;
6642 while (hist_lim > 0)
6644 struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
6645 int slot = 4;
6647 while (--slot >= 0)
6649 rtx insn = up->group[slot];
6650 enum machine_mode this_mode;
6651 rtx pat;
6653 if (! insn
6654 || GET_CODE (insn) != INSN
6655 || (pat = PATTERN (insn)) == 0
6656 || GET_CODE (pat) != SET)
6657 continue;
6659 this_mode = GET_MODE (SET_DEST (pat));
6660 if ((this_mode != SFmode
6661 && this_mode != DFmode)
6662 || this_mode == fpmode)
6663 continue;
6665 /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
6666 we will get a stall. Loads and stores are independant
6667 of these rules. */
6668 if (GET_CODE (SET_SRC (pat)) != ABS
6669 && GET_CODE (SET_SRC (pat)) != NEG
6670 && ((TMASK (get_attr_type (insn)) &
6671 (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
6672 TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRT) |
6673 TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
6674 return 1;
6676 hist_lim--;
6677 hist_ent = (hist_ent - 1) % (ULTRA_NUM_HIST - 1);
6680 /* No conflicts, safe to dispatch. */
6681 return 0;
6684 /* Find an instruction in LIST which has one of the
6685 type attributes enumerated in TYPE_MASK. START
6686 says where to begin the search.
6688 NOTE: This scheme depends upon the fact that we
6689 have less than 32 distinct type attributes. */
6691 static int ultra_types_avail;
6693 static rtx *
6694 ultra_find_type (type_mask, list, start)
6695 int type_mask;
6696 rtx *list;
6697 int start;
6699 int i;
6701 /* Short circuit if no such insn exists in the ready
6702 at the moment. */
6703 if ((type_mask & ultra_types_avail) == 0)
6704 return 0;
6706 for (i = start; i >= 0; i--)
6708 rtx insn = list[i];
6710 if (recog_memoized (insn) >= 0
6711 && (TMASK(get_attr_type (insn)) & type_mask))
6713 enum machine_mode fpmode = SFmode;
6714 rtx pat = 0;
6715 int slot;
6716 int check_depend = 0;
6717 int check_fpmode_conflict = 0;
6719 if (GET_CODE (insn) == INSN
6720 && (pat = PATTERN(insn)) != 0
6721 && GET_CODE (pat) == SET
6722 && !(type_mask & (TMASK (TYPE_STORE) |
6723 TMASK (TYPE_FPSTORE))))
6725 check_depend = 1;
6726 if (GET_MODE (SET_DEST (pat)) == SFmode
6727 || GET_MODE (SET_DEST (pat)) == DFmode)
6729 fpmode = GET_MODE (SET_DEST (pat));
6730 check_fpmode_conflict = 1;
6734 slot = 4;
6735 while(--slot >= 0)
6737 rtx slot_insn = ultra_pipe.group[slot];
6738 rtx slot_pat;
6740 /* Already issued, bad dependency, or FPU
6741 mode conflict. */
6742 if (slot_insn != 0
6743 && (slot_pat = PATTERN (slot_insn)) != 0
6744 && ((insn == slot_insn)
6745 || (check_depend == 1
6746 && GET_CODE (slot_insn) == INSN
6747 && GET_CODE (slot_pat) == SET
6748 && ((GET_CODE (SET_DEST (slot_pat)) == REG
6749 && GET_CODE (SET_SRC (pat)) == REG
6750 && REGNO (SET_DEST (slot_pat)) ==
6751 REGNO (SET_SRC (pat)))
6752 || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
6753 && GET_CODE (SET_SRC (pat)) == SUBREG
6754 && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
6755 REGNO (SUBREG_REG (SET_SRC (pat)))
6756 && SUBREG_WORD (SET_DEST (slot_pat)) ==
6757 SUBREG_WORD (SET_SRC (pat)))))
6758 || (check_fpmode_conflict == 1
6759 && GET_CODE (slot_insn) == INSN
6760 && GET_CODE (slot_pat) == SET
6761 && (GET_MODE (SET_DEST (slot_pat)) == SFmode
6762 || GET_MODE (SET_DEST (slot_pat)) == DFmode)
6763 && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
6764 goto next;
6767 /* Check for peculiar result availability and dispatch
6768 interference situations. */
6769 if (pat != 0
6770 && ultra_cycles_elapsed > 0)
6772 rtx link;
6774 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6776 rtx link_insn = XEXP (link, 0);
6777 if (GET_CODE (link_insn) == INSN
6778 && recog_memoized (link_insn) >= 0
6779 && (TMASK (get_attr_type (link_insn)) &
6780 (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
6781 && ! ultra_cmove_results_ready_p (link_insn))
6782 goto next;
6785 if (check_fpmode_conflict
6786 && ultra_fpmode_conflict_exists (fpmode))
6787 goto next;
6790 return &list[i];
6792 next:
6795 return 0;
6798 static void
6799 ultra_build_types_avail (ready, n_ready)
6800 rtx *ready;
6801 int n_ready;
6803 int i = n_ready - 1;
6805 ultra_types_avail = 0;
6806 while(i >= 0)
6808 rtx insn = ready[i];
6810 if (recog_memoized (insn) >= 0)
6811 ultra_types_avail |= TMASK (get_attr_type (insn));
6813 i -= 1;
6817 /* Place insn pointed to my IP into the pipeline.
6818 Make element THIS of READY be that insn if it
6819 is not already. TYPE indicates the pipeline class
6820 this insn falls into. */
6821 static void
6822 ultra_schedule_insn (ip, ready, this, type)
6823 rtx *ip;
6824 rtx *ready;
6825 int this;
6826 enum ultra_code type;
6828 int pipe_slot;
6829 char mask = ultra_pipe.free_slot_mask;
6831 /* Obtain free slot. */
6832 for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
6833 if ((mask & (1 << pipe_slot)) != 0)
6834 break;
6835 if (pipe_slot == 4)
6836 abort ();
6838 /* In it goes, and it hasn't been committed yet. */
6839 ultra_pipe.group[pipe_slot] = *ip;
6840 ultra_pipe.codes[pipe_slot] = type;
6841 ultra_pipe.contents[type] = 1;
6842 if (UMASK (type) &
6843 (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
6844 ultra_pipe.num_ieu_insns += 1;
6846 ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
6847 ultra_pipe.group_size += 1;
6848 ultra_pipe.commit[pipe_slot] = 0;
6850 /* Update ready list. */
6851 if (ip != &ready[this])
6853 rtx temp = *ip;
6855 *ip = ready[this];
6856 ready[this] = temp;
6860 /* Advance to the next pipeline group. */
6861 static void
6862 ultra_flush_pipeline ()
6864 ultra_cur_hist = (ultra_cur_hist + 1) % (ULTRA_NUM_HIST - 1);
6865 ultra_cycles_elapsed += 1;
6866 bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
6867 ultra_pipe.free_slot_mask = 0xf;
6870 /* Init our data structures for this current block. */
6871 void
6872 ultrasparc_sched_init (dump, sched_verbose)
6873 FILE *dump ATTRIBUTE_UNUSED;
6874 int sched_verbose ATTRIBUTE_UNUSED;
6876 bzero ((char *) ultra_pipe_hist, sizeof ultra_pipe_hist);
6877 ultra_cur_hist = 0;
6878 ultra_cycles_elapsed = 0;
6879 ultra_pipe.free_slot_mask = 0xf;
6882 /* INSN has been scheduled, update pipeline commit state
6883 and return how many instructions are still to be
6884 scheduled in this group. */
6886 ultrasparc_variable_issue (insn)
6887 rtx insn;
6889 struct ultrasparc_pipeline_state *up = &ultra_pipe;
6890 int i, left_to_fire;
6892 left_to_fire = 0;
6893 for (i = 0; i < 4; i++)
6895 if (up->group[i] == 0)
6896 continue;
6898 if (up->group[i] == insn)
6900 up->commit[i] = 1;
6902 else if (! up->commit[i])
6903 left_to_fire++;
6906 return left_to_fire;
6909 /* In actual_hazard_this_instance, we may have yanked some
6910 instructions from the ready list due to conflict cost
6911 adjustments. If so, and such an insn was in our pipeline
6912 group, remove it and update state. */
6913 static void
6914 ultra_rescan_pipeline_state (ready, n_ready)
6915 rtx *ready;
6916 int n_ready;
6918 struct ultrasparc_pipeline_state *up = &ultra_pipe;
6919 int i;
6921 for (i = 0; i < 4; i++)
6923 rtx insn = up->group[i];
6924 int j;
6926 if (! insn)
6927 continue;
6929 /* If it has been committed, then it was removed from
6930 the ready list because it was actually scheduled,
6931 and that is not the case we are searching for here. */
6932 if (up->commit[i] != 0)
6933 continue;
6935 for (j = n_ready - 1; j >= 0; j--)
6936 if (ready[j] == insn)
6937 break;
6939 /* If we didn't find it, toss it. */
6940 if (j < 0)
6942 enum ultra_code ucode = up->codes[i];
6944 up->group[i] = 0;
6945 up->codes[i] = NONE;
6946 up->contents[ucode] = 0;
6947 if (UMASK (ucode) &
6948 (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
6949 up->num_ieu_insns -= 1;
6951 up->free_slot_mask |= (1 << i);
6952 up->group_size -= 1;
6953 up->commit[i] = 0;
6958 void
6959 ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
6960 FILE *dump;
6961 int sched_verbose;
6962 rtx *ready;
6963 int n_ready;
6965 struct ultrasparc_pipeline_state *up = &ultra_pipe;
6966 int i, this_insn;
6968 if (sched_verbose)
6970 int n;
6972 fprintf (dump, "\n;;\tUltraSPARC Looking at [");
6973 for (n = n_ready - 1; n >= 0; n--)
6975 rtx insn = ready[n];
6976 enum ultra_code ucode;
6978 if (recog_memoized (insn) < 0)
6979 continue;
6980 ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
6981 if (n != 0)
6982 fprintf (dump, "%s(%d) ",
6983 ultra_code_names[ucode],
6984 INSN_UID (insn));
6985 else
6986 fprintf (dump, "%s(%d)",
6987 ultra_code_names[ucode],
6988 INSN_UID (insn));
6990 fprintf (dump, "]\n");
6993 this_insn = n_ready - 1;
6995 /* Skip over junk we don't understand. */
6996 while ((this_insn >= 0)
6997 && recog_memoized (ready[this_insn]) < 0)
6998 this_insn--;
7000 ultra_build_types_avail (ready, this_insn + 1);
7002 while (this_insn >= 0) {
7003 int old_group_size = up->group_size;
7005 if (up->group_size != 0)
7007 int num_committed;
7009 num_committed = (up->commit[0] + up->commit[1] +
7010 up->commit[2] + up->commit[3]);
7011 /* If nothing has been commited from our group, or all of
7012 them have. Clear out the (current cycle's) pipeline
7013 state and start afresh. */
7014 if (num_committed == 0
7015 || num_committed == up->group_size)
7017 ultra_flush_pipeline ();
7018 up = &ultra_pipe;
7019 old_group_size = 0;
7021 else
7023 /* OK, some ready list insns got requeued and thus removed
7024 from the ready list. Account for this fact. */
7025 ultra_rescan_pipeline_state (ready, n_ready);
7027 /* Something "changed", make this look like a newly
7028 formed group so the code at the end of the loop
7029 knows that progress was in fact made. */
7030 if (up->group_size != old_group_size)
7031 old_group_size = 0;
7035 if (up->group_size == 0)
7037 /* If the pipeline is (still) empty and we have any single
7038 group insns, get them out now as this is a good time. */
7039 rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_ADDRESS) |
7040 TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
7041 TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
7042 ready, this_insn);
7043 if (ip)
7045 ultra_schedule_insn (ip, ready, this_insn, SINGLE);
7046 break;
7049 /* If we are not in the process of emptying out the pipe, try to
7050 obtain an instruction which must be the first in it's group. */
7051 ip = ultra_find_type ((TMASK (TYPE_CALL) |
7052 TMASK (TYPE_CALL_NO_DELAY_SLOT) |
7053 TMASK (TYPE_UNCOND_BRANCH)),
7054 ready, this_insn);
7055 if (ip)
7057 ultra_schedule_insn (ip, ready, this_insn, IEU1);
7058 this_insn--;
7060 else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
7061 TMASK (TYPE_FPDIVD) |
7062 TMASK (TYPE_FPSQRT)),
7063 ready, this_insn)) != 0)
7065 ultra_schedule_insn (ip, ready, this_insn, FPM);
7066 this_insn--;
7070 /* Try to fill the integer pipeline. First, look for an IEU0 specific
7071 operation. We can't do more IEU operations if the first 3 slots are
7072 all full or we have dispatched two IEU insns already. */
7073 if ((up->free_slot_mask & 0x7) != 0
7074 && up->num_ieu_insns < 2
7075 && up->contents[IEU0] == 0
7076 && up->contents[IEUN] == 0)
7078 rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
7079 if (ip)
7081 ultra_schedule_insn (ip, ready, this_insn, IEU0);
7082 this_insn--;
7086 /* If we can, try to find an IEU1 specific or an unnamed
7087 IEU instruction. */
7088 if ((up->free_slot_mask & 0x7) != 0
7089 && up->num_ieu_insns < 2)
7091 rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
7092 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY) |
7093 (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
7094 ready, this_insn);
7095 if (ip)
7097 rtx insn = *ip;
7099 ultra_schedule_insn (ip, ready, this_insn,
7100 (!up->contents[IEU1]
7101 && get_attr_type (insn) == TYPE_COMPARE)
7102 ? IEU1 : IEUN);
7103 this_insn--;
7107 /* If only one IEU insn has been found, try to find another unnamed
7108 IEU operation or an IEU1 specific one. */
7109 if ((up->free_slot_mask & 0x7) != 0
7110 && up->num_ieu_insns < 2)
7112 rtx *ip;
7113 int tmask = (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
7114 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY));
7116 if (!up->contents[IEU1])
7117 tmask |= TMASK (TYPE_COMPARE);
7118 ip = ultra_find_type (tmask, ready, this_insn);
7119 if (ip)
7121 rtx insn = *ip;
7123 ultra_schedule_insn (ip, ready, this_insn,
7124 (!up->contents[IEU1]
7125 && get_attr_type (insn) == TYPE_COMPARE)
7126 ? IEU1 : IEUN);
7127 this_insn--;
7131 /* Try for a load or store, but such an insn can only be issued
7132 if it is within' one of the first 3 slots. */
7133 if ((up->free_slot_mask & 0x7) != 0
7134 && up->contents[LSU] == 0)
7136 rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
7137 TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
7138 TMASK (TYPE_FPSTORE)), ready, this_insn);
7139 if (ip)
7141 ultra_schedule_insn (ip, ready, this_insn, LSU);
7142 this_insn--;
7146 /* Now find FPU operations, first FPM class. But not divisions or
7147 square-roots because those will break the group up. Unlike all
7148 the previous types, these can go in any slot. */
7149 if (up->free_slot_mask != 0
7150 && up->contents[FPM] == 0)
7152 rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
7153 if (ip)
7155 ultra_schedule_insn (ip, ready, this_insn, FPM);
7156 this_insn--;
7160 /* Continue on with FPA class if we have not filled the group already. */
7161 if (up->free_slot_mask != 0
7162 && up->contents[FPA] == 0)
7164 rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
7165 TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
7166 ready, this_insn);
7167 if (ip)
7169 ultra_schedule_insn (ip, ready, this_insn, FPA);
7170 this_insn--;
7174 /* Finally, maybe stick a branch in here. */
7175 if (up->free_slot_mask != 0
7176 && up->contents[CTI] == 0)
7178 rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
7180 /* Try to slip in a branch only if it is one of the
7181 next 2 in the ready list. */
7182 if (ip && ((&ready[this_insn] - ip) < 2))
7184 ultra_schedule_insn (ip, ready, this_insn, CTI);
7185 this_insn--;
7189 up->group_size = 0;
7190 for (i = 0; i < 4; i++)
7191 if ((up->free_slot_mask & (1 << i)) == 0)
7192 up->group_size++;
7194 /* See if we made any progress... */
7195 if (old_group_size != up->group_size)
7196 break;
7198 /* Clean out the (current cycle's) pipeline state
7199 and try once more. If we placed no instructions
7200 into the pipeline at all, it means a real hard
7201 conflict exists with some earlier issued instruction
7202 so we must advance to the next cycle to clear it up. */
7203 if (up->group_size == 0)
7205 ultra_flush_pipeline ();
7206 up = &ultra_pipe;
7208 else
7210 bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
7211 ultra_pipe.free_slot_mask = 0xf;
7215 if (sched_verbose)
7217 int n, gsize;
7219 fprintf (dump, ";;\tUltraSPARC Launched [");
7220 gsize = up->group_size;
7221 for (n = 0; n < 4; n++)
7223 rtx insn = up->group[n];
7225 if (! insn)
7226 continue;
7228 gsize -= 1;
7229 if (gsize != 0)
7230 fprintf (dump, "%s(%d) ",
7231 ultra_code_names[up->codes[n]],
7232 INSN_UID (insn));
7233 else
7234 fprintf (dump, "%s(%d)",
7235 ultra_code_names[up->codes[n]],
7236 INSN_UID (insn));
7238 fprintf (dump, "]\n");
7242 int
7243 sparc_issue_rate ()
7245 switch (sparc_cpu)
7247 default:
7248 return 1;
7249 case PROCESSOR_V9:
7250 /* Assume V9 processors are capable of at least dual-issue. */
7251 return 2;
7252 case PROCESSOR_SUPERSPARC:
7253 return 3;
7254 case PROCESSOR_HYPERSPARC:
7255 case PROCESSOR_SPARCLITE86X:
7256 return 2;
7257 case PROCESSOR_ULTRASPARC:
7258 return 4;
7262 static int
7263 set_extends(x, insn)
7264 rtx x, insn;
7266 register rtx pat = PATTERN (insn);
7268 switch (GET_CODE (SET_SRC (pat)))
7270 /* Load and some shift instructions zero extend. */
7271 case MEM:
7272 case ZERO_EXTEND:
7273 /* sethi clears the high bits */
7274 case HIGH:
7275 /* LO_SUM is used with sethi. sethi cleared the high
7276 bits and the values used with lo_sum are positive */
7277 case LO_SUM:
7278 /* Store flag stores 0 or 1 */
7279 case LT: case LTU:
7280 case GT: case GTU:
7281 case LE: case LEU:
7282 case GE: case GEU:
7283 case EQ:
7284 case NE:
7285 return 1;
7286 case AND:
7288 rtx op1 = XEXP (SET_SRC (pat), 1);
7289 if (GET_CODE (op1) == CONST_INT)
7290 return INTVAL (op1) >= 0;
7291 if (GET_CODE (XEXP (SET_SRC (pat), 0)) == REG
7292 && sparc_check_64 (XEXP (SET_SRC (pat), 0), insn) == 1)
7293 return 1;
7294 if (GET_CODE (op1) == REG
7295 && sparc_check_64 ((op1), insn) == 1)
7296 return 1;
7298 case ASHIFT:
7299 case LSHIFTRT:
7300 return GET_MODE (SET_SRC (pat)) == SImode;
7301 /* Positive integers leave the high bits zero. */
7302 case CONST_DOUBLE:
7303 return ! (CONST_DOUBLE_LOW (x) & 0x80000000);
7304 case CONST_INT:
7305 return ! (INTVAL (x) & 0x80000000);
7306 case ASHIFTRT:
7307 case SIGN_EXTEND:
7308 return - (GET_MODE (SET_SRC (pat)) == SImode);
7309 default:
7310 return 0;
7314 /* We _ought_ to have only one kind per function, but... */
7315 static rtx sparc_addr_diff_list;
7316 static rtx sparc_addr_list;
7318 void
7319 sparc_defer_case_vector (lab, vec, diff)
7320 rtx lab, vec;
7321 int diff;
7323 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7324 if (diff)
7325 sparc_addr_diff_list
7326 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7327 else
7328 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7331 static void
7332 sparc_output_addr_vec (vec)
7333 rtx vec;
7335 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7336 int idx, vlen = XVECLEN (body, 0);
7338 #ifdef ASM_OUTPUT_ADDR_VEC_START
7339 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7340 #endif
7342 #ifdef ASM_OUTPUT_CASE_LABEL
7343 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7344 NEXT_INSN (lab));
7345 #else
7346 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7347 #endif
7349 for (idx = 0; idx < vlen; idx++)
7351 ASM_OUTPUT_ADDR_VEC_ELT
7352 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
7355 #ifdef ASM_OUTPUT_ADDR_VEC_END
7356 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7357 #endif
7360 static void
7361 sparc_output_addr_diff_vec (vec)
7362 rtx vec;
7364 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7365 rtx base = XEXP (XEXP (body, 0), 0);
7366 int idx, vlen = XVECLEN (body, 1);
7368 #ifdef ASM_OUTPUT_ADDR_VEC_START
7369 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7370 #endif
7372 #ifdef ASM_OUTPUT_CASE_LABEL
7373 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7374 NEXT_INSN (lab));
7375 #else
7376 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7377 #endif
7379 for (idx = 0; idx < vlen; idx++)
7381 ASM_OUTPUT_ADDR_DIFF_ELT
7382 (asm_out_file,
7383 body,
7384 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
7385 CODE_LABEL_NUMBER (base));
7388 #ifdef ASM_OUTPUT_ADDR_VEC_END
7389 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7390 #endif
7393 static void
7394 sparc_output_deferred_case_vectors ()
7396 rtx t;
7397 int align;
7399 if (sparc_addr_list == NULL_RTX
7400 && sparc_addr_diff_list == NULL_RTX)
7401 return;
7403 /* Align to cache line in the function's code section. */
7404 function_section (current_function_decl);
7406 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7407 if (align > 0)
7408 ASM_OUTPUT_ALIGN (asm_out_file, align);
7410 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7411 sparc_output_addr_vec (XEXP (t, 0));
7412 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7413 sparc_output_addr_diff_vec (XEXP (t, 0));
7415 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7418 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
7419 unknown. Return 1 if the high bits are zero, -1 if the register is
7420 sign extended. */
7422 sparc_check_64 (x, insn)
7423 rtx x, insn;
7425 /* If a register is set only once it is safe to ignore insns this
7426 code does not know how to handle. The loop will either recognize
7427 the single set and return the correct value or fail to recognize
7428 it and return 0. */
7429 int set_once = 0;
7431 if (GET_CODE (x) == REG
7432 && flag_expensive_optimizations
7433 && REG_N_SETS (REGNO (x)) == 1)
7434 set_once = 1;
7436 if (insn == 0)
7438 if (set_once)
7439 insn = get_last_insn_anywhere ();
7440 else
7441 return 0;
7444 while ((insn = PREV_INSN (insn)))
7446 switch (GET_CODE (insn))
7448 case JUMP_INSN:
7449 case NOTE:
7450 break;
7451 case CODE_LABEL:
7452 case CALL_INSN:
7453 default:
7454 if (! set_once)
7455 return 0;
7456 break;
7457 case INSN:
7459 rtx pat = PATTERN (insn);
7460 if (GET_CODE (pat) != SET)
7461 return 0;
7462 if (rtx_equal_p (x, SET_DEST (pat)))
7463 return set_extends (x, insn);
7464 if (reg_overlap_mentioned_p (SET_DEST (pat), x))
7465 return 0;
7469 return 0;
7472 char *
7473 sparc_v8plus_shift (operands, insn, opcode)
7474 rtx *operands;
7475 rtx insn;
7476 char *opcode;
7478 static char asm_code[60];
7480 if (GET_CODE (operands[3]) == SCRATCH)
7481 operands[3] = operands[0];
7482 if (GET_CODE (operands[1]) == CONST_INT)
7484 output_asm_insn ("mov %1,%3", operands);
7486 else
7488 output_asm_insn ("sllx %H1,32,%3", operands);
7489 if (sparc_check_64 (operands[1], insn) <= 0)
7490 output_asm_insn ("srl %L1,0,%L1", operands);
7491 output_asm_insn ("or %L1,%3,%3", operands);
7494 strcpy(asm_code, opcode);
7495 if (which_alternative != 2)
7496 return strcat (asm_code, " %0,%2,%L0\n\tsrlx %L0,32,%H0");
7497 else
7498 return strcat (asm_code, " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0");
7502 /* Return 1 if DEST and SRC reference only global and in registers. */
7505 sparc_return_peephole_ok (dest, src)
7506 rtx dest, src;
7508 if (! TARGET_V9)
7509 return 0;
7510 if (current_function_uses_only_leaf_regs)
7511 return 0;
7512 if (GET_CODE (src) != CONST_INT
7513 && (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src)))
7514 return 0;
7515 return IN_OR_GLOBAL_P (dest);
7518 /* Output assembler code to FILE to increment profiler label # LABELNO
7519 for profiling a function entry.
7521 32 bit sparc uses %g2 as the STATIC_CHAIN_REGNUM which gets clobbered
7522 during profiling so we need to save/restore it around the call to mcount.
7523 We're guaranteed that a save has just been done, and we use the space
7524 allocated for intreg/fpreg value passing. */
7526 void
7527 sparc_function_profiler (file, labelno)
7528 FILE *file;
7529 int labelno;
7531 char buf[32];
7532 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
7534 if (! TARGET_ARCH64)
7535 fputs ("\tst\t%g2,[%fp-4]\n", file);
7537 fputs ("\tsethi\t%hi(", file);
7538 assemble_name (file, buf);
7539 fputs ("),%o0\n", file);
7541 fputs ("\tcall\t", file);
7542 assemble_name (file, MCOUNT_FUNCTION);
7543 putc ('\n', file);
7545 fputs ("\t or\t%o0,%lo(", file);
7546 assemble_name (file, buf);
7547 fputs ("),%o0\n", file);
7549 if (! TARGET_ARCH64)
7550 fputs ("\tld\t[%fp-4],%g2\n", file);
7554 /* The following macro shall output assembler code to FILE
7555 to initialize basic-block profiling.
7557 If profile_block_flag == 2
7559 Output code to call the subroutine `__bb_init_trace_func'
7560 and pass two parameters to it. The first parameter is
7561 the address of a block allocated in the object module.
7562 The second parameter is the number of the first basic block
7563 of the function.
7565 The name of the block is a local symbol made with this statement:
7567 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
7569 Of course, since you are writing the definition of
7570 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7571 can take a short cut in the definition of this macro and use the
7572 name that you know will result.
7574 The number of the first basic block of the function is
7575 passed to the macro in BLOCK_OR_LABEL.
7577 If described in a virtual assembler language the code to be
7578 output looks like:
7580 parameter1 <- LPBX0
7581 parameter2 <- BLOCK_OR_LABEL
7582 call __bb_init_trace_func
7584 else if profile_block_flag != 0
7586 Output code to call the subroutine `__bb_init_func'
7587 and pass one single parameter to it, which is the same
7588 as the first parameter to `__bb_init_trace_func'.
7590 The first word of this parameter is a flag which will be nonzero if
7591 the object module has already been initialized. So test this word
7592 first, and do not call `__bb_init_func' if the flag is nonzero.
7593 Note: When profile_block_flag == 2 the test need not be done
7594 but `__bb_init_trace_func' *must* be called.
7596 BLOCK_OR_LABEL may be used to generate a label number as a
7597 branch destination in case `__bb_init_func' will not be called.
7599 If described in a virtual assembler language the code to be
7600 output looks like:
7602 cmp (LPBX0),0
7603 jne local_label
7604 parameter1 <- LPBX0
7605 call __bb_init_func
7606 local_label:
7610 void
7611 sparc_function_block_profiler(file, block_or_label)
7612 FILE *file;
7613 int block_or_label;
7615 char LPBX[32];
7616 ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);
7618 if (profile_block_flag == 2)
7620 fputs ("\tsethi\t%hi(", file);
7621 assemble_name (file, LPBX);
7622 fputs ("),%o0\n", file);
7624 fprintf (file, "\tsethi\t%%hi(%d),%%o1\n", block_or_label);
7626 fputs ("\tor\t%o0,%lo(", file);
7627 assemble_name (file, LPBX);
7628 fputs ("),%o0\n", file);
7630 fprintf (file, "\tcall\t%s__bb_init_trace_func\n", user_label_prefix);
7632 fprintf (file, "\t or\t%%o1,%%lo(%d),%%o1\n", block_or_label);
7634 else if (profile_block_flag != 0)
7636 char LPBY[32];
7637 ASM_GENERATE_INTERNAL_LABEL (LPBY, "LPBY", block_or_label);
7639 fputs ("\tsethi\t%hi(", file);
7640 assemble_name (file, LPBX);
7641 fputs ("),%o0\n", file);
7643 fputs ("\tld\t[%lo(", file);
7644 assemble_name (file, LPBX);
7645 fputs (")+%o0],%o1\n", file);
7647 fputs ("\ttst\t%o1\n", file);
7649 if (TARGET_V9)
7651 fputs ("\tbne,pn\t%icc,", file);
7652 assemble_name (file, LPBY);
7653 putc ('\n', file);
7655 else
7657 fputs ("\tbne\t", file);
7658 assemble_name (file, LPBY);
7659 putc ('\n', file);
7662 fputs ("\t or\t%o0,%lo(", file);
7663 assemble_name (file, LPBX);
7664 fputs ("),%o0\n", file);
7666 fprintf (file, "\tcall\t%s__bb_init_func\n\t nop\n", user_label_prefix);
7668 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBY", block_or_label);
7672 /* The following macro shall output assembler code to FILE
7673 to increment a counter associated with basic block number BLOCKNO.
7675 If profile_block_flag == 2
7677 Output code to initialize the global structure `__bb' and
7678 call the function `__bb_trace_func' which will increment the
7679 counter.
7681 `__bb' consists of two words. In the first word the number
7682 of the basic block has to be stored. In the second word
7683 the address of a block allocated in the object module
7684 has to be stored.
7686 The basic block number is given by BLOCKNO.
7688 The address of the block is given by the label created with
7690 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
7692 by FUNCTION_BLOCK_PROFILER.
7694 Of course, since you are writing the definition of
7695 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7696 can take a short cut in the definition of this macro and use the
7697 name that you know will result.
7699 If described in a virtual assembler language the code to be
7700 output looks like:
7702 move BLOCKNO -> (__bb)
7703 move LPBX0 -> (__bb+4)
7704 call __bb_trace_func
7706 Note that function `__bb_trace_func' must not change the
7707 machine state, especially the flag register. To grant
7708 this, you must output code to save and restore registers
7709 either in this macro or in the macros MACHINE_STATE_SAVE
7710 and MACHINE_STATE_RESTORE. The last two macros will be
7711 used in the function `__bb_trace_func', so you must make
7712 sure that the function prologue does not change any
7713 register prior to saving it with MACHINE_STATE_SAVE.
7715 else if profile_block_flag != 0
7717 Output code to increment the counter directly.
7718 Basic blocks are numbered separately from zero within each
7719 compiled object module. The count associated with block number
7720 BLOCKNO is at index BLOCKNO in an array of words; the name of
7721 this array is a local symbol made with this statement:
7723 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
7725 Of course, since you are writing the definition of
7726 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7727 can take a short cut in the definition of this macro and use the
7728 name that you know will result.
7730 If described in a virtual assembler language, the code to be
7731 output looks like:
7733 inc (LPBX2+4*BLOCKNO)
7737 void
7738 sparc_block_profiler(file, blockno)
7739 FILE *file;
7740 int blockno;
7742 char LPBX[32];
7744 if (profile_block_flag == 2)
7746 ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);
7748 fprintf (file, "\tsethi\t%%hi(%s__bb),%%g1\n", user_label_prefix);
7749 fprintf (file, "\tsethi\t%%hi(%d),%%g2\n", blockno);
7750 fprintf (file, "\tor\t%%g1,%%lo(%s__bb),%%g1\n", user_label_prefix);
7751 fprintf (file, "\tor\t%%g2,%%lo(%d),%%g2\n", blockno);
7753 fputs ("\tst\t%g2,[%g1]\n", file);
7755 fputs ("\tsethi\t%hi(", file);
7756 assemble_name (file, LPBX);
7757 fputs ("),%g2\n", file);
7759 fputs ("\tor\t%o2,%lo(", file);
7760 assemble_name (file, LPBX);
7761 fputs ("),%g2\n", file);
7763 fputs ("\tst\t%g2,[%g1+4]\n", file);
7764 fputs ("\tmov\t%o7,%g2\n", file);
7766 fprintf (file, "\tcall\t%s__bb_trace_func\n\t nop\n", user_label_prefix);
7768 fputs ("\tmov\t%g2,%o7\n", file);
7770 else if (profile_block_flag != 0)
7772 ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 2);
7774 fputs ("\tsethi\t%hi(", file);
7775 assemble_name (file, LPBX);
7776 fprintf (file, "+%d),%%g1\n", blockno*4);
7778 fputs ("\tld\t[%g1+%lo(", file);
7779 assemble_name (file, LPBX);
7780 fprintf (file, "+%d)],%%g2\n", blockno*4);
7782 fputs ("\tadd\t%g2,1,%g2\n", file);
7784 fputs ("\tst\t%g2,[%g1+%lo(", file);
7785 assemble_name (file, LPBX);
7786 fprintf (file, "+%d)]\n", blockno*4);
7790 /* The following macro shall output assembler code to FILE
7791 to indicate a return from function during basic-block profiling.
7793 If profile_block_flag == 2:
7795 Output assembler code to call function `__bb_trace_ret'.
7797 Note that function `__bb_trace_ret' must not change the
7798 machine state, especially the flag register. To grant
7799 this, you must output code to save and restore registers
7800 either in this macro or in the macros MACHINE_STATE_SAVE_RET
7801 and MACHINE_STATE_RESTORE_RET. The last two macros will be
7802 used in the function `__bb_trace_ret', so you must make
7803 sure that the function prologue does not change any
7804 register prior to saving it with MACHINE_STATE_SAVE_RET.
7806 else if profile_block_flag != 0:
7808 The macro will not be used, so it need not distinguish
7809 these cases.
7812 void
7813 sparc_function_block_profiler_exit(file)
7814 FILE *file;
7816 if (profile_block_flag == 2)
7817 fprintf (file, "\tcall\t%s__bb_trace_ret\n\t nop\n", user_label_prefix);
7818 else
7819 abort ();