* configure.in: Add check for GAS subsection -1 support.
[official-gcc.git] / gcc / config / sparc / sparc.c
blob6e7d27af75cbcef903eb06b5e62a16dc55eeaba6
1 /* Subroutines for insn-output.c for Sun SPARC.
2 Copyright (C) 1987, 88, 89, 92-97, 1998 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "tree.h"
27 #include "rtl.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "toplev.h"
/* 1 if the caller has placed an "unimp" insn immediately after the call.
   This is used in v8 code when calling a function that returns a structure.
   v9 doesn't have this.  Be careful to have this test be the same as that
   used on the call.  Nonzero only for a non-empty, fixed-size aggregate
   return on 32-bit targets.  */

#define SKIP_CALLERS_UNIMP_P  \
(!TARGET_ARCH64 && current_function_returns_struct			\
 && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))	\
 && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))	\
     == INTEGER_CST))
52 /* Global variables for machine-dependent things. */
54 /* Size of frame. Need to know this to emit return insns from leaf procedures.
55 ACTUAL_FSIZE is set by compute_frame_size() which is called during the
56 reload pass. This is important as the value is later used in insn
57 scheduling (to see what can go in a delay slot).
58 APPARENT_FSIZE is the size of the stack less the register save area and less
59 the outgoing argument area. It is used when saving call preserved regs. */
60 static int apparent_fsize;
61 static int actual_fsize;
63 /* Save the operands last given to a compare for use when we
64 generate a scc or bcc insn. */
66 rtx sparc_compare_op0, sparc_compare_op1;
68 /* We may need an epilogue if we spill too many registers.
69 If this is non-zero, then we branch here for the epilogue. */
70 static rtx leaf_label;
72 #ifdef LEAF_REGISTERS
74 /* Vector to say how input registers are mapped to output
75 registers. FRAME_POINTER_REGNUM cannot be remapped by
76 this function to eliminate it. You must use -fomit-frame-pointer
77 to get that. */
78 char leaf_reg_remap[] =
79 { 0, 1, 2, 3, 4, 5, 6, 7,
80 -1, -1, -1, -1, -1, -1, 14, -1,
81 -1, -1, -1, -1, -1, -1, -1, -1,
82 8, 9, 10, 11, 12, 13, -1, 15,
84 32, 33, 34, 35, 36, 37, 38, 39,
85 40, 41, 42, 43, 44, 45, 46, 47,
86 48, 49, 50, 51, 52, 53, 54, 55,
87 56, 57, 58, 59, 60, 61, 62, 63,
88 64, 65, 66, 67, 68, 69, 70, 71,
89 72, 73, 74, 75, 76, 77, 78, 79,
90 80, 81, 82, 83, 84, 85, 86, 87,
91 88, 89, 90, 91, 92, 93, 94, 95,
92 96, 97, 98, 99, 100};
94 #endif
96 /* Name of where we pretend to think the frame pointer points.
97 Normally, this is "%fp", but if we are in a leaf procedure,
98 this is "%sp+something". We record "something" separately as it may be
99 too big for reg+constant addressing. */
101 static char *frame_base_name;
102 static int frame_base_offset;
104 static rtx pic_setup_code PROTO((void));
105 static void sparc_init_modes PROTO((void));
106 static int save_regs PROTO((FILE *, int, int, char *,
107 int, int, int));
108 static int restore_regs PROTO((FILE *, int, int, char *, int, int));
109 static void build_big_number PROTO((FILE *, int, char *));
110 static int function_arg_slotno PROTO((const CUMULATIVE_ARGS *,
111 enum machine_mode, tree, int, int,
112 int *, int *));
113 static void sparc_output_addr_vec PROTO((rtx));
114 static void sparc_output_addr_diff_vec PROTO((rtx));
115 static void sparc_output_deferred_case_vectors PROTO((void));
118 #ifdef DWARF2_DEBUGGING_INFO
119 extern char *dwarf2out_cfi_label ();
120 #endif
122 /* Option handling. */
124 /* Code model option as passed by user. */
125 char *sparc_cmodel_string;
126 /* Parsed value. */
127 enum cmodel sparc_cmodel;
129 /* Record alignment options as passed by user. */
130 char *sparc_align_loops_string;
131 char *sparc_align_jumps_string;
132 char *sparc_align_funcs_string;
134 /* Parsed values, as a power of two. */
135 int sparc_align_loops;
136 int sparc_align_jumps;
137 int sparc_align_funcs;
139 struct sparc_cpu_select sparc_select[] =
141 /* switch name, tune arch */
142 { (char *)0, "default", 1, 1 },
143 { (char *)0, "-mcpu=", 1, 1 },
144 { (char *)0, "-mtune=", 1, 0 },
145 { 0, 0, 0, 0 }
148 /* CPU type. This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx. */
149 enum processor_type sparc_cpu;
151 /* Validate and override various options, and do some machine dependent
152 initialization. */
154 void
155 sparc_override_options ()
157 static struct code_model {
158 char *name;
159 int value;
160 } cmodels[] = {
161 { "32", CM_32 },
162 { "medlow", CM_MEDLOW },
163 { "medmid", CM_MEDMID },
164 { "medany", CM_MEDANY },
165 { "embmedany", CM_EMBMEDANY },
166 { 0, 0 }
168 struct code_model *cmodel;
169 /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=. */
170 static struct cpu_default {
171 int cpu;
172 char *name;
173 } cpu_default[] = {
174 /* There must be one entry here for each TARGET_CPU value. */
175 { TARGET_CPU_sparc, "cypress" },
176 { TARGET_CPU_sparclet, "tsc701" },
177 { TARGET_CPU_sparclite, "f930" },
178 { TARGET_CPU_v8, "v8" },
179 { TARGET_CPU_supersparc, "supersparc" },
180 { TARGET_CPU_v9, "v9" },
181 { TARGET_CPU_ultrasparc, "ultrasparc" },
182 { 0, 0 }
184 struct cpu_default *def;
185 /* Table of values for -m{cpu,tune}=. */
186 static struct cpu_table {
187 char *name;
188 enum processor_type processor;
189 int disable;
190 int enable;
191 } cpu_table[] = {
192 { "v7", PROCESSOR_V7, MASK_ISA, 0 },
193 { "cypress", PROCESSOR_CYPRESS, MASK_ISA, 0 },
194 { "v8", PROCESSOR_V8, MASK_ISA, MASK_V8 },
195 /* TI TMS390Z55 supersparc */
196 { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
197 { "sparclite", PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
198 /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
199 The Fujitsu MB86934 is the recent sparclite chip, with an fpu. */
200 { "f930", PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
201 { "f934", PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
202 { "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
203 /* TEMIC sparclet */
204 { "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
205 { "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 },
206 /* TI ultrasparc */
207 { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
208 { 0, 0, 0, 0 }
210 struct cpu_table *cpu;
211 struct sparc_cpu_select *sel;
212 int fpu;
214 #ifndef SPARC_BI_ARCH
215 /* Check for unsupported architecture size. */
216 if (! TARGET_64BIT != DEFAULT_ARCH32_P)
218 error ("%s is not supported by this configuration",
219 DEFAULT_ARCH32_P ? "-m64" : "-m32");
221 #endif
223 /* Code model selection. */
224 sparc_cmodel = SPARC_DEFAULT_CMODEL;
225 if (sparc_cmodel_string != NULL)
227 if (TARGET_ARCH64)
229 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
230 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
231 break;
232 if (cmodel->name == NULL)
233 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
234 else
235 sparc_cmodel = cmodel->value;
237 else
238 error ("-mcmodel= is not supported on 32 bit systems");
241 fpu = TARGET_FPU; /* save current -mfpu status */
243 /* Set the default CPU. */
244 for (def = &cpu_default[0]; def->name; ++def)
245 if (def->cpu == TARGET_CPU_DEFAULT)
246 break;
247 if (! def->name)
248 abort ();
249 sparc_select[0].string = def->name;
251 for (sel = &sparc_select[0]; sel->name; ++sel)
253 if (sel->string)
255 for (cpu = &cpu_table[0]; cpu->name; ++cpu)
256 if (! strcmp (sel->string, cpu->name))
258 if (sel->set_tune_p)
259 sparc_cpu = cpu->processor;
261 if (sel->set_arch_p)
263 target_flags &= ~cpu->disable;
264 target_flags |= cpu->enable;
266 break;
269 if (! cpu->name)
270 error ("bad value (%s) for %s switch", sel->string, sel->name);
274 /* If -mfpu or -mno-fpu was explicitly used, don't override with
275 the processor default. */
276 if (TARGET_FPU_SET)
277 target_flags = (target_flags & ~MASK_FPU) | fpu;
279 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
280 if (TARGET_V9 && TARGET_ARCH32)
281 target_flags |= MASK_DEPRECATED_V8_INSNS;
283 /* V8PLUS requires V9 */
284 if (! TARGET_V9)
285 target_flags &= ~MASK_V8PLUS;
287 /* Don't use stack biasing in 32 bit mode. */
288 if (TARGET_ARCH32)
289 target_flags &= ~MASK_STACK_BIAS;
291 /* Don't allow -mvis if FPU is disabled. */
292 if (! TARGET_FPU)
293 target_flags &= ~MASK_VIS;
295 /* Validate -malign-loops= value, or provide default. */
296 if (sparc_align_loops_string)
298 sparc_align_loops = exact_log2 (atoi (sparc_align_loops_string));
299 if (sparc_align_loops < 2 || sparc_align_loops > 7)
300 fatal ("-malign-loops=%s is not between 4 and 128 or is not a power of two",
301 sparc_align_loops_string);
303 else
305 /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
306 its 0. This sounds a bit kludgey. */
307 sparc_align_loops = 0;
310 /* Validate -malign-jumps= value, or provide default. */
311 if (sparc_align_jumps_string)
313 sparc_align_jumps = exact_log2 (atoi (sparc_align_jumps_string));
314 if (sparc_align_jumps < 2 || sparc_align_loops > 7)
315 fatal ("-malign-jumps=%s is not between 4 and 128 or is not a power of two",
316 sparc_align_jumps_string);
318 else
320 /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
321 its 0. This sounds a bit kludgey. */
322 sparc_align_jumps = 0;
325 /* Validate -malign-functions= value, or provide default. */
326 if (sparc_align_funcs_string)
328 sparc_align_funcs = exact_log2 (atoi (sparc_align_funcs_string));
329 if (sparc_align_funcs < 2 || sparc_align_loops > 7)
330 fatal ("-malign-functions=%s is not between 4 and 128 or is not a power of two",
331 sparc_align_funcs_string);
333 else
334 sparc_align_funcs = DEFAULT_SPARC_ALIGN_FUNCS;
336 /* Validate PCC_STRUCT_RETURN. */
337 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
338 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
340 /* Do various machine dependent initializations. */
341 sparc_init_modes ();
344 /* Miscellaneous utilities. */
346 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
347 or branch on register contents instructions. */
350 v9_regcmp_p (code)
351 enum rtx_code code;
353 return (code == EQ || code == NE || code == GE || code == LT
354 || code == LE || code == GT);
358 /* Operand constraints. */
360 /* Return non-zero only if OP is a register of mode MODE,
361 or const0_rtx. Don't allow const0_rtx if TARGET_LIVE_G0 because
362 %g0 may contain anything. */
365 reg_or_0_operand (op, mode)
366 rtx op;
367 enum machine_mode mode;
369 if (register_operand (op, mode))
370 return 1;
371 if (TARGET_LIVE_G0)
372 return 0;
373 if (op == const0_rtx)
374 return 1;
375 if (GET_MODE (op) == VOIDmode && GET_CODE (op) == CONST_DOUBLE
376 && CONST_DOUBLE_HIGH (op) == 0
377 && CONST_DOUBLE_LOW (op) == 0)
378 return 1;
379 if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT
380 && GET_CODE (op) == CONST_DOUBLE
381 && fp_zero_operand (op))
382 return 1;
383 return 0;
386 /* Nonzero if OP is a floating point value with value 0.0. */
389 fp_zero_operand (op)
390 rtx op;
392 REAL_VALUE_TYPE r;
394 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
395 return (REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r));
398 /* Nonzero if OP is an integer register. */
401 intreg_operand (op, mode)
402 rtx op;
403 enum machine_mode mode ATTRIBUTE_UNUSED;
405 return (register_operand (op, SImode)
406 || (TARGET_ARCH64 && register_operand (op, DImode)));
409 /* Nonzero if OP is a floating point condition code register. */
412 fcc_reg_operand (op, mode)
413 rtx op;
414 enum machine_mode mode;
416 /* This can happen when recog is called from combine. Op may be a MEM.
417 Fail instead of calling abort in this case. */
418 if (GET_CODE (op) != REG)
419 return 0;
421 if (mode != VOIDmode && mode != GET_MODE (op))
422 return 0;
423 if (mode == VOIDmode
424 && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
425 return 0;
427 #if 0 /* ??? ==> 1 when %fcc0-3 are pseudos first. See gen_compare_reg(). */
428 if (reg_renumber == 0)
429 return REGNO (op) >= FIRST_PSEUDO_REGISTER;
430 return REGNO_OK_FOR_CCFP_P (REGNO (op));
431 #else
432 return (unsigned) REGNO (op) - SPARC_FIRST_V9_FCC_REG < 4;
433 #endif
436 /* Nonzero if OP is an integer or floating point condition code register. */
439 icc_or_fcc_reg_operand (op, mode)
440 rtx op;
441 enum machine_mode mode;
443 if (GET_CODE (op) == REG && REGNO (op) == SPARC_ICC_REG)
445 if (mode != VOIDmode && mode != GET_MODE (op))
446 return 0;
447 if (mode == VOIDmode
448 && GET_MODE (op) != CCmode && GET_MODE (op) != CCXmode)
449 return 0;
450 return 1;
453 return fcc_reg_operand (op, mode);
456 /* Nonzero if OP can appear as the dest of a RESTORE insn. */
458 restore_operand (op, mode)
459 rtx op;
460 enum machine_mode mode;
462 return (GET_CODE (op) == REG && GET_MODE (op) == mode
463 && (REGNO (op) < 8 || (REGNO (op) >= 24 && REGNO (op) < 32)));
466 /* Call insn on SPARC can take a PC-relative constant address, or any regular
467 memory address. */
470 call_operand (op, mode)
471 rtx op;
472 enum machine_mode mode;
474 if (GET_CODE (op) != MEM)
475 abort ();
476 op = XEXP (op, 0);
477 return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
481 call_operand_address (op, mode)
482 rtx op;
483 enum machine_mode mode;
485 return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
488 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
489 reference and a constant. */
492 symbolic_operand (op, mode)
493 register rtx op;
494 enum machine_mode mode;
496 switch (GET_CODE (op))
498 case SYMBOL_REF:
499 case LABEL_REF:
500 return 1;
502 case CONST:
503 op = XEXP (op, 0);
504 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
505 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
506 && GET_CODE (XEXP (op, 1)) == CONST_INT);
508 /* ??? This clause seems to be irrelevant. */
509 case CONST_DOUBLE:
510 return GET_MODE (op) == mode;
512 default:
513 return 0;
517 /* Return truth value of statement that OP is a symbolic memory
518 operand of mode MODE. */
521 symbolic_memory_operand (op, mode)
522 rtx op;
523 enum machine_mode mode ATTRIBUTE_UNUSED;
525 if (GET_CODE (op) == SUBREG)
526 op = SUBREG_REG (op);
527 if (GET_CODE (op) != MEM)
528 return 0;
529 op = XEXP (op, 0);
530 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
531 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
534 /* Return truth value of statement that OP is a LABEL_REF of mode MODE. */
537 label_ref_operand (op, mode)
538 rtx op;
539 enum machine_mode mode;
541 if (GET_CODE (op) != LABEL_REF)
542 return 0;
543 if (GET_MODE (op) != mode)
544 return 0;
545 return 1;
548 /* Return 1 if the operand is an argument used in generating pic references
549 in either the medium/low or medium/anywhere code models of sparc64. */
552 sp64_medium_pic_operand (op, mode)
553 rtx op;
554 enum machine_mode mode ATTRIBUTE_UNUSED;
556 /* Check for (const (minus (symbol_ref:GOT)
557 (const (minus (label) (pc))))). */
558 if (GET_CODE (op) != CONST)
559 return 0;
560 op = XEXP (op, 0);
561 if (GET_CODE (op) != MINUS)
562 return 0;
563 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
564 return 0;
565 /* ??? Ensure symbol is GOT. */
566 if (GET_CODE (XEXP (op, 1)) != CONST)
567 return 0;
568 if (GET_CODE (XEXP (XEXP (op, 1), 0)) != MINUS)
569 return 0;
570 return 1;
573 /* Return 1 if the operand is a data segment reference. This includes
574 the readonly data segment, or in other words anything but the text segment.
575 This is needed in the medium/anywhere code model on v9. These values
576 are accessed with EMBMEDANY_BASE_REG. */
579 data_segment_operand (op, mode)
580 rtx op;
581 enum machine_mode mode ATTRIBUTE_UNUSED;
583 switch (GET_CODE (op))
585 case SYMBOL_REF :
586 return ! SYMBOL_REF_FLAG (op);
587 case PLUS :
588 /* Assume canonical format of symbol + constant.
589 Fall through. */
590 case CONST :
591 return data_segment_operand (XEXP (op, 0));
592 default :
593 return 0;
597 /* Return 1 if the operand is a text segment reference.
598 This is needed in the medium/anywhere code model on v9. */
601 text_segment_operand (op, mode)
602 rtx op;
603 enum machine_mode mode ATTRIBUTE_UNUSED;
605 switch (GET_CODE (op))
607 case LABEL_REF :
608 return 1;
609 case SYMBOL_REF :
610 return SYMBOL_REF_FLAG (op);
611 case PLUS :
612 /* Assume canonical format of symbol + constant.
613 Fall through. */
614 case CONST :
615 return text_segment_operand (XEXP (op, 0));
616 default :
617 return 0;
621 /* Return 1 if the operand is either a register or a memory operand that is
622 not symbolic. */
625 reg_or_nonsymb_mem_operand (op, mode)
626 register rtx op;
627 enum machine_mode mode;
629 if (register_operand (op, mode))
630 return 1;
632 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
633 return 1;
635 return 0;
639 splittable_symbolic_memory_operand (op, mode)
640 rtx op;
641 enum machine_mode mode ATTRIBUTE_UNUSED;
643 if (GET_CODE (op) != MEM)
644 return 0;
645 if (! symbolic_operand (XEXP (op, 0), Pmode))
646 return 0;
647 return 1;
651 splittable_immediate_memory_operand (op, mode)
652 rtx op;
653 enum machine_mode mode ATTRIBUTE_UNUSED;
655 if (GET_CODE (op) != MEM)
656 return 0;
657 if (! immediate_operand (XEXP (op, 0), Pmode))
658 return 0;
659 return 1;
662 /* Return truth value of whether OP is EQ or NE. */
665 eq_or_neq (op, mode)
666 rtx op;
667 enum machine_mode mode ATTRIBUTE_UNUSED;
669 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
672 /* Return 1 if this is a comparison operator, but not an EQ, NE, GEU,
673 or LTU for non-floating-point. We handle those specially. */
676 normal_comp_operator (op, mode)
677 rtx op;
678 enum machine_mode mode ATTRIBUTE_UNUSED;
680 enum rtx_code code = GET_CODE (op);
682 if (GET_RTX_CLASS (code) != '<')
683 return 0;
685 if (GET_MODE (XEXP (op, 0)) == CCFPmode
686 || GET_MODE (XEXP (op, 0)) == CCFPEmode)
687 return 1;
689 return (code != NE && code != EQ && code != GEU && code != LTU);
692 /* Return 1 if this is a comparison operator. This allows the use of
693 MATCH_OPERATOR to recognize all the branch insns. */
696 noov_compare_op (op, mode)
697 register rtx op;
698 enum machine_mode mode ATTRIBUTE_UNUSED;
700 enum rtx_code code = GET_CODE (op);
702 if (GET_RTX_CLASS (code) != '<')
703 return 0;
705 if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode)
706 /* These are the only branches which work with CC_NOOVmode. */
707 return (code == EQ || code == NE || code == GE || code == LT);
708 return 1;
711 /* Nonzero if OP is a comparison operator suitable for use in v9
712 conditional move or branch on register contents instructions. */
715 v9_regcmp_op (op, mode)
716 register rtx op;
717 enum machine_mode mode ATTRIBUTE_UNUSED;
719 enum rtx_code code = GET_CODE (op);
721 if (GET_RTX_CLASS (code) != '<')
722 return 0;
724 return v9_regcmp_p (code);
727 /* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation. */
730 extend_op (op, mode)
731 rtx op;
732 enum machine_mode mode ATTRIBUTE_UNUSED;
734 return GET_CODE (op) == SIGN_EXTEND || GET_CODE (op) == ZERO_EXTEND;
737 /* Return nonzero if OP is an operator of mode MODE which can set
738 the condition codes explicitly. We do not include PLUS and MINUS
739 because these require CC_NOOVmode, which we handle explicitly. */
742 cc_arithop (op, mode)
743 rtx op;
744 enum machine_mode mode ATTRIBUTE_UNUSED;
746 if (GET_CODE (op) == AND
747 || GET_CODE (op) == IOR
748 || GET_CODE (op) == XOR)
749 return 1;
751 return 0;
754 /* Return nonzero if OP is an operator of mode MODE which can bitwise
755 complement its second operand and set the condition codes explicitly. */
758 cc_arithopn (op, mode)
759 rtx op;
760 enum machine_mode mode ATTRIBUTE_UNUSED;
762 /* XOR is not here because combine canonicalizes (xor (not ...) ...)
763 and (xor ... (not ...)) to (not (xor ...)). */
764 return (GET_CODE (op) == AND
765 || GET_CODE (op) == IOR);
768 /* Return true if OP is a register, or is a CONST_INT that can fit in a
769 signed 13 bit immediate field. This is an acceptable SImode operand for
770 most 3 address instructions. */
773 arith_operand (op, mode)
774 rtx op;
775 enum machine_mode mode;
777 int val;
778 if (register_operand (op, mode)
779 || GET_CODE (op) == CONSTANT_P_RTX)
780 return 1;
781 if (GET_CODE (op) != CONST_INT)
782 return 0;
783 val = INTVAL (op) & 0xffffffff;
784 return SPARC_SIMM13_P (val);
787 /* Return true if OP is a constant 4096 */
790 arith_4096_operand (op, mode)
791 rtx op;
792 enum machine_mode mode ATTRIBUTE_UNUSED;
794 int val;
795 if (GET_CODE (op) != CONST_INT)
796 return 0;
797 val = INTVAL (op) & 0xffffffff;
798 return val == 4096;
801 /* Return true if OP is suitable as second operand for add/sub */
804 arith_add_operand (op, mode)
805 rtx op;
806 enum machine_mode mode;
808 return arith_operand (op, mode) || arith_4096_operand (op, mode);
811 /* Return true if OP is a CONST_INT or a CONST_DOUBLE which can fit in the
812 immediate field of OR and XOR instructions. Used for 64-bit
813 constant formation patterns. */
815 const64_operand (op, mode)
816 rtx op;
817 enum machine_mode mode ATTRIBUTE_UNUSED;
819 return ((GET_CODE (op) == CONST_INT
820 && SPARC_SIMM13_P (INTVAL (op)))
821 #if HOST_BITS_PER_WIDE_INT != 64
822 || (GET_CODE (op) == CONST_DOUBLE
823 && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
824 && (CONST_DOUBLE_HIGH (op) ==
825 ((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
826 (HOST_WIDE_INT)0xffffffff : 0)))
827 #endif
828 || GET_CODE (op) == CONSTANT_P_RTX);
831 /* The same, but only for sethi instructions. */
833 const64_high_operand (op, mode)
834 rtx op;
835 enum machine_mode mode ATTRIBUTE_UNUSED;
837 return ((GET_CODE (op) == CONST_INT
838 && (INTVAL (op) & 0xfffffc00) != 0
839 && SPARC_SETHI_P (INTVAL (op))
840 #if HOST_BITS_PER_WIDE_INT != 64
841 /* Must be positive on non-64bit host else the
842 optimizer is fooled into thinking that sethi
843 sign extends, even though it does not. */
844 && INTVAL (op) >= 0
845 #endif
847 || (GET_CODE (op) == CONST_DOUBLE
848 && CONST_DOUBLE_HIGH (op) == 0
849 && (CONST_DOUBLE_LOW (op) & 0xfffffc00) != 0
850 && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
851 || GET_CODE (op) == CONSTANT_P_RTX);
854 /* Return true if OP is a register, or is a CONST_INT that can fit in a
855 signed 11 bit immediate field. This is an acceptable SImode operand for
856 the movcc instructions. */
859 arith11_operand (op, mode)
860 rtx op;
861 enum machine_mode mode;
863 return (register_operand (op, mode)
864 || GET_CODE (op) == CONSTANT_P_RTX
865 || (GET_CODE (op) == CONST_INT && SPARC_SIMM11_P (INTVAL (op))));
868 /* Return true if OP is a register, or is a CONST_INT that can fit in a
869 signed 10 bit immediate field. This is an acceptable SImode operand for
870 the movrcc instructions. */
873 arith10_operand (op, mode)
874 rtx op;
875 enum machine_mode mode;
877 return (register_operand (op, mode)
878 || GET_CODE (op) == CONSTANT_P_RTX
879 || (GET_CODE (op) == CONST_INT && SPARC_SIMM10_P (INTVAL (op))));
882 /* Return true if OP is a register, is a CONST_INT that fits in a 13 bit
883 immediate field, or is a CONST_DOUBLE whose both parts fit in a 13 bit
884 immediate field.
885 v9: Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
886 can fit in a 13 bit immediate field. This is an acceptable DImode operand
887 for most 3 address instructions. */
890 arith_double_operand (op, mode)
891 rtx op;
892 enum machine_mode mode;
894 return (register_operand (op, mode)
895 || GET_CODE (op) == CONSTANT_P_RTX
896 || (GET_CODE (op) == CONST_INT && SMALL_INT (op))
897 || (! TARGET_ARCH64
898 && GET_CODE (op) == CONST_DOUBLE
899 && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
900 && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_HIGH (op) + 0x1000) < 0x2000)
901 || (TARGET_ARCH64
902 && GET_CODE (op) == CONST_DOUBLE
903 && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
904 && ((CONST_DOUBLE_HIGH (op) == -1
905 && (CONST_DOUBLE_LOW (op) & 0x1000) == 0x1000)
906 || (CONST_DOUBLE_HIGH (op) == 0
907 && (CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
910 /* Return true if OP is a constant 4096 for DImode on ARCH64 */
913 arith_double_4096_operand (op, mode)
914 rtx op;
915 enum machine_mode mode ATTRIBUTE_UNUSED;
917 return (TARGET_ARCH64 &&
918 ((GET_CODE (op) == CONST_INT && INTVAL (op) == 4096) ||
919 (GET_CODE (op) == CONST_DOUBLE &&
920 CONST_DOUBLE_LOW (op) == 4096 &&
921 CONST_DOUBLE_HIGH (op) == 0)));
924 /* Return true if OP is suitable as second operand for add/sub in DImode */
927 arith_double_add_operand (op, mode)
928 rtx op;
929 enum machine_mode mode;
931 return arith_double_operand (op, mode) || arith_double_4096_operand (op, mode);
934 /* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
935 can fit in an 11 bit immediate field. This is an acceptable DImode
936 operand for the movcc instructions. */
937 /* ??? Replace with arith11_operand? */
940 arith11_double_operand (op, mode)
941 rtx op;
942 enum machine_mode mode;
944 return (register_operand (op, mode)
945 || GET_CODE (op) == CONSTANT_P_RTX
946 || (GET_CODE (op) == CONST_DOUBLE
947 && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
948 && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
949 && ((CONST_DOUBLE_HIGH (op) == -1
950 && (CONST_DOUBLE_LOW (op) & 0x400) == 0x400)
951 || (CONST_DOUBLE_HIGH (op) == 0
952 && (CONST_DOUBLE_LOW (op) & 0x400) == 0)))
953 || (GET_CODE (op) == CONST_INT
954 && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
955 && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x400) < 0x800));
958 /* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
959 can fit in an 10 bit immediate field. This is an acceptable DImode
960 operand for the movrcc instructions. */
961 /* ??? Replace with arith10_operand? */
964 arith10_double_operand (op, mode)
965 rtx op;
966 enum machine_mode mode;
968 return (register_operand (op, mode)
969 || GET_CODE (op) == CONSTANT_P_RTX
970 || (GET_CODE (op) == CONST_DOUBLE
971 && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
972 && (unsigned) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
973 && ((CONST_DOUBLE_HIGH (op) == -1
974 && (CONST_DOUBLE_LOW (op) & 0x200) == 0x200)
975 || (CONST_DOUBLE_HIGH (op) == 0
976 && (CONST_DOUBLE_LOW (op) & 0x200) == 0)))
977 || (GET_CODE (op) == CONST_INT
978 && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
979 && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x200) < 0x400));
982 /* Return truth value of whether OP is a integer which fits the
983 range constraining immediate operands in most three-address insns,
984 which have a 13 bit immediate field. */
987 small_int (op, mode)
988 rtx op;
989 enum machine_mode mode ATTRIBUTE_UNUSED;
991 return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
992 || GET_CODE (op) == CONSTANT_P_RTX);
996 small_int_or_double (op, mode)
997 rtx op;
998 enum machine_mode mode ATTRIBUTE_UNUSED;
1000 return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
1001 || (GET_CODE (op) == CONST_DOUBLE
1002 && CONST_DOUBLE_HIGH (op) == 0
1003 && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
1004 || GET_CODE (op) == CONSTANT_P_RTX);
1007 /* Recognize operand values for the umul instruction. That instruction sign
1008 extends immediate values just like all other sparc instructions, but
1009 interprets the extended result as an unsigned number. */
1012 uns_small_int (op, mode)
1013 rtx op;
1014 enum machine_mode mode ATTRIBUTE_UNUSED;
1016 #if HOST_BITS_PER_WIDE_INT > 32
1017 /* All allowed constants will fit a CONST_INT. */
1018 return ((GET_CODE (op) == CONST_INT
1019 && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
1020 || (INTVAL (op) >= 0xFFFFF000
1021 && INTVAL (op) < 0x100000000)))
1022 || GET_CODE (op) == CONSTANT_P_RTX);
1023 #else
1024 return (((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
1025 || (GET_CODE (op) == CONST_DOUBLE
1026 && CONST_DOUBLE_HIGH (op) == 0
1027 && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000))
1028 || GET_CODE (op) == CONSTANT_P_RTX);
1029 #endif
1033 uns_arith_operand (op, mode)
1034 rtx op;
1035 enum machine_mode mode;
1037 return register_operand (op, mode) || uns_small_int (op, mode);
1040 /* Return truth value of statement that OP is a call-clobbered register. */
1042 clobbered_register (op, mode)
1043 rtx op;
1044 enum machine_mode mode ATTRIBUTE_UNUSED;
1046 return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
1049 /* Return 1 if OP is const0_rtx, used for TARGET_LIVE_G0 insns. */
1052 zero_operand (op, mode)
1053 rtx op;
1054 enum machine_mode mode ATTRIBUTE_UNUSED;
1056 return (op == const0_rtx || GET_CODE (op) == CONSTANT_P_RTX);
1059 /* Return 1 if OP is a valid operand for the source of a move insn. */
1062 input_operand (op, mode)
1063 rtx op;
1064 enum machine_mode mode;
1066 /* If both modes are non-void they must be the same. */
1067 if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
1068 return 0;
1070 /* Allow any one instruction integer constant, and all CONST_INT
1071 variants when we are working in DImode and !arch64. */
1072 if (GET_MODE_CLASS (mode) == MODE_INT
1073 && ((GET_CODE (op) == CONST_INT
1074 && ((SPARC_SETHI_P (INTVAL (op))
1075 && (! TARGET_ARCH64
1076 || (INTVAL (op) >= 0)
1077 || mode == SImode))
1078 || SPARC_SIMM13_P (INTVAL (op))
1079 || (mode == DImode
1080 && ! TARGET_ARCH64)))
1081 || (TARGET_ARCH64
1082 && GET_CODE (op) == CONST_DOUBLE
1083 && ((CONST_DOUBLE_HIGH (op) == 0
1084 && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
1086 #if HOST_BITS_PER_WIDE_INT == 64
1087 (CONST_DOUBLE_HIGH (op) == 0
1088 && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
1089 #else
1090 (SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
1091 && (((CONST_DOUBLE_LOW (op) & 0x80000000) == 0
1092 && CONST_DOUBLE_HIGH (op) == 0)
1093 || (CONST_DOUBLE_HIGH (op) == -1)))
1094 #endif
1095 ))))
1096 return 1;
1098 /* Always match this. */
1099 if (GET_CODE (op) == CONSTANT_P_RTX)
1100 return 1;
1102 /* If !arch64 and this is a DImode const, allow it so that
1103 the splits can be generated. */
1104 if (! TARGET_ARCH64
1105 && mode == DImode
1106 && GET_CODE (op) == CONST_DOUBLE)
1107 return 1;
1109 if (register_operand (op, mode))
1110 return 1;
1112 /* If this is a SUBREG, look inside so that we handle
1113 paradoxical ones. */
1114 if (GET_CODE (op) == SUBREG)
1115 op = SUBREG_REG (op);
1117 /* Check for valid MEM forms. */
1118 if (GET_CODE (op) == MEM)
1120 rtx inside = XEXP (op, 0);
1122 if (GET_CODE (inside) == LO_SUM)
1124 /* We can't allow these because all of the splits
1125 (eventually as they trickle down into DFmode
1126 splits) require offsettable memory references. */
1127 if (! TARGET_V9
1128 && GET_MODE (op) == TFmode)
1129 return 0;
1131 return (register_operand (XEXP (inside, 0), Pmode)
1132 && CONSTANT_P (XEXP (inside, 1)));
1134 return memory_address_p (mode, inside);
1137 return 0;
/* Load the 32-bit constant (or symbol) OP1 into register OP0.
   We know it can't be done in one insn when we get here,
   the movsi expander guarentees this.  Always emits exactly two
   insns: either sethi+or for a CONST_INT, or HIGH+LO_SUM for a
   symbolic operand.  */
void
sparc_emit_set_const32 (op0, op1)
     rtx op0;
     rtx op1;
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp;

  /* Sanity check: one-insn constants must have been handled by the
     caller already.  */
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT value = INTVAL (op1);

      if (SPARC_SETHI_P (value)
          || SPARC_SIMM13_P (value))
        abort ();
    }

  /* Full 2-insn decomposition is needed.  */
  if (reload_in_progress || reload_completed)
    /* No new pseudos may be created during reload; reuse OP0.  */
    temp = op0;
  else
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      /* Emit them as real moves instead of a HIGH/LO_SUM,
         this way CSE can see everything and reuse intermediate
         values if it wants.  */
      if (TARGET_ARCH64
          && HOST_BITS_PER_WIDE_INT != 64
          && (INTVAL (op1) & 0x80000000) != 0)
        {
          /* On a 32-bit host, bit 31 set would sign-extend in a
             CONST_INT; use a CONST_DOUBLE with an explicit zero high
             word so the 64-bit target value is correct.  */
          emit_insn (gen_rtx_SET (VOIDmode,
                                  temp,
                                  gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx,
                                                        INTVAL (op1) & 0xfffffc00, 0)));
        }
      else
        {
          /* Set the sethi portion (bits 31:10).  */
          emit_insn (gen_rtx_SET (VOIDmode,
                                  temp,
                                  GEN_INT (INTVAL (op1) & 0xfffffc00)));
        }
      /* OR in the low 10 bits.  */
      emit_insn (gen_rtx_SET (VOIDmode,
                              op0,
                              gen_rtx_IOR (mode,
                                           temp,
                                           GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode,
                              temp,
                              gen_rtx_HIGH (mode,
                                            op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
                              op0,
                              gen_rtx_LO_SUM (mode,
                                              temp,
                                              op1)));
    }
}
/* Sparc-v9 code-model support.  Load the symbolic 64-bit constant
   OP1 into register OP0, using TEMP1 as a scratch register where a
   separate temporary is required.  The insn sequence emitted depends
   on the active code model (sparc_cmodel).  */
void
sparc_emit_set_symbolic_const64 (op0, op1, temp1)
     rtx op0;
     rtx op1;
     rtx temp1;
{
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable must be in the low 4TB of the virtual address
         space.

         sethi  %hi(symbol), %temp
         or     %temp, %lo(symbol), %reg  */
      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable must be in the low 16TB of the virtual address
         space.

         sethi  %h44(symbol), %temp1
         or     %temp1, %m44(symbol), %temp2
         sllx   %temp2, 12, %temp3
         or     %temp3, %l44(symbol), %reg  */
      emit_insn (gen_seth44 (op0, op1));
      emit_insn (gen_setm44 (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp1,
                              gen_rtx_ASHIFT (DImode, op0, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp1, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable can be placed anywhere in the virtual address
         space.

         sethi  %hh(symbol), %temp1
         sethi  %lm(symbol), %temp2
         or     %temp1, %hm(symbol), %temp3
         or     %temp2, %lo(symbol), %temp4
         sllx   %temp3, 32, %temp5
         or     %temp4, %temp5, %reg  */

      /* Getting this right wrt. reloading is really tricky.
         We _MUST_ have a seperate temporary at this point,
         if we don't barf immediately instead of generating
         incorrect code.  */
      if (temp1 == op0)
        abort ();

      emit_insn (gen_sethh (op0, op1));
      emit_insn (gen_setlm (temp1, op1));
      emit_insn (gen_sethm (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
                              gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
                              gen_rtx_PLUS (DImode, op0, temp1)));
      emit_insn (gen_setlo (op0, op0, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
         Essentially it is MEDLOW with a fixed 64-bit
         virtual base added to all data segment addresses.
         Text-segment stuff is computed like MEDANY, we can't
         reuse the code above because the relocation knobs
         look different.

         Data segment:  sethi   %hi(symbol), %temp1
                        or      %temp1, %lo(symbol), %temp2
                        add     %temp2, EMBMEDANY_BASE_REG, %reg

         Text segment:  sethi   %uhi(symbol), %temp1
                        sethi   %hi(symbol), %temp2
                        or      %temp1, %ulo(symbol), %temp3
                        or      %temp2, %lo(symbol), %temp4
                        sllx    %temp3, 32, %temp5
                        or      %temp4, %temp5, %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
        {
          emit_insn (gen_embmedany_sethi (temp1, op1));
          emit_insn (gen_embmedany_brsum (op0, temp1));
          emit_insn (gen_embmedany_losum (op0, op0, op1));
        }
      else
        {
          /* Getting this right wrt. reloading is really tricky.
             We _MUST_ have a seperate temporary at this point,
             so we barf immediately instead of generating
             incorrect code.  */
          if (temp1 == op0)
            abort ();

          emit_insn (gen_embmedany_textuhi (op0, op1));
          emit_insn (gen_embmedany_texthi (temp1, op1));
          emit_insn (gen_embmedany_textulo (op0, op0, op1));
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_PLUS (DImode, op0, temp1)));
          emit_insn (gen_embmedany_textlo (op0, op0, op1));
        }
      break;

    default:
      abort();
    }
}
/* These avoid problems when cross compiling.  If we do not
   go through all this hair then the optimizer will see
   invalid REG_EQUAL notes or in some cases none at all.  */
static void sparc_emit_set_safe_HIGH64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_SET64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_OR64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_XOR64 PROTO ((rtx, HOST_WIDE_INT));

/* GEN_HIGHINT64 (X): an rtx constant holding only the sethi bits
   (31:10) of X.  GEN_INT64 (X): an rtx constant holding X itself.
   On a 64-bit host a plain CONST_INT suffices; on a 32-bit host a
   CONST_DOUBLE carries the two 32-bit halves explicitly (with
   sign-extension into the high word for GEN_INT64) so that a cross
   compiler produces the same 64-bit value as a native one.  */
#if HOST_BITS_PER_WIDE_INT == 64
#define GEN_HIGHINT64(__x)	GEN_INT ((__x) & 0xfffffc00)
#define GEN_INT64(__x)		GEN_INT (__x)
#else
#define GEN_HIGHINT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
			      (__x) & 0xfffffc00, 0)
#define GEN_INT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
			      (__x) & 0xffffffff, \
			      ((__x) & 0x80000000 \
			       ? 0xffffffff : 0))
#endif
/* The optimizer is not to assume anything about exactly
   which bits are set for a HIGH, they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits, and matches
   a plain movdi, to alleviate this problem.  */
static void
sparc_emit_set_safe_HIGH64 (dest, val)
     rtx dest;
     HOST_WIDE_INT val;
{
  /* Emit DEST = (VAL & 0xfffffc00) as an ordinary move, rather than
     an opaque HIGH rtx, so CSE can reason about the exact bits.  */
  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_HIGHINT64 (val)));
}
/* Return a (set DEST VAL) rtx with VAL represented in a host-safe
   form (see GEN_INT64 above).  */
static rtx
gen_safe_SET64 (dest, val)
     rtx dest;
     HOST_WIDE_INT val;
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT64 (val));
}
/* Return an (ior:DI SRC VAL) rtx with VAL represented in a host-safe
   form (see GEN_INT64 above).  */
static rtx
gen_safe_OR64 (src, val)
     rtx src;
     HOST_WIDE_INT val;
{
  return gen_rtx_IOR (DImode, src, GEN_INT64 (val));
}
/* Return an (xor:DI SRC VAL) rtx with VAL represented in a host-safe
   form (see GEN_INT64 above).  */
static rtx
gen_safe_XOR64 (src, val)
     rtx src;
     HOST_WIDE_INT val;
{
  return gen_rtx_XOR (DImode, src, GEN_INT64 (val));
}
/* Worker routines for 64-bit constant formation on arch64.
   One of the key things to be doing in these emissions is
   to create as many temp REGs as possible.  This makes it
   possible for half-built constants to be used later when
   such values are similar to something required later on.
   Without doing this, the optimizer cannot see such
   opportunities.  */

static void sparc_emit_set_const64_quick1
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, int));

/* Emit a 2-insn sequence loading a 64-bit constant whose upper 32
   bits are all zero (IS_NEG == 0) or all one (IS_NEG != 0) into OP0,
   using TEMP as scratch.  LOW_BITS holds the (non-negated) low 32
   bits.  Non-negative: sethi + or.  Negative: sethi of the
   complement + xor (or a one's complement when possible).  */
static void
sparc_emit_set_const64_quick1 (op0, temp, low_bits, is_neg)
  rtx op0;
  rtx temp;
  unsigned HOST_WIDE_INT low_bits;
  int is_neg;
{
  unsigned HOST_WIDE_INT high_bits;

  /* HIGH_BITS here is the value the sethi will produce, i.e. the
     complement for the negative case.  */
  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  sparc_emit_set_safe_HIGH64 (temp, high_bits);
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
                              gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
         instead.  This way the combiner will notice logical operations
         such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_NOT (DImode, temp)));
        }
      else
        {
          /* XOR with a negative simm13 flips the upper bits back to
             all-ones while merging in the low 10 bits.  */
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_safe_XOR64 (temp,
                                                  (-0x400 | (low_bits & 0x3ff)))));
        }
    }
}
static void sparc_emit_set_const64_quick2
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT, int));

/* Emit insns loading into OP0 the 64-bit constant formed by placing
   HIGH_BITS at bit position SHIFT_COUNT and OR-ing in LOW_IMMEDIATE
   (a simm13, possibly zero) at the bottom.  TEMP is a scratch reg.  */
static void
sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_immediate, shift_count)
  rtx op0;
  rtx temp;
  unsigned HOST_WIDE_INT high_bits;
  unsigned HOST_WIDE_INT low_immediate;
  int shift_count;
{
  /* TEMP2 is whichever register ends up holding the unshifted
     HIGH_BITS value.  */
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
        /* Low 10 bits of HIGH_BITS are nonzero: need the OR too.  */
        emit_insn (gen_rtx_SET (VOIDmode, op0,
                                gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
        temp2 = temp;
    }
  else
    {
      /* Fits in a simm13 move.  */
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place. */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
                          gen_rtx_ASHIFT (DImode, temp2,
                                          GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well. */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
                            gen_safe_OR64 (op0, low_immediate)));
}
static void sparc_emit_set_const64_longway
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));

/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.  */
static void
sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits)
     rtx op0;
     rtx temp;
     unsigned HOST_WIDE_INT high_bits;
     unsigned HOST_WIDE_INT low_bits;
{
  rtx sub_temp;

  /* No fresh pseudos may be created during reload; reuse OP0.  */
  if (reload_in_progress || reload_completed)
    sub_temp = op0;
  else
    sub_temp = gen_reg_rtx (DImode);

  /* Build the upper 32 bits in SUB_TEMP: sethi plus, when the low 10
     bits of HIGH_BITS are nonzero, an OR.  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
        emit_insn (gen_rtx_SET (VOIDmode,
                                sub_temp,
                                gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
        sub_temp = temp;
    }
  else
    {
      /* HIGH_BITS fits in a simm13 move.  */
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (!reload_in_progress && !reload_completed)
    {
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      /* Outside reload we can afford separate temporaries: shift the
         high half into place, build the low half independently, then
         combine with an add.  The separate regs help CSE reuse the
         intermediate values.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
                              gen_rtx_ASHIFT (DImode, sub_temp,
                                              GEN_INT (32))));

      sparc_emit_set_safe_HIGH64 (temp2, low_bits);
      if ((low_bits & ~0xfffffc00) != 0)
        {
          emit_insn (gen_rtx_SET (VOIDmode, temp3,
                                  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_PLUS (DImode, temp4, temp3)));
        }
      else
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_PLUS (DImode, temp4, temp2)));
        }
    }
  else
    {
      /* LOW1/LOW2/LOW3 split LOW_BITS into 12+12+8 bit chunks,
         highest chunk first, to be shifted in via shift+or steps.  */
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
         painful.  However we do still make an attempt to
         avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_ASHIFT (DImode, sub_temp,
                                                  GEN_INT (to_shift))));
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_IOR (DImode, op0, low1)));
          sub_temp = op0;
          to_shift = 12;
        }
      else
        {
          /* Chunk is zero: fold its width into the next shift.  */
          to_shift += 12;
        }
      if (low2 != const0_rtx)
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_ASHIFT (DImode, sub_temp,
                                                  GEN_INT (to_shift))));
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_IOR (DImode, op0, low2)));
          sub_temp = op0;
          to_shift = 8;
        }
      else
        {
          to_shift += 8;
        }
      emit_insn (gen_rtx_SET (VOIDmode, op0,
                              gen_rtx_ASHIFT (DImode, sub_temp,
                                              GEN_INT (to_shift))));
      if (low3 != const0_rtx)
        emit_insn (gen_rtx_SET (VOIDmode, op0,
                                gen_rtx_IOR (DImode, op0, low3)));
      /* phew... */
    }
}
1592 /* Analyze a 64-bit constant for certain properties. */
1593 static void analyze_64bit_constant
1594 PROTO((unsigned HOST_WIDE_INT,
1595 unsigned HOST_WIDE_INT,
1596 int *, int *, int *));
1598 static void
1599 analyze_64bit_constant (high_bits, low_bits, hbsp, lbsp, abbasp)
1600 unsigned HOST_WIDE_INT high_bits, low_bits;
1601 int *hbsp, *lbsp, *abbasp;
1603 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1604 int i;
1606 lowest_bit_set = highest_bit_set = -1;
1607 i = 0;
1610 if ((lowest_bit_set == -1)
1611 && ((low_bits >> i) & 1))
1612 lowest_bit_set = i;
1613 if ((highest_bit_set == -1)
1614 && ((high_bits >> (32 - i - 1)) & 1))
1615 highest_bit_set = (64 - i - 1);
1617 while (++i < 32
1618 && ((highest_bit_set == -1)
1619 || (lowest_bit_set == -1)));
1620 if (i == 32)
1622 i = 0;
1625 if ((lowest_bit_set == -1)
1626 && ((high_bits >> i) & 1))
1627 lowest_bit_set = i + 32;
1628 if ((highest_bit_set == -1)
1629 && ((low_bits >> (32 - i - 1)) & 1))
1630 highest_bit_set = 32 - i - 1;
1632 while (++i < 32
1633 && ((highest_bit_set == -1)
1634 || (lowest_bit_set == -1)));
1636 /* If there are no bits set this should have gone out
1637 as one instruction! */
1638 if (lowest_bit_set == -1
1639 || highest_bit_set == -1)
1640 abort ();
1641 all_bits_between_are_set = 1;
1642 for (i = lowest_bit_set; i <= highest_bit_set; i++)
1644 if (i < 32)
1646 if ((low_bits & (1 << i)) != 0)
1647 continue;
1649 else
1651 if ((high_bits & (1 << (i - 32))) != 0)
1652 continue;
1654 all_bits_between_are_set = 0;
1655 break;
1657 *hbsp = highest_bit_set;
1658 *lbsp = lowest_bit_set;
1659 *abbasp = all_bits_between_are_set;
1662 static int const64_is_2insns
1663 PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1665 static int
1666 const64_is_2insns (high_bits, low_bits)
1667 unsigned HOST_WIDE_INT high_bits, low_bits;
1669 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1671 if (high_bits == 0
1672 || high_bits == 0xffffffff)
1673 return 1;
1675 analyze_64bit_constant (high_bits, low_bits,
1676 &highest_bit_set, &lowest_bit_set,
1677 &all_bits_between_are_set);
1679 if ((highest_bit_set == 63
1680 || lowest_bit_set == 0)
1681 && all_bits_between_are_set != 0)
1682 return 1;
1684 if ((highest_bit_set - lowest_bit_set) < 21)
1685 return 1;
1687 return 0;
static unsigned HOST_WIDE_INT create_simple_focus_bits
	PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
	       int, int));

/* Extract the run of interesting bits of the constant HIGH_BITS:
   LOW_BITS, starting at bit LOWEST_BIT_SET, re-positioned so the run
   begins at bit SHIFT of the result.  Aborts if the two halves of the
   realigned value overlap (an internal consistency failure).  */
static unsigned HOST_WIDE_INT
create_simple_focus_bits (high_bits, low_bits, lowest_bit_set, shift)
     unsigned HOST_WIDE_INT high_bits, low_bits;
     int lowest_bit_set, shift;
{
  HOST_WIDE_INT hi, lo;

  if (lowest_bit_set < 32)
    {
      lo = (low_bits >> lowest_bit_set) << shift;
      /* NOTE(review): when LOWEST_BIT_SET == 0 this shifts by 32,
         which is undefined for a 32-bit type -- callers appear to
         avoid that case, but confirm before relying on it.  */
      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
    }
  else
    {
      /* The run lies entirely in the upper half.  */
      lo = 0;
      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
    }
  if (hi & lo)
    abort ();
  return (hi | lo);
}
/* Here we are sure to be arch64 and this is an integer constant
   being loaded into a register.  Emit the most efficient
   insn sequence possible.  Detection of all the 1-insn cases
   has been done already.  OP0 is the destination (an integer REG),
   OP1 the constant (CONST_INT, CONST_DOUBLE, or a symbolic rtx which
   is delegated to sparc_emit_set_symbolic_const64).  */
void
sparc_emit_set_const64 (op0, op1)
     rtx op0;
     rtx op1;
{
  unsigned HOST_WIDE_INT high_bits, low_bits;
  int lowest_bit_set, highest_bit_set;
  int all_bits_between_are_set;
  rtx temp;

  /* Sanity check that we know what we are working with.  */
  if (! TARGET_ARCH64
      || GET_CODE (op0) != REG
      || (REGNO (op0) >= SPARC_FIRST_FP_REG
          && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
    abort ();

  /* No fresh pseudos during reload; reuse the destination.  */
  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (DImode);

  if (GET_CODE (op1) != CONST_DOUBLE
      && GET_CODE (op1) != CONST_INT)
    {
      sparc_emit_set_symbolic_const64 (op0, op1, temp);
      return;
    }

  /* Split the constant into 32-bit halves, whatever the host word
     size and rtx representation.  */
  if (GET_CODE (op1) == CONST_DOUBLE)
    {
#if HOST_BITS_PER_WIDE_INT == 64
      high_bits = (CONST_DOUBLE_LOW (op1) >> 32) & 0xffffffff;
      low_bits = CONST_DOUBLE_LOW (op1) & 0xffffffff;
#else
      high_bits = CONST_DOUBLE_HIGH (op1);
      low_bits = CONST_DOUBLE_LOW (op1);
#endif
    }
  else
    {
#if HOST_BITS_PER_WIDE_INT == 64
      high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
      low_bits = (INTVAL (op1) & 0xffffffff);
#else
      /* 32-bit host CONST_INT: the value is sign-extended.  */
      high_bits = ((INTVAL (op1) < 0) ?
                   0xffffffff :
                   0x00000000);
      low_bits = INTVAL (op1);
#endif
    }

  /* low_bits   bits 0  --> 31
     high_bits  bits 32 --> 63  */

  analyze_64bit_constant (high_bits, low_bits,
                          &highest_bit_set, &lowest_bit_set,
                          &all_bits_between_are_set);

  /* First try for a 2-insn sequence.  */

  /* These situations are preferred because the optimizer can
   * do more things with them:
   * 1) mov     -1, %reg
   *    sllx    %reg, shift, %reg
   * 2) mov     -1, %reg
   *    srlx    %reg, shift, %reg
   * 3) mov     some_small_const, %reg
   *    sllx    %reg, shift, %reg
   */
  if (((highest_bit_set == 63
        || lowest_bit_set == 0)
       && all_bits_between_are_set != 0)
      || ((highest_bit_set - lowest_bit_set) < 12))
    {
      HOST_WIDE_INT the_const = -1;
      /* Positive SHIFT means sllx, negative means srlx.  */
      int shift = lowest_bit_set;

      if ((highest_bit_set != 63
           && lowest_bit_set != 0)
          || all_bits_between_are_set == 0)
        {
          the_const =
            create_simple_focus_bits (high_bits, low_bits,
                                      lowest_bit_set, 0);
        }
      else if (lowest_bit_set == 0)
        /* Run of ones anchored at bit 0: mov -1 then srlx to clear
           the top bits.  */
        shift = -(63 - highest_bit_set);

      if (! SPARC_SIMM13_P (the_const))
        abort ();

      emit_insn (gen_safe_SET64 (temp, the_const));
      if (shift > 0)
        emit_insn (gen_rtx_SET (VOIDmode,
                                op0,
                                gen_rtx_ASHIFT (DImode,
                                                temp,
                                                GEN_INT (shift))));
      else if (shift < 0)
        emit_insn (gen_rtx_SET (VOIDmode,
                                op0,
                                gen_rtx_LSHIFTRT (DImode,
                                                  temp,
                                                  GEN_INT (-shift))));
      else
        abort ();
      return;
    }

  /* Now a range of 22 or less bits set somewhere.
   * 1) sethi   %hi(focus_bits), %reg
   *    sllx    %reg, shift, %reg
   * 2) sethi   %hi(focus_bits), %reg
   *    srlx    %reg, shift, %reg
   */
  if ((highest_bit_set - lowest_bit_set) < 21)
    {
      unsigned HOST_WIDE_INT focus_bits =
        create_simple_focus_bits (high_bits, low_bits,
                                  lowest_bit_set, 10);

      if (! SPARC_SETHI_P (focus_bits))
        abort ();

      sparc_emit_set_safe_HIGH64 (temp, focus_bits);

      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
      if (lowest_bit_set < 10)
        emit_insn (gen_rtx_SET (VOIDmode,
                                op0,
                                gen_rtx_LSHIFTRT (DImode, temp,
                                                  GEN_INT (10 - lowest_bit_set))));
      else if (lowest_bit_set > 10)
        emit_insn (gen_rtx_SET (VOIDmode,
                                op0,
                                gen_rtx_ASHIFT (DImode, temp,
                                                GEN_INT (lowest_bit_set - 10))));
      else
        abort ();
      return;
    }

  /* 1) sethi   %hi(low_bits), %reg
   *    or      %reg, %lo(low_bits), %reg
   * 2) sethi   %hi(~low_bits), %reg
   *    xor     %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
   */
  if (high_bits == 0
      || high_bits == 0xffffffff)
    {
      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
                                     (high_bits == 0xffffffff));
      return;
    }

  /* Now, try 3-insn sequences.  */

  /* 1) sethi   %hi(high_bits), %reg
   *    or      %reg, %lo(high_bits), %reg
   *    sllx    %reg, 32, %reg
   */
  if (low_bits == 0)
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
      return;
    }

  /* We may be able to do something quick
     when the constant is negated, so try that.  */
  if (const64_is_2insns ((~high_bits) & 0xffffffff,
                         (~low_bits) & 0xfffffc00))
    {
      /* NOTE: The trailing bits get XOR'd so we need the
         non-negated bits, not the negated ones.  */
      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;

      if ((((~high_bits) & 0xffffffff) == 0
           && ((~low_bits) & 0x80000000) == 0)
          || (((~high_bits) & 0xffffffff) == 0xffffffff
              && ((~low_bits) & 0x80000000) != 0))
        {
          /* The negated value sign-extends correctly from 32 bits;
             try to build it with a single move or sethi.  */
          int fast_int = (~low_bits & 0xffffffff);

          if ((SPARC_SETHI_P (fast_int)
               && (~high_bits & 0xffffffff) == 0)
              || SPARC_SIMM13_P (fast_int))
            emit_insn (gen_safe_SET64 (temp, fast_int));
          else
            sparc_emit_set_const64 (temp, GEN_INT64 (fast_int));
        }
      else
        {
          rtx negated_const;
#if HOST_BITS_PER_WIDE_INT == 64
          negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
                                   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
#else
          negated_const = gen_rtx_CONST_DOUBLE (DImode, const0_rtx,
                                                (~low_bits) & 0xfffffc00,
                                                (~high_bits) & 0xffffffff);
#endif
          /* Recurse: the negated constant takes at most 2 insns.  */
          sparc_emit_set_const64 (temp, negated_const);
        }

      /* If we are XOR'ing with -1, then we should emit a one's complement
         instead.  This way the combiner will notice logical operations
         such as ANDN later on and substitute.  */
      if (trailing_bits == 0x3ff)
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_NOT (DImode, temp)));
        }
      else
        {
          emit_insn (gen_rtx_SET (VOIDmode,
                                  op0,
                                  gen_safe_XOR64 (temp,
                                                  (-0x400 | trailing_bits))));
        }
      return;
    }

  /* 1) sethi   %hi(xxx), %reg
   *    or      %reg, %lo(xxx), %reg
   *    sllx    %reg, yyy, %reg
   *
   * ??? This is just a generalized version of the low_bits==0
   * thing above, FIXME...
   */
  if ((highest_bit_set - lowest_bit_set) < 32)
    {
      unsigned HOST_WIDE_INT focus_bits =
        create_simple_focus_bits (high_bits, low_bits,
                                  lowest_bit_set, 0);

      /* We can't get here in this state.  */
      if (highest_bit_set < 32
          || lowest_bit_set >= 32)
        abort ();

      /* So what we know is that the set bits straddle the
         middle of the 64-bit word.  */
      sparc_emit_set_const64_quick2 (op0, temp,
                                     focus_bits, 0,
                                     lowest_bit_set);
      return;
    }

  /* 1) sethi   %hi(high_bits), %reg
   *    or      %reg, %lo(high_bits), %reg
   *    sllx    %reg, 32, %reg
   *    or      %reg, low_bits, %reg
   */
  if (SPARC_SIMM13_P(low_bits)
      && ((int)low_bits > 0))
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
      return;
    }

  /* The easiest way when all else fails, is full decomposition.  */
#if 0
  printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
	  high_bits, low_bits, ~high_bits, ~low_bits);
#endif
  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for the cc reg in the proper mode.  */

rtx
gen_compare_reg (code, x, y)
     enum rtx_code code;
     rtx x, y;
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg;

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos).  If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y.  If this pair was compared recently,
         reuse the same %fccN so the compare can be CSE'd away.  */
      for (reg = 0; reg < 4; reg++)
        if (prev_args[reg][0] == x && prev_args[reg][1] == y)
          break;
      if (reg == 4)
        {
          /* Not seen before: claim the next fcc reg round-robin.  */
          reg = next_fcc_reg;
          prev_args[reg][0] = x;
          prev_args[reg][1] = y;
          next_fcc_reg = (next_fcc_reg + 1) & 3;
        }
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    /* Non-V9 float compare: only one fcc register exists.  */
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
                          gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}
/* This function is used for v9 only.
   CODE is the code for an Scc's comparison.
   OPERANDS[0] is the target of the Scc insn.
   OPERANDS[1] is the value we compare against const0_rtx (which hasn't
   been generated yet).

   This function is needed to turn

           (set (reg:SI 110)
               (gt (reg:CCX 100 %icc)
                   (const_int 0)))
   into
           (set (reg:SI 110)
               (gt:DI (reg:CCX 100 %icc)
                   (const_int 0)))

   IE: The instruction recognizer needs to see the mode of the comparison to
   find the right instruction.  We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.

   We refer to the global sparc compare operands sparc_compare_op0 and
   sparc_compare_op1.

   Returns 1 if an insn sequence was emitted, 0 if the caller must
   fall back to another strategy.  */

int
gen_v9_scc (compare_code, operands)
     enum rtx_code compare_code;
     register rtx *operands;
{
  rtx temp, op0, op1;

  /* DImode Scc needs 64-bit insns.  */
  if (! TARGET_ARCH64
      && (GET_MODE (sparc_compare_op0) == DImode
          || GET_MODE (operands[0]) == DImode))
    return 0;

  /* Handle the case where operands[0] == sparc_compare_op0.
     We "early clobber" the result.  */
  if (REGNO (operands[0]) == REGNO (sparc_compare_op0))
    {
      op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
      emit_move_insn (op0, sparc_compare_op0);
    }
  else
    op0 = sparc_compare_op0;
  /* For consistency in the following.  */
  op1 = sparc_compare_op1;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
      && op1 == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      /* Special case for op0 != 0.  This can be done with one instruction if
         operands[0] == sparc_compare_op0.  We don't assume they are equal
         now though.  */

      if (compare_code == NE
          && GET_MODE (operands[0]) == DImode
          && GET_MODE (op0) == DImode)
        {
          /* movrnz turns any nonzero op0 into 1 in place.  */
          emit_insn (gen_rtx_SET (VOIDmode, operands[0], op0));
          emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                              gen_rtx_IF_THEN_ELSE (DImode,
                                       gen_rtx_fmt_ee (compare_code, DImode,
                                                       op0, const0_rtx),
                                       const1_rtx,
                                       operands[0])));
          return 1;
        }

      /* General movrCC form: clear the result, then conditionally set
         it to 1 based on a register-vs-zero comparison (which needs a
         DImode source register).  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
      if (GET_MODE (op0) != DImode)
        {
          temp = gen_reg_rtx (DImode);
          convert_move (temp, op0, 0);
        }
      else
        temp = op0;
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                   gen_rtx_fmt_ee (compare_code, DImode,
                                                   temp, const0_rtx),
                                   const1_rtx,
                                   operands[0])));
      return 1;
    }
  else
    {
      /* Fall back to a compare into a cc reg followed by a
         conditional move on the cc reg.  */
      operands[1] = gen_compare_reg (compare_code, op0, op1);

      switch (GET_MODE (operands[1]))
        {
          case CCmode :
          case CCXmode :
          case CCFPEmode :
          case CCFPmode :
            break;
          default :
            abort ();
        }
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                   gen_rtx_fmt_ee (compare_code,
                                                   GET_MODE (operands[1]),
                                                   operands[1], const0_rtx),
                                    const1_rtx, operands[0])));
      return 1;
    }
}
/* Emit a conditional jump insn for the v9 architecture using comparison code
   CODE and jump target LABEL, comparing register OP0 against zero.
   This function exists to take advantage of the v9 brxx insns.  */

void
emit_v9_brxx_insn (code, op0, label)
     enum rtx_code code;
     rtx op0, label;
{
  emit_jump_insn (gen_rtx_SET (VOIDmode,
                           pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                    gen_rtx_fmt_ee (code, GET_MODE (op0),
                                                    op0, const0_rtx),
                                    gen_rtx_LABEL_REF (VOIDmode, label),
                                    pc_rtx)));
}
/* Return nonzero if a return peephole merging return with
   setting of output register is ok.  Only true when the current
   function has no frame (actual_fsize == 0).  */
int
leaf_return_peephole_ok ()
{
  return (actual_fsize == 0);
}
/* Return nonzero if TRIAL can go into the function epilogue's
   delay slot.  SLOT is the slot we are trying to fill.  */

int
eligible_for_epilogue_delay (trial, slot)
     rtx trial;
     int slot;
{
  rtx pat, src;

  /* Only one epilogue delay slot exists.  */
  if (slot >= 1)
    return 0;

  /* Only a simple single-set insn can be considered.  */
  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
    return 0;

  if (get_attr_length (trial) != 1)
    return 0;

  /* If %g0 is live, there are lots of things we can't handle.
     Rather than trying to find them all now, let's punt and only
     optimize things as necessary.  */
  if (TARGET_LIVE_G0)
    return 0;

  /* In the case of a true leaf function, anything can go into the delay slot.
     A delay slot only exists however if the frame size is zero, otherwise
     we will put an insn to adjust the stack after the return.  */
  if (leaf_function)
    {
      if (leaf_return_peephole_ok ())
        return ((get_attr_in_uncond_branch_delay (trial)
                 == IN_BRANCH_DELAY_TRUE));
      return 0;
    }

  /* If only trivial `restore' insns work, nothing can go in the
     delay slot.  */
  else if (TARGET_BROKEN_SAVERESTORE)
    return 0;

  pat = PATTERN (trial);

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.  The destination
     must be an output register (%o0-%o7, regs 24-31 before the
     restore renames them).  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || REGNO (SET_DEST (pat)) >= 32
      || REGNO (SET_DEST (pat)) < 24)
    return 0;

  /* The set of insns matched here must agree precisely with the set of
     patterns paired with a RETURN in sparc.md.  */

  src = SET_SRC (pat);

  /* This matches "*return_[qhs]i".  */
  if (arith_operand (src, GET_MODE (src)))
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);

  /* This matches "*return_di".  */
  else if (arith_double_operand (src, GET_MODE (src)))
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* This matches "*return_sf_no_fpu".  */
  else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
           && register_operand (src, SFmode))
    return 1;

  /* This matches "*return_addsi".  */
  else if (GET_CODE (src) == PLUS
           && arith_operand (XEXP (src, 0), SImode)
           && arith_operand (XEXP (src, 1), SImode)
           && (register_operand (XEXP (src, 0), SImode)
               || register_operand (XEXP (src, 1), SImode)))
    return 1;

  /* This matches "*return_adddi".  */
  else if (GET_CODE (src) == PLUS
           && arith_double_operand (XEXP (src, 0), DImode)
           && arith_double_operand (XEXP (src, 1), DImode)
           && (register_operand (XEXP (src, 0), DImode)
               || register_operand (XEXP (src, 1), DImode)))
    return 1;

  return 0;
}
/* Recursively check that rtx X references only "in" and global
   registers (and constants), so it is safe in a return delay slot.
   Returns 1 if safe, 0 otherwise.  */
static int
check_return_regs (x)
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
      return IN_OR_GLOBAL_P (x);

    case CONST_INT:
    case CONST_DOUBLE:
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
    return 1;

    case SET:
    case IOR:
    case AND:
    case XOR:
    case PLUS:
    case MINUS:
      if (check_return_regs (XEXP (x, 1)) == 0)
  return 0;
      /* Fall through: operand 0 is checked below along with the
         unary/memory cases.  */
    case NOT:
    case NEG:
    case MEM:
      return check_return_regs (XEXP (x, 0));
      
    default:
      return 0;
    }

}
/* Return 1 if TRIAL references only in and global registers,
   i.e. it is eligible for a return insn's delay slot.  Only simple
   SET patterns are considered.  */
int
eligible_for_return_delay (trial)
     rtx trial;
{
  if (GET_CODE (PATTERN (trial)) != SET)
    return 0;

  return check_return_regs (PATTERN (trial));
}
2324 short_branch (uid1, uid2)
2325 int uid1, uid2;
2327 unsigned int delta = insn_addresses[uid1] - insn_addresses[uid2];
2328 if (delta + 1024 < 2048)
2329 return 1;
2330 /* warning ("long branch, distance %d", delta); */
2331 return 0;
2334 /* Return non-zero if REG is not used after INSN.
2335 We assume REG is a reload reg, and therefore does
2336 not live past labels or calls or jumps. */
2338 reg_unused_after (reg, insn)
2339 rtx reg;
2340 rtx insn;
2342 enum rtx_code code, prev_code = UNKNOWN;
2344 while ((insn = NEXT_INSN (insn)))
2346 if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2347 return 1;
2349 code = GET_CODE (insn);
2350 if (GET_CODE (insn) == CODE_LABEL)
2351 return 1;
2353 if (GET_RTX_CLASS (code) == 'i')
2355 rtx set = single_set (insn);
2356 int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2357 if (set && in_src)
2358 return 0;
2359 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2360 return 1;
2361 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2362 return 0;
2364 prev_code = code;
2366 return 1;
/* The table we use to reference PIC data: a SYMBOL_REF for
   "_GLOBAL_OFFSET_TABLE_", rebuilt per-function by finalize_pic.  */
static rtx global_offset_table;

/* The function we use to get at it: the ".LGETPC" retl/add helper that
   finalize_pic emits on first use.  */
static rtx get_pc_symbol;
/* Assembler name of that helper; "" until it has been emitted, which is
   how finalize_pic knows whether the helper already exists.  */
static char get_pc_symbol_name[256];
2376 /* Ensure that we are not using patterns that are not OK with PIC. */
2379 check_pic (i)
2380 int i;
2382 switch (flag_pic)
2384 case 1:
2385 if (GET_CODE (recog_operand[i]) == SYMBOL_REF
2386 || (GET_CODE (recog_operand[i]) == CONST
2387 && ! (GET_CODE (XEXP (recog_operand[i], 0)) == MINUS
2388 && (XEXP (XEXP (recog_operand[i], 0), 0)
2389 == global_offset_table)
2390 && (GET_CODE (XEXP (XEXP (recog_operand[i], 0), 1))
2391 == CONST))))
2392 abort ();
2393 case 2:
2394 default:
2395 return 1;
2399 /* Return true if X is an address which needs a temporary register when
2400 reloaded while generating PIC code. */
2403 pic_address_needs_scratch (x)
2404 rtx x;
2406 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
2407 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2408 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2409 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2410 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2411 return 1;
2413 return 0;
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if non zero, otherwise we allocate register(s) as
   necessary.  MODE is unused.

   May not allocate new registers during reload: aborts if REG is 0 then.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx reg;
{
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx pic_ref, address;
      rtx insn;

      if (reg == 0)
	{
	  if (reload_in_progress || reload_completed)
	    abort ();
	  else
	    reg = gen_reg_rtx (Pmode);
	}

      if (flag_pic == 2)
	{
	  /* If not during reload, allocate another temp reg here for loading
	     in the address, so that these instructions can be optimized
	     properly.  */
	  rtx temp_reg = ((reload_in_progress || reload_completed)
			  ? reg : gen_reg_rtx (Pmode));

	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading in the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
	  if (Pmode == SImode)
	    {
	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  else
	    {
	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  address = temp_reg;
	}
      else
	/* flag_pic == 1: the GOT slot offset fits in a 13 bit immediate.  */
	address = orig;

      /* Load the symbol's address from its GOT entry.  */
      pic_ref = gen_rtx_MEM (Pmode,
			     gen_rtx_PLUS (Pmode,
					   pic_offset_table_rtx, address));
      current_function_uses_pic_offset_table = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      insn = emit_move_insn (reg, pic_ref);
      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
					    REG_NOTES (insn));
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already legitimized: (const (plus pic_reg ...)).  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	{
	  if (reload_in_progress || reload_completed)
	    abort ();
	  else
	    reg = gen_reg_rtx (Pmode);
	}

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
	{
	  /* Legitimize both halves; reuse REG for the second half only
	     if the first half did not land in it.  */
	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
	  offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					   base == reg ? 0 : reg);
	}
      else
	abort ();

      if (GET_CODE (offset) == CONST_INT)
	{
	  if (SMALL_INT (offset))
	    return plus_constant_for_output (base, INTVAL (offset));
	  else if (! reload_in_progress && ! reload_completed)
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    abort ();
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? Why do we do this?  */
    /* Now movsi_pic_label_ref uses it, but we ought to be checking that
       the register is live instead, in case it is eliminated.  */
    current_function_uses_pic_offset_table = 1;

  /* Anything else is assumed to already be position-independent.  */
  return orig;
}
2525 /* Return the RTX for insns to set the PIC register. */
2527 static rtx
2528 pic_setup_code ()
2530 rtx seq;
2532 start_sequence ();
2533 emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
2534 get_pc_symbol));
2535 seq = gen_sequence ();
2536 end_sequence ();
2538 return seq;
/* Emit special PIC prologues and epilogues.  Inserts the PIC-register
   setup sequence at the start of the function and after each nonlocal
   goto receiver, and emits the shared get_pc helper on first use.  */

void
finalize_pic ()
{
  /* Labels to get the PC in the prologue of this function.  */
  int orig_flag_pic = flag_pic;
  rtx insn;

  if (current_function_uses_pic_offset_table == 0)
    return;

  if (! flag_pic)
    abort ();

  /* If we haven't emitted the special get_pc helper function, do so now.  */
  if (get_pc_symbol_name[0] == 0)
    {
      int align;

      ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
      text_section ();

      align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
      if (align > 0)
	ASM_OUTPUT_ALIGN (asm_out_file, align);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
      fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
    }

  /* Initialize every time through, since we can't easily
     know this to be permanent.  */
  global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
  get_pc_symbol = gen_rtx_SYMBOL_REF (Pmode, get_pc_symbol_name);
  /* Temporarily clear flag_pic so the emitted setup code is not itself
     run back through the PIC legitimizer.  */
  flag_pic = 0;

  emit_insn_after (pic_setup_code (), get_insns ());

  /* Insert the code in each nonlocal goto receiver.
     If you make changes here or to the nonlocal_goto_receiver
     pattern, make sure the unspec_volatile numbers still
     match.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	&& XINT (PATTERN (insn), 1) == 5)
      emit_insn_after (pic_setup_code (), insn);

  flag_pic = orig_flag_pic;

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.
     ??? In the case where we don't obey regdecls, this is not sufficient
     since we may not fall out the bottom.  */
  emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
}
/* Return 1 if MEM is a memory reference known to be aligned to at least
   DESIRED bytes (DESIRED is assumed to be a power of two -- it is used
   as a mask below).  */

int
mem_min_alignment (mem, desired)
     rtx mem;
     int desired;
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.  */
  if (GET_CODE (mem) != MEM)
    return 0;

  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	{
	  base = XEXP (addr, 0);

	  /* What we are saying here is that if the base
	     REG is aligned properly, the compiler will make
	     sure any REG based index upon it will be so
	     as well.  */
	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
	    offset = XEXP (addr, 1);
	  else
	    offset = const0_rtx;
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != FRAME_POINTER_REGNUM
	  && regno != STACK_POINTER_REGNUM)
	{
	  /* Check if the compiler has recorded some information
	     about the alignment of the base REG.  If reload has
	     completed, we already matched with proper alignments.  */
	  if (((regno_pointer_align != NULL
		&& REGNO_POINTER_ALIGN (regno) >= desired)
	       || reload_completed)
	      && ((INTVAL (offset) & (desired - 1)) == 0))
	    return 1;
	}
      else
	{
	  /* Frame/stack pointer based: the frame is kept aligned, but the
	     stack bias (v9) must be removed before testing the offset.  */
	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
	    return 1;
	}
    }
  else if (! TARGET_UNALIGNED_DOUBLES
	   || CONSTANT_P (addr)
	   || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
	 is true, in which case we can only assume that an access is aligned if
	 it is to a constant address, or the address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}
2672 /* Vectors to keep interesting information about registers where it can easily
2673 be got. We use to use the actual mode value as the bit number, but there
2674 are more than 32 modes now. Instead we use two tables: one indexed by
2675 hard register number, and one indexed by mode. */
2677 /* The purpose of sparc_mode_class is to shrink the range of modes so that
2678 they all fit (as bit numbers) in a 32 bit word (again). Each real mode is
2679 mapped into one sparc_mode_class mode. */
enum sparc_mode_class {
  /* Integer modes by size: single-, double-, tetra-, octa-word.  */
  S_MODE, D_MODE, T_MODE, O_MODE,
  /* Float modes by size: single, double, quad, octa.  */
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  /* Condition codes: integer and floating point.  */
  CC_MODE, CCFP_MODE
};
/* Modes for single-word and smaller quantities.  */
#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for single-float quantities.  We must allow any single word or
   smaller quantity.  This is because the fix/float conversion instructions
   take integer inputs/outputs from the float registers.  */
#define SF_MODES (S_MODES)

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (S_MODES | D_MODES)

/* Currently identical to DF_MODES; kept separate for the 64 bit table.  */
#define DF_MODES64 DF_MODES

/* Modes for double-float only quantities.  */
#define DF_ONLY_MODES ((1 << (int) DF_MODE) | (1 << (int) D_MODE))

/* Modes for double-float and larger quantities.  */
#define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES)

/* Modes for quad-float only quantities.  */
#define TF_ONLY_MODES (1 << (int) TF_MODE)

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | TF_ONLY_MODES)

#define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES)

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
2724 /* Value is 1 if register/mode pair is acceptable on sparc.
2725 The funny mixture of D and T modes is because integer operations
2726 do not specially operate on tetra quantities, so non-quad-aligned
2727 registers can hold quadword quantities (except %o4 and %i4 because
2728 they cross fixed registers). */
/* This points to either the 32 bit or the 64 bit version
   (selected by sparc_init_modes from TARGET_ARCH64).  */
int *hard_regno_mode_classes;

static int hard_32bit_mode_classes[] = {
  /* Integer regs %g0-%g7, %o0-%o7, %l0-%l7, %i0-%i7: only properly
     paired even regs may hold multi-word values.  */
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  /* FP regs f0 to f31.  */
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};
static int hard_64bit_mode_classes[] = {
  /* Integer regs: on v9 every reg is 64 bits wide, so each holds at
     least D_MODES; even regs can pair for T_MODES.  */
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  /* FP regs f0 to f31.  */
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};
/* Per-mode class bit (one of the masks above); filled by sparc_init_modes.  */
int sparc_mode_class [NUM_MACHINE_MODES];

/* Per-hard-register class, backing REGNO_REG_CLASS; also filled by
   sparc_init_modes.  */
enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
/* Fill in sparc_mode_class[] and sparc_regno_reg_class[], and pick the
   32 bit or 64 bit hard_regno_mode_classes table, based on the selected
   target architecture.  */

static void
sparc_init_modes ()
{
  int i;

  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      switch (GET_MODE_CLASS (i))
	{
	case MODE_INT:
	case MODE_PARTIAL_INT:
	case MODE_COMPLEX_INT:
	  if (GET_MODE_SIZE (i) <= 4)
	    sparc_mode_class[i] = 1 << (int) S_MODE;
	  else if (GET_MODE_SIZE (i) == 8)
	    sparc_mode_class[i] = 1 << (int) D_MODE;
	  else if (GET_MODE_SIZE (i) == 16)
	    sparc_mode_class[i] = 1 << (int) T_MODE;
	  else if (GET_MODE_SIZE (i) == 32)
	    sparc_mode_class[i] = 1 << (int) O_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_FLOAT:
	case MODE_COMPLEX_FLOAT:
	  if (GET_MODE_SIZE (i) <= 4)
	    sparc_mode_class[i] = 1 << (int) SF_MODE;
	  else if (GET_MODE_SIZE (i) == 8)
	    sparc_mode_class[i] = 1 << (int) DF_MODE;
	  else if (GET_MODE_SIZE (i) == 16)
	    sparc_mode_class[i] = 1 << (int) TF_MODE;
	  else if (GET_MODE_SIZE (i) == 32)
	    sparc_mode_class[i] = 1 << (int) OF_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_CC:
	default:
	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
	     we must explicitly check for them here.  */
	  if (i == (int) CCFPmode || i == (int) CCFPEmode)
	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
	  else if (i == (int) CCmode || i == (int) CC_NOOVmode
		   || i == (int) CCXmode || i == (int) CCX_NOOVmode)
	    sparc_mode_class[i] = 1 << (int) CC_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	}
    }

  if (TARGET_ARCH64)
    hard_regno_mode_classes = hard_64bit_mode_classes;
  else
    hard_regno_mode_classes = hard_32bit_mode_classes;

  /* Initialize the array used by REGNO_REG_CLASS.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (i < 16 && TARGET_V8PLUS)
	sparc_regno_reg_class[i] = I64_REGS;
      else if (i < 32)
	sparc_regno_reg_class[i] = GENERAL_REGS;
      else if (i < 64)
	sparc_regno_reg_class[i] = FP_REGS;
      else if (i < 96)
	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
      else if (i < 100)
	sparc_regno_reg_class[i] = FPCC_REGS;
      else
	sparc_regno_reg_class[i] = NO_REGS;
    }
}
/* Save non call used registers from LOW to HIGH at BASE+OFFSET.
   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
   v9 int regs as it simplifies the code.  REAL_OFFSET is the CFA-relative
   offset used for the dwarf2 register-save notes.  Returns the updated
   N_REGS count.  */

static int
save_regs (file, low, high, base, offset, n_regs, real_offset)
     FILE *file;
     int low, high;
     char *base;
     int offset;
     int n_regs;
     int real_offset;
{
  int i;

  if (TARGET_ARCH64 && high <= 32)
    {
      /* v9 integer regs: each live reg is saved with a single 64 bit stx,
	 but still counted as two 4-byte slots.  */
      for (i = low; i < high; i++)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      fprintf (file, "\tstx\t%s, [%s+%d]\n",
		       reg_names[i], base, offset + 4 * n_regs);
	      if (dwarf2out_do_frame ())
		dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
	      n_regs += 2;
	    }
	}
    }
  else
    {
      /* 32 bit regs, walked in even/odd pairs so an std can be used when
	 both members of a pair are live.  */
      for (i = low; i < high; i += 2)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
		{
		  /* Both live: one doubleword store.  */
		  fprintf (file, "\tstd\t%s, [%s+%d]\n",
			   reg_names[i], base, offset + 4 * n_regs);
		  if (dwarf2out_do_frame ())
		    {
		      char *l = dwarf2out_cfi_label ();
		      dwarf2out_reg_save (l, i, real_offset + 4 * n_regs);
		      dwarf2out_reg_save (l, i+1, real_offset + 4 * n_regs + 4);
		    }
		  n_regs += 2;
		}
	      else
		{
		  /* Only the even reg is live.  */
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   reg_names[i], base, offset + 4 * n_regs);
		  if (dwarf2out_do_frame ())
		    dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
		  n_regs += 2;
		}
	    }
	  else
	    {
	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
		{
		  /* Only the odd reg is live; note the pair still consumes
		     a full 8-byte slot (n_regs advances by 2).  */
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   reg_names[i+1], base, offset + 4 * n_regs + 4);
		  if (dwarf2out_do_frame ())
		    dwarf2out_reg_save ("", i + 1, real_offset + 4 * n_regs + 4);
		  n_regs += 2;
		}
	    }
	}
    }
  return n_regs;
}
/* Restore non call used registers from LOW to HIGH at BASE+OFFSET.

   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
   v9 int regs as it simplifies the code.  Mirrors save_regs: slot layout
   and N_REGS accounting must match exactly.  Returns updated N_REGS.  */

static int
restore_regs (file, low, high, base, offset, n_regs)
     FILE *file;
     int low, high;
     char *base;
     int offset;
     int n_regs;
{
  int i;

  if (TARGET_ARCH64 && high <= 32)
    {
      /* v9 integer regs: 64 bit ldx per live reg, two slots each.  */
      for (i = low; i < high; i++)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    fprintf (file, "\tldx\t[%s+%d], %s\n",
	      base, offset + 4 * n_regs, reg_names[i]),
	      n_regs += 2;
	}
    }
  else
    {
      /* 32 bit regs in even/odd pairs: ldd when both live, ld otherwise.
	 (Note the comma expressions: the fprintf and the n_regs bump
	 together form each branch's single statement.)  */
      for (i = low; i < high; i += 2)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    if (regs_ever_live[i+1] && ! call_used_regs[i+1])
	      fprintf (file, "\tldd\t[%s+%d], %s\n",
		       base, offset + 4 * n_regs, reg_names[i]),
	      n_regs += 2;
	    else
	      fprintf (file, "\tld\t[%s+%d],%s\n",
		       base, offset + 4 * n_regs, reg_names[i]),
	      n_regs += 2;
	  else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
	    fprintf (file, "\tld\t[%s+%d],%s\n",
		     base, offset + 4 * n_regs + 4, reg_names[i+1]),
	      n_regs += 2;
	}
    }
  return n_regs;
}
/* Static variables we want to share between prologue and epilogue.  */

/* Number of live general or floating point registers needed to be saved
   (as 4-byte quantities).  This is only done if TARGET_EPILOGUE.
   Set by compute_frame_size, read by the prologue/epilogue emitters.  */
static int num_gfregs;
/* Compute the frame size required by the function.  This function is called
   during the reload pass and also by output_function_prologue().
   SIZE is the size of local variables; LEAF_FUNCTION is non-zero for a
   leaf function.  Sets the num_gfregs, apparent_fsize and actual_fsize
   file statics as side effects and returns the aligned frame size.  */

int
compute_frame_size (size, leaf_function)
     int size;
     int leaf_function;
{
  int n_regs = 0, i;
  int outgoing_args_size = (current_function_outgoing_args_size
			    + REG_PARM_STACK_SPACE (current_function_decl));

  if (TARGET_EPILOGUE)
    {
      /* N_REGS is the number of 4-byte regs saved thus far.  This applies
	 even to v9 int regs to be consistent with save_regs/restore_regs.  */
      if (TARGET_ARCH64)
	{
	  for (i = 0; i < 8; i++)
	    if (regs_ever_live[i] && ! call_used_regs[i])
	      n_regs += 2;
	}
      else
	{
	  for (i = 0; i < 8; i += 2)
	    if ((regs_ever_live[i] && ! call_used_regs[i])
		|| (regs_ever_live[i+1] && ! call_used_regs[i+1]))
	      n_regs += 2;
	}

      /* Float regs (v9 also counts the extra 32).  */
      for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
	if ((regs_ever_live[i] && ! call_used_regs[i])
	    || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
	  n_regs += 2;
    }

  /* Set up values for use in `function_epilogue'.  */
  num_gfregs = n_regs;

  if (leaf_function && n_regs == 0
      && size == 0 && current_function_outgoing_args_size == 0)
    {
      actual_fsize = apparent_fsize = 0;
    }
  else
    {
      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.
	 The stack bias (if any) is taken out to undo its effects.  */
      apparent_fsize = (size - STARTING_FRAME_OFFSET + SPARC_STACK_BIAS + 7) & -8;
      apparent_fsize += n_regs * 4;
      actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
    }

  /* Make sure nothing can clobber our register windows.
     If a SAVE must be done, or there is a stack-local variable,
     the register window area must be allocated.
     ??? For v8 we apparently need an additional 8 bytes of reserved space.  */
  if (leaf_function == 0 || size > 0)
    actual_fsize += (16 * UNITS_PER_WORD) + (TARGET_ARCH64 ? 0 : 8);

  return SPARC_STACK_ALIGN (actual_fsize);
}
3050 /* Build a (32 bit) big number in a register. */
3051 /* ??? We may be able to use the set macro here too. */
3053 static void
3054 build_big_number (file, num, reg)
3055 FILE *file;
3056 int num;
3057 char *reg;
3059 if (num >= 0 || ! TARGET_ARCH64)
3061 fprintf (file, "\tsethi\t%%hi(%d), %s\n", num, reg);
3062 if ((num & 0x3ff) != 0)
3063 fprintf (file, "\tor\t%s, %%lo(%d), %s\n", reg, num, reg);
3065 else /* num < 0 && TARGET_ARCH64 */
3067 /* Sethi does not sign extend, so we must use a little trickery
3068 to use it for negative numbers. Invert the constant before
3069 loading it in, then use xor immediate to invert the loaded bits
3070 (along with the upper 32 bits) to the desired constant. This
3071 works because the sethi and immediate fields overlap. */
3072 int asize = num;
3073 int inv = ~asize;
3074 int low = -0x400 + (asize & 0x3FF);
3076 fprintf (file, "\tsethi\t%%hi(%d), %s\n\txor\t%s, %d, %s\n",
3077 inv, reg, reg, low, reg);
/* Output code for the function prologue to FILE.  SIZE is the size of
   local variables; LEAF_FUNCTION is non-zero for a leaf function.
   Emits the stack adjustment / register window save, dwarf2 CFI notes,
   and the call-saved register stores; also sets the frame_base_name,
   frame_base_offset, actual_fsize and leaf_label statics used by
   output_function_epilogue.  */

void
output_function_prologue (file, size, leaf_function)
     FILE *file;
     int size;
     int leaf_function;
{
  /* Need to use actual_fsize, since we are also allocating
     space for our callee (and our own register save area).  */
  actual_fsize = compute_frame_size (size, leaf_function);

  if (leaf_function)
    {
      /* A leaf function does no save, so its frame lives off %sp.  */
      frame_base_name = "%sp";
      frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
    }
  else
    {
      frame_base_name = "%fp";
      frame_base_offset = SPARC_STACK_BIAS;
    }

  /* This is only for the human reader.  */
  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);

  if (actual_fsize == 0)
    /* do nothing.  */ ;
  else if (! leaf_function && ! TARGET_BROKEN_SAVERESTORE)
    {
      /* `save` immediates are 13 bits, so frames over 4096 bytes need
	 either a second add or a constant built in %g1.  */
      if (actual_fsize <= 4096)
	fprintf (file, "\tsave\t%%sp, -%d, %%sp\n", actual_fsize);
      else if (actual_fsize <= 8192)
	{
	  fprintf (file, "\tsave\t%%sp, -4096, %%sp\n");
	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
	}
      else
	{
	  build_big_number (file, -actual_fsize, "%g1");
	  fprintf (file, "\tsave\t%%sp, %%g1, %%sp\n");
	}
    }
  else if (! leaf_function && TARGET_BROKEN_SAVERESTORE)
    {
      /* We assume the environment will properly handle or otherwise avoid
	 trouble associated with an interrupt occurring after the `save' or
	 trap occurring during it.  */
      fprintf (file, "\tsave\n");

      if (actual_fsize <= 4096)
	fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize);
      else if (actual_fsize <= 8192)
	{
	  fprintf (file, "\tadd\t%%fp, -4096, %%sp\n");
	  fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize - 4096);
	}
      else
	{
	  build_big_number (file, -actual_fsize, "%g1");
	  fprintf (file, "\tadd\t%%fp, %%g1, %%sp\n");
	}
    }
  else /* leaf function */
    {
      if (actual_fsize <= 4096)
	fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize);
      else if (actual_fsize <= 8192)
	{
	  fprintf (file, "\tadd\t%%sp, -4096, %%sp\n");
	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
	}
      else
	{
	  build_big_number (file, -actual_fsize, "%g1");
	  fprintf (file, "\tadd\t%%sp, %%g1, %%sp\n");
	}
    }

  if (dwarf2out_do_frame () && actual_fsize)
    {
      char *label = dwarf2out_cfi_label ();

      /* The canonical frame address refers to the top of the frame.  */
      dwarf2out_def_cfa (label, (leaf_function ? STACK_POINTER_REGNUM
				 : FRAME_POINTER_REGNUM),
			 frame_base_offset);

      if (! leaf_function)
	{
	  /* Note the register window save.  This tells the unwinder that
	     it needs to restore the window registers from the previous
	     frame's window save area at 0(cfa).  */
	  dwarf2out_window_save (label);

	  /* The return address (-8) is now in %i7.  */
	  dwarf2out_return_reg (label, 31);
	}
    }

  /* If doing anything with PIC, do it now.  */
  if (! flag_pic)
    fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);

  /* Call saved registers are saved just above the outgoing argument area.  */
  if (num_gfregs)
    {
      int offset, real_offset, n_regs;
      char *base;

      real_offset = -apparent_fsize;
      offset = -apparent_fsize + frame_base_offset;
      if (offset < -4096 || offset + num_gfregs * 4 > 4096)
	{
	  /* ??? This might be optimized a little as %g1 might already have a
	     value close enough that a single add insn will do.  */
	  /* ??? Although, all of this is probably only a temporary fix
	     because if %g1 can hold a function result, then
	     output_function_epilogue will lose (the result will get
	     clobbered).  */
	  build_big_number (file, offset, "%g1");
	  fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
	  base = "%g1";
	  offset = 0;
	}
      else
	{
	  base = frame_base_name;
	}

      n_regs = 0;
      if (TARGET_EPILOGUE && ! leaf_function)
	/* ??? Originally saved regs 0-15 here.  */
	n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
      else if (leaf_function)
	/* ??? Originally saved regs 0-31 here.  */
	n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
      if (TARGET_EPILOGUE)
	save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs,
		   real_offset);
    }

  leaf_label = 0;
  if (leaf_function && actual_fsize != 0)
    {
      /* warning ("leaf procedure with frame size %d", actual_fsize); */
      if (! TARGET_EPILOGUE)
	leaf_label = gen_label_rtx ();
    }
}
/* Output code for the function epilogue to FILE.  SIZE is unused;
   LEAF_FUNCTION is non-zero for a leaf function.  Restores call-saved
   registers, emits the return (with delay-slot filling and the v8
   struct-return `unimp' skip where needed), then flushes any deferred
   case vectors.  */

void
output_function_epilogue (file, size, leaf_function)
     FILE *file;
     int size ATTRIBUTE_UNUSED;
     int leaf_function;
{
  char *ret;

  if (leaf_label)
    {
      /* A leaf function that needed a frame but no epilogue code gets
	 its deferred label flushed here.  */
      emit_label_after (leaf_label, get_last_insn ());
      final_scan_insn (get_last_insn (), file, 0, 0, 1);
    }
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  else if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT(file);
    }
#endif
  else if (current_function_epilogue_delay_list == 0)
    {
      /* If code does not drop into the epilogue, we need
	 do nothing except output pending case vectors.  */
      rtx insn = get_last_insn ();
      if (GET_CODE (insn) == NOTE)
	insn = prev_nonnote_insn (insn);
      if (insn && GET_CODE (insn) == BARRIER)
	goto output_vectors;
    }

  /* Restore any call saved registers.  */
  if (num_gfregs)
    {
      int offset, n_regs;
      char *base;

      offset = -apparent_fsize + frame_base_offset;
      /* The -8 leaves room for the second word of a final ldd.  */
      if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
	{
	  build_big_number (file, offset, "%g1");
	  fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
	  base = "%g1";
	  offset = 0;
	}
      else
	{
	  base = frame_base_name;
	}

      n_regs = 0;
      if (TARGET_EPILOGUE && ! leaf_function)
	/* ??? Originally saved regs 0-15 here.  */
	n_regs = restore_regs (file, 0, 8, base, offset, 0);
      else if (leaf_function)
	/* ??? Originally saved regs 0-31 here.  */
	n_regs = restore_regs (file, 0, 8, base, offset, 0);
      if (TARGET_EPILOGUE)
	restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
    }

  /* Work out how to skip the caller's unimp instruction if required.  */
  if (leaf_function)
    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%o7+12" : "retl");
  else
    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%i7+12" : "ret");

  if (TARGET_EPILOGUE || leaf_label)
    {
      int old_target_epilogue = TARGET_EPILOGUE;
      target_flags &= ~old_target_epilogue;

      if (! leaf_function)
	{
	  /* If we wound up with things in our delay slot, flush them here.  */
	  if (current_function_epilogue_delay_list)
	    {
	      rtx insn = emit_jump_insn_after (gen_rtx_RETURN (VOIDmode),
					       get_last_insn ());
	      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2,
					  PATTERN (XEXP (current_function_epilogue_delay_list, 0)),
					  PATTERN (insn)));
	      final_scan_insn (insn, file, 1, 0, 1);
	    }
	  else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
	    /* v9 `return' performs the restore itself.  */
	    fputs ("\treturn\t%i7+8\n\tnop\n", file);
	  else
	    fprintf (file, "\t%s\n\trestore\n", ret);
	}
      /* All of the following cases are for leaf functions.  */
      else if (current_function_epilogue_delay_list)
	{
	  /* eligible_for_epilogue_delay_slot ensures that if this is a
	     leaf function, then we will only have insn in the delay slot
	     if the frame size is zero, thus no adjust for the stack is
	     needed here.  */
	  if (actual_fsize != 0)
	    abort ();
	  fprintf (file, "\t%s\n", ret);
	  final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
			   file, 1, 0, 1);
	}
      /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
	 avoid generating confusing assembly language output.  */
      else if (actual_fsize == 0)
	fprintf (file, "\t%s\n\tnop\n", ret);
      else if (actual_fsize <= 4096)
	fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
      else if (actual_fsize <= 8192)
	fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
		 ret, actual_fsize - 4096);
      else if ((actual_fsize & 0x3ff) == 0)
	fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
		 actual_fsize, ret);
      else
	fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
		 actual_fsize, actual_fsize, ret);
      target_flags |= old_target_epilogue;
    }

 output_vectors:
  sparc_output_deferred_case_vectors ();
}
3360 /* Functions for handling argument passing.
3362 For v8 the first six args are normally in registers and the rest are
3363 pushed. Any arg that starts within the first 6 words is at least
3364 partially passed in a register unless its data type forbids.
3366 For v9, the argument registers are laid out as an array of 16 elements
3367 and arguments are added sequentially. The first 6 int args and up to the
3368 first 16 fp args (depending on size) are passed in regs.
3370 Slot Stack Integral Float Float in structure Double Long Double
3371 ---- ----- -------- ----- ------------------ ------ -----------
3372 15 [SP+248] %f31 %f30,%f31 %d30
3373 14 [SP+240] %f29 %f28,%f29 %d28 %q28
3374 13 [SP+232] %f27 %f26,%f27 %d26
3375 12 [SP+224] %f25 %f24,%f25 %d24 %q24
3376 11 [SP+216] %f23 %f22,%f23 %d22
3377 10 [SP+208] %f21 %f20,%f21 %d20 %q20
3378 9 [SP+200] %f19 %f18,%f19 %d18
3379 8 [SP+192] %f17 %f16,%f17 %d16 %q16
3380 7 [SP+184] %f15 %f14,%f15 %d14
3381 6 [SP+176] %f13 %f12,%f13 %d12 %q12
3382 5 [SP+168] %o5 %f11 %f10,%f11 %d10
3383 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
3384 3 [SP+152] %o3 %f7 %f6,%f7 %d6
3385 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
3386 1 [SP+136] %o1 %f3 %f2,%f3 %d2
3387 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
3389 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
3391 Integral arguments are always passed as 64 bit quantities appropriately
3392 extended.
3394 Passing of floating point values is handled as follows.
3395 If a prototype is in scope:
3396 If the value is in a named argument (i.e. not a stdarg function or a
3397 value not part of the `...') then the value is passed in the appropriate
3398 fp reg.
3399 If the value is part of the `...' and is passed in one of the first 6
3400 slots then the value is passed in the appropriate int reg.
3401 If the value is part of the `...' and is not passed in one of the first 6
3402 slots then the value is passed in memory.
3403 If a prototype is not in scope:
3404 If the value is one of the first 6 arguments the value is passed in the
3405 appropriate integer reg and the appropriate fp reg.
3406 If the value is not one of the first 6 arguments the value is passed in
3407 the appropriate fp reg and in memory.
3410 /* Maximum number of int regs for args. */
3411 #define SPARC_INT_ARG_MAX 6
3412 /* Maximum number of fp regs for args. */
3413 #define SPARC_FP_ARG_MAX 16
/* Round a size in bytes up to a whole number of words.  */
3415 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
3417 /* Handle the INIT_CUMULATIVE_ARGS macro.
3418 Initialize a variable CUM of type CUMULATIVE_ARGS
3419 for a call to a function whose data type is FNTYPE.
3420 For a library call, FNTYPE is 0. */
3422 void
3423 init_cumulative_args (cum, fntype, libname, indirect)
3424 CUMULATIVE_ARGS *cum;
3425 tree fntype;
3426 tree libname ATTRIBUTE_UNUSED;
3427 int indirect ATTRIBUTE_UNUSED;
3429 cum->words = 0;
3430 cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
3431 cum->libcall_p = fntype == 0;
3434 /* Compute the slot number to pass an argument in.
3435 Returns the slot number or -1 if passing on the stack.
3437 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3438 the preceding args and about the function being called.
3439 MODE is the argument's machine mode.
3440 TYPE is the data type of the argument (as a tree).
3441 This is null for libcalls where that information may
3442 not be available.
3443 NAMED is nonzero if this argument is a named parameter
3444 (otherwise it is an extra parameter matching an ellipsis).
3445 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
3446 *PREGNO records the register number to use if scalar type.
3447 *PPADDING records the amount of padding needed in words. */
3449 static int
3450 function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
3451 const CUMULATIVE_ARGS *cum;
3452 enum machine_mode mode;
3453 tree type;
3454 int named;
3455 int incoming_p;
3456 int *pregno;
3457 int *ppadding;
3459 int regbase = (incoming_p
3460 ? SPARC_INCOMING_INT_ARG_FIRST
3461 : SPARC_OUTGOING_INT_ARG_FIRST);
3462 int slotno = cum->words;
3463 int regno;
3465 *ppadding = 0;
/* An addressable type must live in memory, so it goes on the stack.  */
3467 if (type != 0 && TREE_ADDRESSABLE (type))
3468 return -1;
/* v8: a BLKmode object whose alignment is not a multiple of the
   parameter boundary is passed on the stack.  */
3469 if (TARGET_ARCH32
3470 && type != 0 && mode == BLKmode
3471 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
3472 return -1;
3474 switch (mode)
3476 case VOIDmode :
3477 /* MODE is VOIDmode when generating the actual call.
3478 See emit_call_1. */
3479 return -1;
/* Integral and complex-integral values: always int regs.  */
3481 case QImode : case CQImode :
3482 case HImode : case CHImode :
3483 case SImode : case CSImode :
3484 case DImode : case CDImode :
3485 if (slotno >= SPARC_INT_ARG_MAX)
3486 return -1;
3487 regno = regbase + slotno;
3488 break;
/* Floating point: int regs on v8; fp regs on v9 when a named arg and
   the FPU is enabled, otherwise int regs.  */
3490 case SFmode : case SCmode :
3491 case DFmode : case DCmode :
3492 case TFmode : case TCmode :
3493 if (TARGET_ARCH32)
3495 if (slotno >= SPARC_INT_ARG_MAX)
3496 return -1;
3497 regno = regbase + slotno;
3499 else
/* Quad values need 16-byte alignment: pad an odd slot.  */
3501 if ((mode == TFmode || mode == TCmode)
3502 && (slotno & 1) != 0)
3503 slotno++, *ppadding = 1;
3504 if (TARGET_FPU && named)
3506 if (slotno >= SPARC_FP_ARG_MAX)
3507 return -1;
/* "* 2" because fp reg numbers count 4-byte quantities; an SFmode
   value sits in the odd (high-addressed) half of its slot.  */
3508 regno = SPARC_FP_ARG_FIRST + slotno * 2;
3509 if (mode == SFmode)
3510 regno++;
3512 else
3514 if (slotno >= SPARC_INT_ARG_MAX)
3515 return -1;
3516 regno = regbase + slotno;
3519 break;
3521 case BLKmode :
3522 /* For sparc64, objects requiring 16 byte alignment get it. */
3523 if (TARGET_ARCH64)
3525 if (type && TYPE_ALIGN (type) == 128 && (slotno & 1) != 0)
3526 slotno++, *ppadding = 1;
3529 if (TARGET_ARCH32
3530 || (type && TREE_CODE (type) == UNION_TYPE))
3532 if (slotno >= SPARC_INT_ARG_MAX)
3533 return -1;
3534 regno = regbase + slotno;
3536 else
/* v9 record: scan the fields to decide whether int regs, fp regs,
   or both will be needed.  */
3538 tree field;
3539 int intregs_p = 0, fpregs_p = 0;
3540 /* The ABI obviously doesn't specify how packed
3541 structures are passed. These are defined to be passed
3542 in int regs if possible, otherwise memory. */
3543 int packed_p = 0;
3545 /* First see what kinds of registers we need. */
3546 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3548 if (TREE_CODE (field) == FIELD_DECL)
3550 if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3551 && TARGET_FPU)
3552 fpregs_p = 1;
3553 else
3554 intregs_p = 1;
3555 if (DECL_PACKED (field))
3556 packed_p = 1;
3559 if (packed_p || !named)
3560 fpregs_p = 0, intregs_p = 1;
3562 /* If all arg slots are filled, then must pass on stack. */
3563 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
3564 return -1;
3565 /* If there are only int args and all int arg slots are filled,
3566 then must pass on stack. */
3567 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
3568 return -1;
3569 /* Note that even if all int arg slots are filled, fp members may
3570 still be passed in regs if such regs are available.
3571 *PREGNO isn't set because there may be more than one, it's up
3572 to the caller to compute them. */
3573 return slotno;
3575 break;
3577 default :
3578 abort ();
3581 *pregno = regno;
3582 return slotno;
3585 /* Handle recursive register counting for structure field layout. */
/* State threaded through the function_arg_record_value_* walkers.
   RET is the PARALLEL being filled in (null during the counting pass);
   SLOTNO/NAMED/REGBASE echo the caller's argument-slot context;
   NREGS counts EXPR_LIST entries emitted (or needed) so far;
   INTOFFSET is the bit offset of the first pending integer word,
   or -1 when no integer run is pending.  */
3587 struct function_arg_record_value_parms
3589 rtx ret;
3590 int slotno, named, regbase;
3591 int nregs, intoffset;
/* Forward declarations: _1 counts registers, _2 assigns them, _3 flushes
   a pending run of integer words.  */
3594 static void function_arg_record_value_3
3595 PROTO((int, struct function_arg_record_value_parms *));
3596 static void function_arg_record_value_2
3597 PROTO((tree, int, struct function_arg_record_value_parms *));
3598 static rtx function_arg_record_value
3599 PROTO((tree, enum machine_mode, int, int, int));
/* Counting pass: walk TYPE's fields (recursively for nested records),
   accumulating into PARMS->nregs the number of registers that will be
   needed.  STARTBITPOS is the bit offset of TYPE within the outermost
   record.  Runs of non-fp fields are batched via PARMS->intoffset and
   counted a word at a time.  */
3601 static void
3602 function_arg_record_value_1 (type, startbitpos, parms)
3603 tree type;
3604 int startbitpos;
3605 struct function_arg_record_value_parms *parms;
3607 tree field;
3609 /* The ABI obviously doesn't specify how packed structures are
3610 passed. These are defined to be passed in int regs if possible,
3611 otherwise memory. */
3612 int packed_p = 0;
3614 /* We need to compute how many registers are needed so we can
3615 allocate the PARALLEL but before we can do that we need to know
3616 whether there are any packed fields. If there are, int regs are
3617 used regardless of whether there are fp values present. */
3618 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3620 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
3622 packed_p = 1;
3623 break;
3627 /* Compute how many registers we need. */
3628 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3630 if (TREE_CODE (field) == FIELD_DECL)
3632 int bitpos = startbitpos;
3633 if (DECL_FIELD_BITPOS (field))
3634 bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
3635 /* ??? FIXME: else assume zero offset. */
3637 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
3639 function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
3641 else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3642 && TARGET_FPU
3643 && ! packed_p
3644 && parms->named)
/* An fp field ends any pending integer run: count the integer
   words first, then one register for the fp value itself.  */
3646 if (parms->intoffset != -1)
3648 int intslots, this_slotno;
3650 intslots = (bitpos - parms->intoffset + BITS_PER_WORD - 1)
3651 / BITS_PER_WORD;
3652 this_slotno = parms->slotno + parms->intoffset
3653 / BITS_PER_WORD;
3655 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3656 intslots = MAX (intslots, 0);
3657 parms->nregs += intslots;
3658 parms->intoffset = -1;
3661 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
3662 If it wasn't true we wouldn't be here. */
3663 parms->nregs += 1;
3665 else
/* Non-fp field: start (or extend) a pending integer run.  */
3667 if (parms->intoffset == -1)
3668 parms->intoffset = bitpos;
3674 /* Handle recursive structure field register assignment. */
/* Flush the pending run of integer words (started at PARMS->intoffset,
   ending at bit BITPOS) into the PARALLEL, one integer register per
   word.  No-op when no run is pending or no int slots remain.  */
3676 static void
3677 function_arg_record_value_3 (bitpos, parms)
3678 int bitpos;
3679 struct function_arg_record_value_parms *parms;
3681 enum machine_mode mode;
3682 int regno, this_slotno, intslots, intoffset;
3683 rtx reg;
3685 if (parms->intoffset == -1)
3686 return;
3687 intoffset = parms->intoffset;
3688 parms->intoffset = -1;
3690 intslots = (bitpos - intoffset + BITS_PER_WORD - 1) / BITS_PER_WORD;
3691 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
3693 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3694 if (intslots <= 0)
3695 return;
3697 /* If this is the trailing part of a word, only load that much into
3698 the register. Otherwise load the whole register. Note that in
3699 the latter case we may pick up unwanted bits. It's not a problem
3700 at the moment but may wish to revisit. */
3702 if (intoffset % BITS_PER_WORD != 0)
3704 mode = mode_for_size (BITS_PER_WORD - intoffset%BITS_PER_WORD,
3705 MODE_INT, 0);
3707 else
3708 mode = word_mode;
/* From here on INTOFFSET is a byte offset into the argument.  */
3710 intoffset /= BITS_PER_UNIT;
/* Loop body (do ... while): emit one integer register per remaining
   word, rounding INTOFFSET up to the next word boundary each time.  */
3713 regno = parms->regbase + this_slotno;
3714 reg = gen_rtx_REG (mode, regno);
3715 XVECEXP (parms->ret, 0, parms->nregs)
3716 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
3718 this_slotno += 1;
3719 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
3720 parms->nregs += 1;
3721 intslots -= 1;
3723 while (intslots > 0);
/* Assignment pass: walk TYPE's fields (recursively for nested records)
   and fill in the EXPR_LIST entries of PARMS->ret.  Mirrors the counting
   logic of function_arg_record_value_1; fp fields get fp registers,
   runs of other fields are flushed as integer words via
   function_arg_record_value_3.  STARTBITPOS is TYPE's bit offset within
   the outermost record.  */
3726 static void
3727 function_arg_record_value_2 (type, startbitpos, parms)
3728 tree type;
3729 int startbitpos;
3730 struct function_arg_record_value_parms *parms;
3732 tree field;
3733 int packed_p = 0;
/* Packed fields force everything into int regs; see _value_1.  */
3735 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3737 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
3739 packed_p = 1;
3740 break;
3744 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3746 if (TREE_CODE (field) == FIELD_DECL)
3748 int bitpos = startbitpos;
3749 if (DECL_FIELD_BITPOS (field))
3750 bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
3751 /* ??? FIXME: else assume zero offset. */
3753 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
3755 function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
3757 else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3758 && TARGET_FPU
3759 && ! packed_p
3760 && parms->named)
3762 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
3763 rtx reg;
/* Flush any pending integer words before placing the fp value.  */
3765 function_arg_record_value_3 (bitpos, parms);
/* "* 2" because fp reg numbers count 4-byte quantities; an SFmode
   value in the second half of a slot lands in the odd register.  */
3767 reg = gen_rtx_REG (DECL_MODE (field),
3768 (SPARC_FP_ARG_FIRST + this_slotno * 2
3769 + (DECL_MODE (field) == SFmode
3770 && (bitpos & 32) != 0)));
3771 XVECEXP (parms->ret, 0, parms->nregs)
3772 = gen_rtx_EXPR_LIST (VOIDmode, reg,
3773 GEN_INT (bitpos / BITS_PER_UNIT));
3774 parms->nregs += 1;
3776 else
3778 if (parms->intoffset == -1)
3779 parms->intoffset = bitpos;
/* Build the PARALLEL describing how record TYPE (machine mode MODE) is
   passed: count registers with _value_1, allocate the vector, then fill
   it with _value_2/_value_3.  SLOTNO is the first argument slot, NAMED
   whether the arg is named, REGBASE the first integer arg register.  */
3785 static rtx
3786 function_arg_record_value (type, mode, slotno, named, regbase)
3787 tree type;
3788 enum machine_mode mode;
3789 int slotno, named, regbase;
3791 HOST_WIDE_INT typesize = int_size_in_bytes (type);
3792 struct function_arg_record_value_parms parms;
3793 int nregs;
3795 parms.ret = NULL_RTX;
3796 parms.slotno = slotno;
3797 parms.named = named;
3798 parms.regbase = regbase;
3800 /* Compute how many registers we need. */
3801 parms.nregs = 0;
3802 parms.intoffset = 0;
3803 function_arg_record_value_1 (type, 0, &parms);
/* Count any integer run left pending after the last field.  */
3805 if (parms.intoffset != -1)
3807 int intslots, this_slotno;
3809 intslots = (typesize*BITS_PER_UNIT - parms.intoffset + BITS_PER_WORD - 1)
3810 / BITS_PER_WORD;
3811 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
3813 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3814 intslots = MAX (intslots, 0);
3816 parms.nregs += intslots;
3818 nregs = parms.nregs;
3820 /* Allocate the vector and handle some annoying special cases. */
3821 if (nregs == 0)
3823 /* ??? Empty structure has no value? Duh? */
3824 if (typesize <= 0)
3826 /* Though there's nothing really to store, return a word register
3827 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
3828 leads to breakage due to the fact that there are zero bytes to
3829 load. */
3830 return gen_rtx_REG (mode, regbase);
3832 else
3834 /* ??? C++ has structures with no fields, and yet a size. Give up
3835 for now and pass everything back in integer registers. */
3836 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3838 if (nregs + slotno > SPARC_INT_ARG_MAX)
3839 nregs = SPARC_INT_ARG_MAX - slotno;
3841 if (nregs == 0)
3842 abort ();
3844 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3846 /* Fill in the entries. */
3847 parms.nregs = 0;
3848 parms.intoffset = 0;
3849 function_arg_record_value_2 (type, 0, &parms);
/* Flush the final pending integer run, then cross-check the count.  */
3850 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
3852 if (parms.nregs != nregs)
3853 abort ();
3855 return parms.ret;
3858 /* Handle the FUNCTION_ARG macro.
3859 Determine where to put an argument to a function.
3860 Value is zero to push the argument on the stack,
3861 or a hard register in which to store the argument.
3863 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3864 the preceding args and about the function being called.
3865 MODE is the argument's machine mode.
3866 TYPE is the data type of the argument (as a tree).
3867 This is null for libcalls where that information may
3868 not be available.
3869 NAMED is nonzero if this argument is a named parameter
3870 (otherwise it is an extra parameter matching an ellipsis).
3871 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. */
3874 function_arg (cum, mode, type, named, incoming_p)
3875 const CUMULATIVE_ARGS *cum;
3876 enum machine_mode mode;
3877 tree type;
3878 int named;
3879 int incoming_p;
3881 int regbase = (incoming_p
3882 ? SPARC_INCOMING_INT_ARG_FIRST
3883 : SPARC_OUTGOING_INT_ARG_FIRST);
3884 int slotno, regno, padding;
3885 rtx reg;
3887 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
3888 &regno, &padding);
/* -1 means the argument is passed on the stack.  */
3890 if (slotno == -1)
3891 return 0;
/* v8: everything that got a slot goes in a single register.  */
3893 if (TARGET_ARCH32)
3895 reg = gen_rtx_REG (mode, regno);
3896 return reg;
3899 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
3900 but also have the slot allocated for them.
3901 If no prototype is in scope fp values in register slots get passed
3902 in two places, either fp regs and int regs or fp regs and memory. */
3903 if ((GET_MODE_CLASS (mode) == MODE_FLOAT
3904 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3905 && SPARC_FP_REG_P (regno))
3907 reg = gen_rtx_REG (mode, regno);
3908 if (cum->prototype_p || cum->libcall_p)
3910 /* "* 2" because fp reg numbers are recorded in 4 byte
3911 quantities. */
3912 #if 0
3913 /* ??? This will cause the value to be passed in the fp reg and
3914 in the stack. When a prototype exists we want to pass the
3915 value in the reg but reserve space on the stack. That's an
3916 optimization, and is deferred [for a bit]. */
3917 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
3918 return gen_rtx_PARALLEL (mode,
3919 gen_rtvec (2,
3920 gen_rtx_EXPR_LIST (VOIDmode,
3921 NULL_RTX, const0_rtx),
3922 gen_rtx_EXPR_LIST (VOIDmode,
3923 reg, const0_rtx)));
3924 else
3925 #else
3926 /* ??? It seems that passing back a register even when past
3927 the area declared by REG_PARM_STACK_SPACE will allocate
3928 space appropriately, and will not copy the data onto the
3929 stack, exactly as we desire.
3931 This is due to locate_and_pad_parm being called in
3932 expand_call whenever reg_parm_stack_space > 0, which
3933 while beneficial to our example here, would seem to be
3934 in error from what had been intended. Ho hum... -- r~ */
3935 #endif
3936 return reg;
3938 else
/* Unprototyped call: pass the value twice so the callee can pick
   it up whichever way it expects it.  */
3940 rtx v0, v1;
3942 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
3944 int intreg;
3946 /* On incoming, we don't need to know that the value
3947 is passed in %f0 and %i0, and it confuses other parts
3948 causing needless spillage even on the simplest cases. */
3949 if (incoming_p)
3950 return reg;
3952 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
3953 + (regno - SPARC_FP_ARG_FIRST) / 2);
3955 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
3956 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
3957 const0_rtx);
3958 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
3960 else
/* No int reg available: fp reg plus memory (NULL_RTX entry).  */
3962 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
3963 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
3964 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
3968 else if (type && TREE_CODE (type) == RECORD_TYPE)
3970 /* Structures up to 16 bytes in size are passed in arg slots on the
3971 stack and are promoted to registers where possible. */
3973 if (int_size_in_bytes (type) > 16)
3974 abort (); /* shouldn't get here */
3976 return function_arg_record_value (type, mode, slotno, named, regbase);
3978 else if (type && TREE_CODE (type) == UNION_TYPE)
/* Unions go in an integer register of exactly their size; note the
   inner MODE deliberately shadows the parameter here.  */
3980 enum machine_mode mode;
3981 int bytes = int_size_in_bytes (type);
3983 if (bytes > 16)
3984 abort ();
3986 mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
3987 reg = gen_rtx_REG (mode, regno);
3989 else
3991 /* Scalar or complex int. */
3992 reg = gen_rtx_REG (mode, regno);
3995 return reg;
3998 /* Handle the FUNCTION_ARG_PARTIAL_NREGS macro.
3999 For an arg passed partly in registers and partly in memory,
4000 this is the number of registers used.
4001 For args passed entirely in registers or entirely in memory, zero.
4003 Any arg that starts in the first 6 regs but won't entirely fit in them
4004 needs partial registers on v8. On v9, structures with integer
4005 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
4006 values that begin in the last fp reg [where "last fp reg" varies with the
4007 mode] will be split between that reg and memory. */
4010 function_arg_partial_nregs (cum, mode, type, named)
4011 const CUMULATIVE_ARGS *cum;
4012 enum machine_mode mode;
4013 tree type;
4014 int named;
4016 int slotno, regno, padding;
4018 /* We pass 0 for incoming_p here, it doesn't matter. */
4019 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
/* Entirely in memory: no partial registers.  */
4021 if (slotno == -1)
4022 return 0;
4024 if (TARGET_ARCH32
/* v8: if the arg starts in a register but runs off the end of the
   register area, the leftover registers are the partial count.  */
4026 if ((slotno + (mode == BLKmode
4027 ? ROUND_ADVANCE (int_size_in_bytes (type))
4028 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
4029 > NPARM_REGS (SImode))
4030 return NPARM_REGS (SImode) - slotno;
4031 return 0;
4033 else
4035 if (type && AGGREGATE_TYPE_P (type))
/* v9 aggregate in the last int slot: one register, rest memory.  */
4037 int size = int_size_in_bytes (type);
4038 int align = TYPE_ALIGN (type);
4040 if (align == 16)
4041 slotno += slotno & 1;
4042 if (size > 8 && size <= 16
4043 && slotno == SPARC_INT_ARG_MAX - 1)
4044 return 1;
4046 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
4047 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
4048 && ! TARGET_FPU))
/* Two-word complex value straddling the end of the int regs.  */
4050 if (GET_MODE_ALIGNMENT (mode) == 128)
4052 slotno += slotno & 1;
4053 if (slotno == SPARC_INT_ARG_MAX - 2)
4054 return 1;
4056 else
4058 if (slotno == SPARC_INT_ARG_MAX - 1)
4059 return 1;
4062 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
/* Complex fp value straddling the end of the fp regs.  */
4064 if (GET_MODE_ALIGNMENT (mode) == 128)
4065 slotno += slotno & 1;
4066 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
4067 > SPARC_FP_ARG_MAX)
4068 return 1;
4070 return 0;
4074 /* Handle the FUNCTION_ARG_PASS_BY_REFERENCE macro.
4075 !v9: The SPARC ABI stipulates passing struct arguments (of any size) and
4076 quad-precision floats by invisible reference.
4077 v9: Aggregates greater than 16 bytes are passed by reference.
4078 For Pascal, also pass arrays by reference. */
4081 function_arg_pass_by_reference (cum, mode, type, named)
4082 const CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
4083 enum machine_mode mode;
4084 tree type;
4085 int named ATTRIBUTE_UNUSED;
4087 if (TARGET_ARCH32)
4089 return ((type && AGGREGATE_TYPE_P (type))
4090 || mode == TFmode || mode == TCmode);
4092 else
4094 return ((type && TREE_CODE (type) == ARRAY_TYPE)
4095 /* Consider complex values as aggregates, so care for TCmode. */
4096 || GET_MODE_SIZE (mode) > 16
4097 || (type && AGGREGATE_TYPE_P (type)
4098 && int_size_in_bytes (type) > 16));
4102 /* Handle the FUNCTION_ARG_ADVANCE macro.
4103 Update the data in CUM to advance over an argument
4104 of mode MODE and data type TYPE.
4105 TYPE is null for libcalls where that information may not be available. */
4107 void
4108 function_arg_advance (cum, mode, type, named)
4109 CUMULATIVE_ARGS *cum;
4110 enum machine_mode mode;
4111 tree type;
4112 int named;
4114 int slotno, regno, padding;
4116 /* We pass 0 for incoming_p here, it doesn't matter. */
4117 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
4119 /* If register required leading padding, add it. */
4120 if (slotno != -1)
4121 cum->words += padding;
/* v8: advance by the argument's size in words.  */
4123 if (TARGET_ARCH32)
4125 cum->words += (mode != BLKmode
4126 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4127 : ROUND_ADVANCE (int_size_in_bytes (type)));
4129 else
4131 if (type && AGGREGATE_TYPE_P (type))
/* v9 aggregates occupy one or two slots; larger ones were passed
   by reference, so they consume just the one pointer word.  */
4133 int size = int_size_in_bytes (type);
4135 if (size <= 8)
4136 ++cum->words;
4137 else if (size <= 16)
4138 cum->words += 2;
4139 else /* passed by reference */
4140 ++cum->words;
4142 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
4144 cum->words += 2;
4146 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4148 cum->words += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
4150 else
/* Scalar: advance by the rounded size in words.  */
4152 cum->words += (mode != BLKmode
4153 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4154 : ROUND_ADVANCE (int_size_in_bytes (type)));
4159 /* Handle the FUNCTION_ARG_PADDING macro.
4160 For the 64 bit ABI structs are always stored left shifted in their
4161 argument slot. */
4163 enum direction
4164 function_arg_padding (mode, type)
4165 enum machine_mode mode;
4166 tree type;
4168 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
4169 return upward;
4171 /* This is the default definition. */
4172 return (! BYTES_BIG_ENDIAN
4173 ? upward
4174 : ((mode == BLKmode
4175 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
4176 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
4177 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
4178 ? downward : upward));
4181 /* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
4182 For v9, function return values are subject to the same rules as arguments,
4183 except that up to 32-bytes may be returned in registers. */
4186 function_value (type, mode, incoming_p)
4187 tree type;
4188 enum machine_mode mode;
4189 int incoming_p;
4191 int regno;
/* NOTE(review): the incoming/outgoing sense is inverted here relative
   to function_arg_slotno — presumably because a return value travels in
   the opposite direction to arguments; confirm against the
   BASE_*_VALUE_REG macros.  */
4192 int regbase = (incoming_p
4193 ? SPARC_OUTGOING_INT_ARG_FIRST
4194 : SPARC_INCOMING_INT_ARG_FIRST);
4196 if (TARGET_ARCH64 && type)
4198 if (TREE_CODE (type) == RECORD_TYPE)
4200 /* Structures up to 32 bytes in size are passed in registers,
4201 promoted to fp registers where possible. */
4203 if (int_size_in_bytes (type) > 32)
4204 abort (); /* shouldn't get here */
4206 return function_arg_record_value (type, mode, 0, 1, regbase);
4208 else if (TREE_CODE (type) == UNION_TYPE)
/* Unions are returned in an integer mode of exactly their size.  */
4210 int bytes = int_size_in_bytes (type);
4212 if (bytes > 32)
4213 abort ();
4215 mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
4219 if (incoming_p)
4220 regno = BASE_RETURN_VALUE_REG (mode);
4221 else
4222 regno = BASE_OUTGOING_VALUE_REG (mode);
4224 return gen_rtx_REG (mode, regno);
4227 /* Do what is necessary for `va_start'. The argument is ignored.
4229 We look at the current function to determine if stdarg or varargs
4230 is used and return the address of the first unnamed parameter. */
4233 sparc_builtin_saveregs (arglist)
4234 tree arglist ATTRIBUTE_UNUSED;
4236 int first_reg = current_function_args_info.words;
4237 rtx address;
4238 int regno;
/* Dump every argument register past the named args into its reserved
   stack slot so the variadic args form a contiguous array in memory.  */
4240 for (regno = first_reg; regno < NPARM_REGS (word_mode); regno++)
4241 emit_move_insn (gen_rtx_MEM (word_mode,
4242 gen_rtx_PLUS (Pmode,
4243 frame_pointer_rtx,
4244 GEN_INT (STACK_POINTER_OFFSET
4245 + UNITS_PER_WORD * regno))),
4246 gen_rtx_REG (word_mode,
4247 BASE_INCOMING_ARG_REG (word_mode) + regno));
/* The result is the address of the first unnamed argument's slot.  */
4249 address = gen_rtx_PLUS (Pmode,
4250 frame_pointer_rtx,
4251 GEN_INT (STACK_POINTER_OFFSET
4252 + UNITS_PER_WORD * first_reg));
/* Tell -fcheck-memory-usage that the saved-register area is now
   readable and writable.  */
4254 if (flag_check_memory_usage
4255 && first_reg < NPARM_REGS (word_mode))
4256 emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
4257 address, ptr_mode,
4258 GEN_INT (UNITS_PER_WORD
4259 * (NPARM_REGS (word_mode) - first_reg)),
4260 TYPE_MODE (sizetype), GEN_INT (MEMORY_USE_RW),
4261 TYPE_MODE (integer_type_node));
4263 return address;
4266 /* Return the string to output a conditional branch to LABEL, which is
4267 the operand number of the label. OP is the conditional expression.
4268 XEXP (OP, 0) is assumed to be a condition code register (integer or
4269 floating point) and its mode specifies what kind of comparison we made.
4271 REVERSED is non-zero if we should reverse the sense of the comparison.
4273 ANNUL is non-zero if we should generate an annulling branch.
4275 NOOP is non-zero if we have to follow this branch by a noop.
4277 INSN, if set, is the insn. */
4279 char *
4280 output_cbranch (op, label, reversed, annul, noop, insn)
4281 rtx op;
4282 int label;
4283 int reversed, annul, noop;
4284 rtx insn;
/* The result is built in a static buffer and the label templates are
   patched in place ('X'/'Y' placeholders), so the returned string is
   only valid until the next call.  */
4286 static char string[32];
4287 enum rtx_code code = GET_CODE (op);
4288 rtx cc_reg = XEXP (op, 0);
4289 enum machine_mode mode = GET_MODE (cc_reg);
4290 static char v8_labelno[] = "%lX";
4291 static char v9_icc_labelno[] = "%%icc, %lX";
4292 static char v9_xcc_labelno[] = "%%xcc, %lX";
4293 static char v9_fcc_labelno[] = "%%fccX, %lY";
4294 char *labelno;
/* SPACES tracks whether the opcode is still short enough for a full
   tab before the operands.  */
4295 int labeloff, spaces = 8;
4297 /* ??? !v9: FP branches cannot be preceded by another floating point insn.
4298 Because there is currently no concept of pre-delay slots, we can fix
4299 this only by always emitting a nop before a floating point branch. */
4301 if ((mode == CCFPmode || mode == CCFPEmode) && ! TARGET_V9)
4302 strcpy (string, "nop\n\t");
4303 else
4304 string[0] = '\0';
4306 /* If not floating-point or if EQ or NE, we can just reverse the code. */
4307 if (reversed
4308 && ((mode != CCFPmode && mode != CCFPEmode) || code == EQ || code == NE))
4309 code = reverse_condition (code), reversed = 0;
4311 /* Start by writing the branch condition. */
4312 switch (code)
4314 case NE:
4315 if (mode == CCFPmode || mode == CCFPEmode)
4317 strcat (string, "fbne");
4318 spaces -= 4;
4320 else
4322 strcpy (string, "bne");
4323 spaces -= 3;
4325 break;
4327 case EQ:
4328 if (mode == CCFPmode || mode == CCFPEmode)
4330 strcat (string, "fbe");
4331 spaces -= 3;
4333 else
4335 strcpy (string, "be");
4336 spaces -= 2;
4338 break;
/* For the ordering comparisons, a still-reversed FP branch must use
   the unordered variants (fbul, fbule, ...).  */
4340 case GE:
4341 if (mode == CCFPmode || mode == CCFPEmode)
4343 if (reversed)
4344 strcat (string, "fbul");
4345 else
4346 strcat (string, "fbge");
4347 spaces -= 4;
4349 else if (mode == CC_NOOVmode)
4351 strcpy (string, "bpos");
4352 spaces -= 4;
4354 else
4356 strcpy (string, "bge");
4357 spaces -= 3;
4359 break;
4361 case GT:
4362 if (mode == CCFPmode || mode == CCFPEmode)
4364 if (reversed)
4366 strcat (string, "fbule");
4367 spaces -= 5;
4369 else
4371 strcat (string, "fbg");
4372 spaces -= 3;
4375 else
4377 strcpy (string, "bg");
4378 spaces -= 2;
4380 break;
4382 case LE:
4383 if (mode == CCFPmode || mode == CCFPEmode)
4385 if (reversed)
4386 strcat (string, "fbug");
4387 else
4388 strcat (string, "fble");
4389 spaces -= 4;
4391 else
4393 strcpy (string, "ble");
4394 spaces -= 3;
4396 break;
4398 case LT:
4399 if (mode == CCFPmode || mode == CCFPEmode)
4401 if (reversed)
4403 strcat (string, "fbuge");
4404 spaces -= 5;
4406 else
4408 strcat (string, "fbl");
4409 spaces -= 3;
4412 else if (mode == CC_NOOVmode)
4414 strcpy (string, "bneg");
4415 spaces -= 4;
4417 else
4419 strcpy (string, "bl");
4420 spaces -= 2;
4422 break;
4424 case GEU:
4425 strcpy (string, "bgeu");
4426 spaces -= 4;
4427 break;
4429 case GTU:
4430 strcpy (string, "bgu");
4431 spaces -= 3;
4432 break;
4434 case LEU:
4435 strcpy (string, "bleu");
4436 spaces -= 4;
4437 break;
4439 case LTU:
4440 strcpy (string, "blu");
4441 spaces -= 3;
4442 break;
4444 default:
4445 abort ();
4448 /* Now add the annulling, the label, and a possible noop. */
4449 if (annul)
4451 strcat (string, ",a");
4452 spaces -= 2;
4455 if (! TARGET_V9)
4457 labeloff = 2;
4458 labelno = v8_labelno;
4460 else
4462 rtx note;
/* v9: emit a ,pt/,pn prediction hint when the insn carries a
   REG_BR_PRED note.  */
4464 if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
4466 strcat (string,
4467 INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
4468 spaces -= 3;
4471 labeloff = 9;
4472 if (mode == CCFPmode || mode == CCFPEmode)
4474 labeloff = 10;
4475 labelno = v9_fcc_labelno;
4476 /* Set the char indicating the number of the fcc reg to use. */
4477 labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
4479 else if (mode == CCXmode || mode == CCX_NOOVmode)
4480 labelno = v9_xcc_labelno;
4481 else
4482 labelno = v9_icc_labelno;
4484 /* Set the char indicating the number of the operand containing the
4485 label_ref. */
4486 labelno[labeloff] = label + '0';
4487 if (spaces > 0)
4488 strcat (string, "\t");
4489 else
4490 strcat (string, " ");
4491 strcat (string, labelno);
4493 if (noop)
4494 strcat (string, "\n\tnop");
4496 return string;
4499 /* Return the string to output a conditional branch to LABEL, testing
4500 register REG. LABEL is the operand number of the label; REG is the
4501 operand number of the reg. OP is the conditional expression. The mode
4502 of REG says what kind of comparison we made.
4504 REVERSED is non-zero if we should reverse the sense of the comparison.
4506 ANNUL is non-zero if we should generate an annulling branch.
4508 NOOP is non-zero if we have to follow this branch by a noop. */
4510 char *
4511 output_v9branch (op, reg, label, reversed, annul, noop, insn)
4512 rtx op;
4513 int reg, label;
4514 int reversed, annul, noop;
4515 rtx insn;
4517 static char string[20];
4518 enum rtx_code code = GET_CODE (op);
4519 enum machine_mode mode = GET_MODE (XEXP (op, 0));
4520 static char labelno[] = "%X, %lX";
4521 rtx note;
4522 int spaces = 8;
4524 /* If not floating-point or if EQ or NE, we can just reverse the code. */
4525 if (reversed)
4526 code = reverse_condition (code), reversed = 0;
4528 /* Only 64 bit versions of these instructions exist. */
4529 if (mode != DImode)
4530 abort ();
4532 /* Start by writing the branch condition. */
4534 switch (code)
4536 case NE:
4537 strcpy (string, "brnz");
4538 spaces -= 4;
4539 break;
4541 case EQ:
4542 strcpy (string, "brz");
4543 spaces -= 3;
4544 break;
4546 case GE:
4547 strcpy (string, "brgez");
4548 spaces -= 5;
4549 break;
4551 case LT:
4552 strcpy (string, "brlz");
4553 spaces -= 4;
4554 break;
4556 case LE:
4557 strcpy (string, "brlez");
4558 spaces -= 5;
4559 break;
4561 case GT:
4562 strcpy (string, "brgz");
4563 spaces -= 4;
4564 break;
4566 default:
4567 abort ();
4570 /* Now add the annulling, reg, label, and nop. */
4571 if (annul)
4573 strcat (string, ",a");
4574 spaces -= 2;
4577 if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
4579 strcat (string,
4580 INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
4581 spaces -= 3;
4584 labelno[1] = reg + '0';
4585 labelno[6] = label + '0';
4586 if (spaces > 0)
4587 strcat (string, "\t");
4588 else
4589 strcat (string, " ");
4590 strcat (string, labelno);
4592 if (noop)
4593 strcat (string, "\n\tnop");
4595 return string;
4598 /* Renumber registers in delay slot. Replace registers instead of
4599 renumbering because they may be shared.
4601 This does not handle instructions other than move. */
4603 static void
4604 epilogue_renumber (where)
4605 rtx *where;
4607 rtx x = *where;
4608 enum rtx_code code = GET_CODE (x);
4610 switch (code)
/* MEM may be shared: copy it before renumbering its address.  */
4612 case MEM:
4613 *where = x = copy_rtx (x);
4614 epilogue_renumber (&XEXP (x, 0));
4615 return;
4617 case REG:
4619 int regno = REGNO (x);
/* %o/%l registers (9..23) vanish with the register window; map the
   %i registers (24..31) onto the corresponding %o registers.  */
4620 if (regno > 8 && regno < 24)
4621 abort ();
4622 if (regno >= 24 && regno < 32)
4623 *where = gen_rtx_REG (GET_MODE (x), regno - 16);
4624 return;
4626 case CONST_INT:
4627 case CONST_DOUBLE:
4628 case CONST:
4629 case SYMBOL_REF:
4630 case LABEL_REF:
4631 return;
4633 case IOR:
4634 case AND:
4635 case XOR:
4636 case PLUS:
4637 case MINUS:
4638 epilogue_renumber (&XEXP (x, 1));
/* Fall through: binary operators also renumber operand 0, which is
   all the unary operators below need.  */
4639 case NEG:
4640 case NOT:
4641 epilogue_renumber (&XEXP (x, 0));
4642 return;
4644 default:
4645 debug_rtx (*where);
4646 abort ();
4650 /* Output assembler code to return from a function. */
4652 char *
4653 output_return (operands)
4654 rtx *operands;
4656 rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;
4658 if (leaf_label)
4660 operands[0] = leaf_label;
4661 return "b%* %l0%(";
4663 else if (leaf_function)
4665 /* No delay slot in a leaf function. */
4666 if (delay)
4667 abort ();
4669 /* If we didn't allocate a frame pointer for the current function,
4670 the stack pointer might have been adjusted. Output code to
4671 restore it now. */
4673 operands[0] = GEN_INT (actual_fsize);
4675 /* Use sub of negated value in first two cases instead of add to
4676 allow actual_fsize == 4096. */
4678 if (actual_fsize <= 4096)
4680 if (SKIP_CALLERS_UNIMP_P)
4681 return "jmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
4682 else
4683 return "retl\n\tsub\t%%sp, -%0, %%sp";
4685 else if (actual_fsize <= 8192)
4687 operands[0] = GEN_INT (actual_fsize - 4096);
4688 if (SKIP_CALLERS_UNIMP_P)
4689 return "sub\t%%sp, -4096, %%sp\n\tjmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
4690 else
4691 return "sub\t%%sp, -4096, %%sp\n\tretl\n\tsub\t%%sp, -%0, %%sp";
4693 else if (SKIP_CALLERS_UNIMP_P)
4695 if ((actual_fsize & 0x3ff) != 0)
4696 return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
4697 else
4698 return "sethi\t%%hi(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
4700 else
4702 if ((actual_fsize & 0x3ff) != 0)
4703 return "sethi %%hi(%a0),%%g1\n\tor %%g1,%%lo(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
4704 else
4705 return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
4708 else if (TARGET_V9)
4710 if (delay)
4712 epilogue_renumber (&SET_DEST (PATTERN (delay)));
4713 epilogue_renumber (&SET_SRC (PATTERN (delay)));
4715 if (SKIP_CALLERS_UNIMP_P)
4716 return "return\t%%i7+12%#";
4717 else
4718 return "return\t%%i7+8%#";
4720 else
4722 if (delay)
4723 abort ();
4724 if (SKIP_CALLERS_UNIMP_P)
4725 return "jmp\t%%i7+12\n\trestore";
4726 else
4727 return "ret\n\trestore";
4731 /* Leaf functions and non-leaf functions have different needs.  */

/* Allocation order used when the current function is a leaf
   (index 0 of reg_alloc_orders); supplied by the target header.  */
4733 static int
4734 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

/* Allocation order for ordinary register-window functions (index 1).  */
4736 static int
4737 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* Indexed by the non-leaf flag maintained in order_regs_for_local_alloc.  */
4739 static int *reg_alloc_orders[] = {
4740 reg_leaf_alloc_order,
4741 reg_nonleaf_alloc_order};
4743 void
4744 order_regs_for_local_alloc ()
4746 static int last_order_nonleaf = 1;
4748 if (regs_ever_live[15] != last_order_nonleaf)
4750 last_order_nonleaf = !last_order_nonleaf;
4751 bcopy ((char *) reg_alloc_orders[last_order_nonleaf],
4752 (char *) reg_alloc_order, FIRST_PSEUDO_REGISTER * sizeof (int));
4756 /* Return 1 if REG and MEM are legitimate enough to allow the various
4757 mem<-->reg splits to be run. */
4760 sparc_splitdi_legitimate (reg, mem)
4761 rtx reg;
4762 rtx mem;
4764 /* Punt if we are here by mistake. */
4765 if (! reload_completed)
4766 abort ();
4768 /* We must have an offsettable memory reference. */
4769 if (! offsettable_memref_p (mem))
4770 return 0;
4772 /* If we have legitimate args for ldd/std, we do not want
4773 the split to happen. */
4774 if ((REGNO (reg) % 2) == 0
4775 && mem_min_alignment (mem, 8))
4776 return 0;
4778 /* Success. */
4779 return 1;
4782 /* Return 1 if x and y are some kind of REG and they refer to
4783 different hard registers. This test is guarenteed to be
4784 run after reload. */
4787 sparc_absnegfloat_split_legitimate (x, y)
4788 rtx x, y;
4790 if (GET_CODE (x) == SUBREG)
4791 x = alter_subreg (x);
4792 if (GET_CODE (x) != REG)
4793 return 0;
4794 if (GET_CODE (y) == SUBREG)
4795 y = alter_subreg (y);
4796 if (GET_CODE (y) != REG)
4797 return 0;
4798 if (REGNO (x) == REGNO (y))
4799 return 0;
4800 return 1;
4803 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
4804 This makes them candidates for using ldd and std insns.
4806 Note reg1 and reg2 *must* be hard registers. */
4809 registers_ok_for_ldd_peep (reg1, reg2)
4810 rtx reg1, reg2;
4812 /* We might have been passed a SUBREG. */
4813 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
4814 return 0;
4816 if (REGNO (reg1) % 2 != 0)
4817 return 0;
4819 /* Integer ldd is deprecated in SPARC V9 */
4820 if (TARGET_V9 && REGNO (reg1) < 32)
4821 return 0;
4823 return (REGNO (reg1) == REGNO (reg2) - 1);
4826 /* Return 1 if addr1 and addr2 are suitable for use in an ldd or
4827 std insn.
4829 This can only happen when addr1 and addr2 are consecutive memory
4830 locations (addr1 + 4 == addr2). addr1 must also be aligned on a
4831 64 bit boundary (addr1 % 8 == 0).
4833 We know %sp and %fp are kept aligned on a 64 bit boundary. Other
4834 registers are assumed to *never* be properly aligned and are
4835 rejected.
4837 Knowing %sp and %fp are kept aligned on a 64 bit boundary, we
4838 need only check that the offset for addr1 % 8 == 0. */
4841 addrs_ok_for_ldd_peep (addr1, addr2)
4842 rtx addr1, addr2;
4844 int reg1, offset1;
4846 /* Extract a register number and offset (if used) from the first addr. */
4847 if (GET_CODE (addr1) == PLUS)
4849 /* If not a REG, return zero. */
4850 if (GET_CODE (XEXP (addr1, 0)) != REG)
4851 return 0;
4852 else
4854 reg1 = REGNO (XEXP (addr1, 0));
4855 /* The offset must be constant! */
4856 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
4857 return 0;
4858 offset1 = INTVAL (XEXP (addr1, 1));
4861 else if (GET_CODE (addr1) != REG)
4862 return 0;
4863 else
4865 reg1 = REGNO (addr1);
4866 /* This was a simple (mem (reg)) expression. Offset is 0. */
4867 offset1 = 0;
4870 /* Make sure the second address is a (mem (plus (reg) (const_int). */
4871 if (GET_CODE (addr2) != PLUS)
4872 return 0;
4874 if (GET_CODE (XEXP (addr2, 0)) != REG
4875 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
4876 return 0;
4878 /* Only %fp and %sp are allowed. Additionally both addresses must
4879 use the same register. */
4880 if (reg1 != FRAME_POINTER_REGNUM && reg1 != STACK_POINTER_REGNUM)
4881 return 0;
4883 if (reg1 != REGNO (XEXP (addr2, 0)))
4884 return 0;
4886 /* The first offset must be evenly divisible by 8 to ensure the
4887 address is 64 bit aligned. */
4888 if (offset1 % 8 != 0)
4889 return 0;
4891 /* The offset for the second addr must be 4 more than the first addr. */
4892 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
4893 return 0;
4895 /* All the tests passed. addr1 and addr2 are valid for ldd and std
4896 instructions. */
4897 return 1;
4900 /* Return 1 if reg is a pseudo, or is the first register in
4901 a hard register pair. This makes it a candidate for use in
4902 ldd and std insns. */
4905 register_ok_for_ldd (reg)
4906 rtx reg;
4908 /* We might have been passed a SUBREG. */
4909 if (GET_CODE (reg) != REG)
4910 return 0;
4912 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
4913 return (REGNO (reg) % 2 == 0);
4914 else
4915 return 1;
4918 /* Print operand X (an rtx) in assembler syntax to file FILE.
4919 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4920 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4922 void
4923 print_operand (file, x, code)
4924 FILE *file;
4925 rtx x;
4926 int code;
4928 switch (code)
4930 case '#':
4931 /* Output a 'nop' if there's nothing for the delay slot. */
4932 if (dbr_sequence_length () == 0)
4933 fputs ("\n\t nop", file);
4934 return;
4935 case '*':
4936 /* Output an annul flag if there's nothing for the delay slot and we
4937 are optimizing. This is always used with '(' below. */
4938 /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
4939 this is a dbx bug. So, we only do this when optimizing. */
4940 /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
4941 Always emit a nop in case the next instruction is a branch. */
4942 if (dbr_sequence_length () == 0
4943 && (optimize && (int)sparc_cpu < PROCESSOR_V9))
4944 fputs (",a", file);
4945 return;
4946 case '(':
4947 /* Output a 'nop' if there's nothing for the delay slot and we are
4948 not optimizing. This is always used with '*' above. */
4949 if (dbr_sequence_length () == 0
4950 && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
4951 fputs ("\n\t nop", file);
4952 return;
4953 case '_':
4954 /* Output the Embedded Medium/Anywhere code model base register. */
4955 fputs (EMBMEDANY_BASE_REG, file);
4956 return;
4957 case '@':
4958 /* Print out what we are using as the frame pointer. This might
4959 be %fp, or might be %sp+offset. */
4960 /* ??? What if offset is too big? Perhaps the caller knows it isn't? */
4961 fprintf (file, "%s+%d", frame_base_name, frame_base_offset);
4962 return;
4963 case 'Y':
4964 /* Adjust the operand to take into account a RESTORE operation. */
/* %i regs (24-31) map to %o regs (regno - 16); globals and %o0 (0-7)
   print unchanged; everything else is invalid after the restore.  */
4965 if (GET_CODE (x) == CONST_INT)
4966 break;
4967 else if (GET_CODE (x) != REG)
4968 output_operand_lossage ("Invalid %%Y operand");
4969 else if (REGNO (x) < 8)
4970 fputs (reg_names[REGNO (x)], file);
4971 else if (REGNO (x) >= 24 && REGNO (x) < 32)
4972 fputs (reg_names[REGNO (x)-16], file);
4973 else
4974 output_operand_lossage ("Invalid %%Y operand");
4975 return;
4976 case 'L':
4977 /* Print out the low order register name of a register pair. */
4978 if (WORDS_BIG_ENDIAN)
4979 fputs (reg_names[REGNO (x)+1], file);
4980 else
4981 fputs (reg_names[REGNO (x)], file);
4982 return;
4983 case 'H':
4984 /* Print out the high order register name of a register pair. */
4985 if (WORDS_BIG_ENDIAN)
4986 fputs (reg_names[REGNO (x)], file);
4987 else
4988 fputs (reg_names[REGNO (x)+1], file);
4989 return;
4990 case 'R':
4991 /* Print out the second register name of a register pair or quad.
4992 I.e., R (%o0) => %o1. */
4993 fputs (reg_names[REGNO (x)+1], file);
4994 return;
4995 case 'S':
4996 /* Print out the third register name of a register quad.
4997 I.e., S (%o0) => %o2. */
4998 fputs (reg_names[REGNO (x)+2], file);
4999 return;
5000 case 'T':
5001 /* Print out the fourth register name of a register quad.
5002 I.e., T (%o0) => %o3. */
5003 fputs (reg_names[REGNO (x)+3], file);
5004 return;
5005 case 'x':
5006 /* Print a condition code register. */
5007 if (REGNO (x) == SPARC_ICC_REG)
5009 /* We don't handle CC[X]_NOOVmode because they're not supposed
5010 to occur here. */
5011 if (GET_MODE (x) == CCmode)
5012 fputs ("%icc", file);
5013 else if (GET_MODE (x) == CCXmode)
5014 fputs ("%xcc", file);
5015 else
5016 abort ();
5018 else
5019 /* %fccN register */
5020 fputs (reg_names[REGNO (x)], file);
5021 return;
5022 case 'm':
5023 /* Print the operand's address only. */
5024 output_address (XEXP (x, 0));
5025 return;
5026 case 'r':
5027 /* In this case we need a register. Use %g0 if the
5028 operand is const0_rtx. */
5029 if (x == const0_rtx
5030 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
5032 fputs ("%g0", file);
5033 return;
5035 else
5036 break;
5038 case 'A':
/* Logical opcode mnemonic for a 3-op insn.  */
5039 switch (GET_CODE (x))
5041 case IOR: fputs ("or", file); break;
5042 case AND: fputs ("and", file); break;
5043 case XOR: fputs ("xor", file); break;
5044 default: output_operand_lossage ("Invalid %%A operand");
5046 return;
5048 case 'B':
/* Negated-second-operand variant of the above.  */
5049 switch (GET_CODE (x))
5051 case IOR: fputs ("orn", file); break;
5052 case AND: fputs ("andn", file); break;
5053 case XOR: fputs ("xnor", file); break;
5054 default: output_operand_lossage ("Invalid %%B operand");
5056 return;
5058 /* These are used by the conditional move instructions. */
5059 case 'c' :
5060 case 'C':
5062 enum rtx_code rc = (code == 'c'
5063 ? reverse_condition (GET_CODE (x))
5064 : GET_CODE (x));
5065 switch (rc)
5067 case NE: fputs ("ne", file); break;
5068 case EQ: fputs ("e", file); break;
5069 case GE: fputs ("ge", file); break;
5070 case GT: fputs ("g", file); break;
5071 case LE: fputs ("le", file); break;
5072 case LT: fputs ("l", file); break;
5073 case GEU: fputs ("geu", file); break;
5074 case GTU: fputs ("gu", file); break;
5075 case LEU: fputs ("leu", file); break;
5076 case LTU: fputs ("lu", file); break;
5077 default: output_operand_lossage (code == 'c'
5078 ? "Invalid %%c operand"
5079 : "Invalid %%C operand");
5081 return;
5084 /* These are used by the movr instruction pattern. */
5085 case 'd':
5086 case 'D':
5088 enum rtx_code rc = (code == 'd'
5089 ? reverse_condition (GET_CODE (x))
5090 : GET_CODE (x));
5091 switch (rc)
5093 case NE: fputs ("ne", file); break;
5094 case EQ: fputs ("e", file); break;
5095 case GE: fputs ("gez", file); break;
5096 case LT: fputs ("lz", file); break;
5097 case LE: fputs ("lez", file); break;
5098 case GT: fputs ("gz", file); break;
5099 default: output_operand_lossage (code == 'd'
5100 ? "Invalid %%d operand"
5101 : "Invalid %%D operand");
5103 return;
5106 case 'b':
5108 /* Print a sign-extended character. */
/* NOTE(review): the widening below assumes a 32-bit int;
   era-typical but not portable -- confirm on wider hosts.  */
5109 int i = INTVAL (x) & 0xff;
5110 if (i & 0x80)
5111 i |= 0xffffff00;
5112 fprintf (file, "%d", i);
5113 return;
5116 case 'f':
5117 /* Operand must be a MEM; write its address. */
5118 if (GET_CODE (x) != MEM)
5119 output_operand_lossage ("Invalid %%f operand");
5120 output_address (XEXP (x, 0));
5121 return;
5123 case 0:
5124 /* Do nothing special. */
5125 break;
5127 default:
5128 /* Undocumented flag. */
5129 output_operand_lossage ("invalid operand output code");
/* Letterless (or 'r'/'Y' fall-through) default: print X itself.  */
5132 if (GET_CODE (x) == REG)
5133 fputs (reg_names[REGNO (x)], file);
5134 else if (GET_CODE (x) == MEM)
5136 fputc ('[', file);
5137 /* Poor Sun assembler doesn't understand absolute addressing. */
5138 if (CONSTANT_P (XEXP (x, 0))
5139 && ! TARGET_LIVE_G0)
5140 fputs ("%g0+", file);
5141 output_address (XEXP (x, 0));
5142 fputc (']', file);
5144 else if (GET_CODE (x) == HIGH)
5146 fputs ("%hi(", file);
5147 output_addr_const (file, XEXP (x, 0));
5148 fputc (')', file);
5150 else if (GET_CODE (x) == LO_SUM)
5152 print_operand (file, XEXP (x, 0), 0);
5153 if (TARGET_CM_MEDMID)
5154 fputs ("+%l44(", file);
5155 else
5156 fputs ("+%lo(", file);
5157 output_addr_const (file, XEXP (x, 1));
5158 fputc (')', file);
5160 else if (GET_CODE (x) == CONST_DOUBLE
5161 && (GET_MODE (x) == VOIDmode
5162 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
/* NOTE(review): CONST_DOUBLE_LOW is a HOST_WIDE_INT; the "%u"/"%d"
   formats assume it fits in int -- confirm on 64-bit hosts.  */
5164 if (CONST_DOUBLE_HIGH (x) == 0)
5165 fprintf (file, "%u", CONST_DOUBLE_LOW (x));
5166 else if (CONST_DOUBLE_HIGH (x) == -1
5167 && CONST_DOUBLE_LOW (x) < 0)
5168 fprintf (file, "%d", CONST_DOUBLE_LOW (x));
5169 else
5170 output_operand_lossage ("long long constant not a valid immediate operand");
5172 else if (GET_CODE (x) == CONST_DOUBLE)
5173 output_operand_lossage ("floating point constant not a valid immediate operand");
5174 else { output_addr_const (file, x); }
5177 /* This function outputs assembler code for VALUE to FILE, where VALUE is
5178 a 64 bit (DImode) value. */
5180 /* ??? If there is a 64 bit counterpart to .word that the assembler
5181 understands, then using that would simplify this code greatly. */
5182 /* ??? We only output .xword's for symbols and only then in environments
5183 where the assembler can handle them. */
5185 void
5186 output_double_int (file, value)
5187 FILE *file;
5188 rtx value;
5190 if (GET_CODE (value) == CONST_INT)
5192 /* ??? This has endianness issues. */
5193 #if HOST_BITS_PER_WIDE_INT == 64
/* 64-bit host: split the value into two explicit 32-bit words.  */
5194 HOST_WIDE_INT xword = INTVAL (value);
5195 HOST_WIDE_INT high, low;
5197 high = (xword >> 32) & 0xffffffff;
5198 low = xword & 0xffffffff;
5199 ASM_OUTPUT_INT (file, GEN_INT (high));
5200 ASM_OUTPUT_INT (file, GEN_INT (low));
5201 #else
/* 32-bit host: the high word is just the sign extension.  */
5202 if (INTVAL (value) < 0)
5203 ASM_OUTPUT_INT (file, constm1_rtx);
5204 else
5205 ASM_OUTPUT_INT (file, const0_rtx);
5206 ASM_OUTPUT_INT (file, value);
5207 #endif
5209 else if (GET_CODE (value) == CONST_DOUBLE)
5211 ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_HIGH (value)));
5212 ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_LOW (value)));
5214 else if (GET_CODE (value) == SYMBOL_REF
5215 || GET_CODE (value) == CONST
5216 || GET_CODE (value) == PLUS
5217 || (TARGET_ARCH64 &&
5218 (GET_CODE (value) == LABEL_REF
5219 || GET_CODE (value) == CODE_LABEL
5220 || GET_CODE (value) == MINUS)))
5222 if (! TARGET_V9)
/* No 64-bit directive available: emit a zero high word -- presumably
   valid only because non-V9 addresses fit in 32 bits; confirm.  */
5224 ASM_OUTPUT_INT (file, const0_rtx);
5225 ASM_OUTPUT_INT (file, value);
5227 else
5229 fprintf (file, "\t%s\t", ASM_LONGLONG);
5230 output_addr_const (file, value);
5231 fprintf (file, "\n");
5234 else
5235 abort ();
5238 /* Return the value of a code used in the .proc pseudo-op that says
5239 what kind of result this function returns. For non-C types, we pick
5240 the closest C type. */

/* Fallback type-size definitions used by sparc_type_code below when
   the front end / target headers have not provided them.  */
5242 #ifndef CHAR_TYPE_SIZE
5243 #define CHAR_TYPE_SIZE BITS_PER_UNIT
5244 #endif
5246 #ifndef SHORT_TYPE_SIZE
5247 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
5248 #endif
5250 #ifndef INT_TYPE_SIZE
5251 #define INT_TYPE_SIZE BITS_PER_WORD
5252 #endif
5254 #ifndef LONG_TYPE_SIZE
5255 #define LONG_TYPE_SIZE BITS_PER_WORD
5256 #endif
5258 #ifndef LONG_LONG_TYPE_SIZE
5259 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
5260 #endif
5262 #ifndef FLOAT_TYPE_SIZE
5263 #define FLOAT_TYPE_SIZE BITS_PER_WORD
5264 #endif
5266 #ifndef DOUBLE_TYPE_SIZE
5267 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
5268 #endif
5270 #ifndef LONG_DOUBLE_TYPE_SIZE
5271 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
5272 #endif
/* Compute the .proc type code for TYPE.  Derived-type codes (pointer,
   function, array) are accumulated two bits at a time starting at bit 6,
   walking down through TREE_TYPE; the base-type cases below terminate
   the loop by returning qualifiers | base-code.  NOTE(review): the bit
   layout is inferred from the shifts below -- confirm against the
   assembler's .proc documentation.  */
5274 unsigned long
5275 sparc_type_code (type)
5276 register tree type;
5278 register unsigned long qualifiers = 0;
5279 register unsigned shift;
5281 /* Only the first 30 bits of the qualifier are valid. We must refrain from
5282 setting more, since some assemblers will give an error for this. Also,
5283 we must be careful to avoid shifts of 32 bits or more to avoid getting
5284 unpredictable results. */
5286 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
5288 switch (TREE_CODE (type))
5290 case ERROR_MARK:
5291 return qualifiers;
5293 case ARRAY_TYPE:
5294 qualifiers |= (3 << shift);
5295 break;
5297 case FUNCTION_TYPE:
5298 case METHOD_TYPE:
5299 qualifiers |= (2 << shift);
5300 break;
5302 case POINTER_TYPE:
5303 case REFERENCE_TYPE:
5304 case OFFSET_TYPE:
5305 qualifiers |= (1 << shift);
5306 break;
5308 case RECORD_TYPE:
5309 return (qualifiers | 8);
5311 case UNION_TYPE:
5312 case QUAL_UNION_TYPE:
5313 return (qualifiers | 9);
5315 case ENUMERAL_TYPE:
5316 return (qualifiers | 10);
5318 case VOID_TYPE:
5319 return (qualifiers | 16);
5321 case INTEGER_TYPE:
5322 /* If this is a range type, consider it to be the underlying
5323 type. */
5324 if (TREE_TYPE (type) != 0)
5325 break;
5327 /* Carefully distinguish all the standard types of C,
5328 without messing up if the language is not C. We do this by
5329 testing TYPE_PRECISION and TREE_UNSIGNED. The old code used to
5330 look at both the names and the above fields, but that's redundant.
5331 Any type whose size is between two C types will be considered
5332 to be the wider of the two types. Also, we do not have a
5333 special code to use for "long long", so anything wider than
5334 long is treated the same. Note that we can't distinguish
5335 between "int" and "long" in this code if they are the same
5336 size, but that's fine, since neither can the assembler. */
5338 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
5339 return (qualifiers | (TREE_UNSIGNED (type) ? 12 : 2));
5341 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
5342 return (qualifiers | (TREE_UNSIGNED (type) ? 13 : 3));
5344 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
5345 return (qualifiers | (TREE_UNSIGNED (type) ? 14 : 4));
5347 else
5348 return (qualifiers | (TREE_UNSIGNED (type) ? 15 : 5));
5350 case REAL_TYPE:
5351 /* If this is a range type, consider it to be the underlying
5352 type. */
5353 if (TREE_TYPE (type) != 0)
5354 break;
5356 /* Carefully distinguish all the standard types of C,
5357 without messing up if the language is not C. */
5359 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
5360 return (qualifiers | 6);
5362 else
5363 return (qualifiers | 7);
5365 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
5366 /* ??? We need to distinguish between double and float complex types,
5367 but I don't know how yet because I can't reach this code from
5368 existing front-ends. */
5369 return (qualifiers | 7); /* Who knows? */
5371 case CHAR_TYPE: /* GNU Pascal CHAR type. Not used in C. */
5372 case BOOLEAN_TYPE: /* GNU Fortran BOOLEAN type. */
5373 case FILE_TYPE: /* GNU Pascal FILE type. */
5374 case SET_TYPE: /* GNU Pascal SET type. */
5375 case LANG_TYPE: /* ? */
5376 return qualifiers;
5378 default:
5379 abort (); /* Not a type! */
/* Reached only when the 30-bit qualifier budget is exhausted.  */
5383 return qualifiers;
5386 /* Nested function support. */
5388 /* Emit RTL insns to initialize the variable parts of a trampoline.
5389 FNADDR is an RTX for the address of the function's pure code.
5390 CXT is an RTX for the static chain value for the function.
5392 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
5393 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
5394 (to store insns). This is a bit excessive. Perhaps a different
5395 mechanism would be better here.
5397 Emit enough FLUSH insns to synchronize the data and instruction caches. */
5399 void
5400 sparc_initialize_trampoline (tramp, fnaddr, cxt)
5401 rtx tramp, fnaddr, cxt;
5403 /* SPARC 32 bit trampoline:
5405 sethi %hi(fn), %g1
5406 sethi %hi(static), %g2
5407 jmp %g1+%lo(fn)
5408 or %g2, %lo(static), %g2
5410 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
5411 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
/* Word 0: "sethi %hi(fn), %g1" -- opcode 0x03000000 merged with the
   top 22 bits of FNADDR (FNADDR >> 10).  */
5414 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
5415 expand_binop (SImode, ior_optab,
5416 expand_shift (RSHIFT_EXPR, SImode, fnaddr,
5417 size_int (10), 0, 1),
5418 GEN_INT (0x03000000),
5419 NULL_RTX, 1, OPTAB_DIRECT));
/* Word 1: "sethi %hi(static), %g2" -- opcode 0x05000000 with the top
   22 bits of CXT.  */
5421 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
5422 expand_binop (SImode, ior_optab,
5423 expand_shift (RSHIFT_EXPR, SImode, cxt,
5424 size_int (10), 0, 1),
5425 GEN_INT (0x05000000),
5426 NULL_RTX, 1, OPTAB_DIRECT));
/* Word 2: "jmp %g1+%lo(fn)" -- opcode 0x81c06000 with the low 10 bits
   of FNADDR as the immediate.  */
5428 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
5429 expand_binop (SImode, ior_optab,
5430 expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
5431 GEN_INT (0x81c06000),
5432 NULL_RTX, 1, OPTAB_DIRECT));
/* Word 3 (delay slot): "or %g2, %lo(static), %g2" -- opcode 0x8410a000
   with the low 10 bits of CXT.  */
5434 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
5435 expand_binop (SImode, ior_optab,
5436 expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
5437 GEN_INT (0x8410a000),
5438 NULL_RTX, 1, OPTAB_DIRECT));
5440 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
5441 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
5442 aligned on a 16 byte boundary so one flush clears it all. */
5443 if (sparc_cpu != PROCESSOR_ULTRASPARC)
5444 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
5445 plus_constant (tramp, 8)))));
5448 /* The 64 bit version is simpler because it makes more sense to load the
5449 values as "immediate" data out of the trampoline. It's also easier since
5450 we can read the PC without clobbering a register. */
5452 void
5453 sparc64_initialize_trampoline (tramp, fnaddr, cxt)
5454 rtx tramp, fnaddr, cxt;
5457 rd %pc, %g1
5458 ldx [%g1+24], %g5
5459 jmp %g5
5460 ldx [%g1+16], %g5
5461 +16 bytes data
5464 emit_move_insn (gen_rtx_MEM (SImode, tramp),
5465 GEN_INT (0x83414000));
5466 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
5467 GEN_INT (0xca586018));
5468 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
5469 GEN_INT (0x81c04000));
5470 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
5471 GEN_INT (0xca586010));
5472 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
5473 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 20)), fnaddr);
5474 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));
5475 if (sparc_cpu != PROCESSOR_ULTRASPARC)
5476 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
5479 /* Subroutines to support a flat (single) register window calling
5480 convention. */
5482 /* Single-register window sparc stack frames look like:
5484 Before call After call
5485 +-----------------------+ +-----------------------+
5486 high | | | |
5487 mem | caller's temps. | | caller's temps. |
5488 | | | |
5489 +-----------------------+ +-----------------------+
5490 | | | |
5491 | arguments on stack. | | arguments on stack. |
5492 | | | |
5493 +-----------------------+FP+92->+-----------------------+
5494 | 6 words to save | | 6 words to save |
5495 | arguments passed | | arguments passed |
5496 | in registers, even | | in registers, even |
5497 | if not passed. | | if not passed. |
5498 SP+68->+-----------------------+FP+68->+-----------------------+
5499 | 1 word struct addr | | 1 word struct addr |
5500 +-----------------------+FP+64->+-----------------------+
5501 | | | |
5502 | 16 word reg save area | | 16 word reg save area |
5503 | | | |
5504 SP->+-----------------------+ FP->+-----------------------+
5505 | 4 word area for |
5506 | fp/alu reg moves |
5507 FP-16->+-----------------------+
5509 | local variables |
5511 +-----------------------+
5513 | fp register save |
5515 +-----------------------+
5517 | gp register save |
5519 +-----------------------+
5521 | alloca allocations |
5523 +-----------------------+
5525 | arguments on stack |
5527 SP+92->+-----------------------+
5528 | 6 words to save |
5529 | arguments passed |
5530 | in registers, even |
5531 low | if not passed. |
5532 memory SP+68->+-----------------------+
5533 | 1 word struct addr |
5534 SP+64->+-----------------------+
5536 | 16 word reg save area |
5538 SP->+-----------------------+ */
5540 /* Structure to be filled in by sparc_flat_compute_frame_size with register
5541 save masks, and offsets for the current function. */
5543 struct sparc_frame_info
5545 unsigned long total_size; /* # bytes that the entire frame takes up. */
5546 unsigned long var_size; /* # bytes that variables take up. */
5547 unsigned long args_size; /* # bytes that outgoing arguments take up. */
5548 unsigned long extra_size; /* # bytes of extra gunk. */
5549 unsigned int gp_reg_size; /* # bytes needed to store gp regs. */
5550 unsigned int fp_reg_size; /* # bytes needed to store fp regs. */
5551 unsigned long gmask; /* Mask of saved gp registers. */
5552 unsigned long fmask; /* Mask of saved fp registers. */
5553 unsigned long reg_offset; /* Offset from new sp to store regs. */
5554 int initialized; /* Nonzero if frame size already calculated. */
5557 /* Current frame information calculated by sparc_flat_compute_frame_size. */
5558 struct sparc_frame_info current_frame_info;
5560 /* Zero structure to initialize current_frame_info. */
5561 struct sparc_frame_info zero_frame_info;
5563 /* Tell prologue and epilogue if register REGNO should be saved / restored. */
5565 #define RETURN_ADDR_REGNUM 15
5566 #define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
5567 #define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
/* NOTE: REGNO is evaluated several times below -- do not pass an
   expression with side effects.  */
5569 #define MUST_SAVE_REGISTER(regno) \
5570 ((regs_ever_live[regno] && !call_used_regs[regno]) \
5571 || (regno == FRAME_POINTER_REGNUM && frame_pointer_needed) \
5572 || (regno == RETURN_ADDR_REGNUM && regs_ever_live[RETURN_ADDR_REGNUM]))
5574 /* Return the bytes needed to compute the frame pointer from the current
5575 stack pointer. */
5577 unsigned long
5578 sparc_flat_compute_frame_size (size)
5579 int size; /* # of var. bytes allocated. */
5581 int regno;
5582 unsigned long total_size; /* # bytes that the entire frame takes up. */
5583 unsigned long var_size; /* # bytes that variables take up. */
5584 unsigned long args_size; /* # bytes that outgoing arguments take up. */
5585 unsigned long extra_size; /* # extra bytes. */
5586 unsigned int gp_reg_size; /* # bytes needed to store gp regs. */
5587 unsigned int fp_reg_size; /* # bytes needed to store fp regs. */
5588 unsigned long gmask; /* Mask of saved gp registers. */
5589 unsigned long fmask; /* Mask of saved fp registers. */
5590 unsigned long reg_offset; /* Offset to register save area. */
5591 int need_aligned_p; /* 1 if need the save area 8 byte aligned. */
5593 /* This is the size of the 16 word reg save area, 1 word struct addr
5594 area, and 4 word fp/alu register copy area. */
5595 extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
5596 var_size = size;
5597 /* Also include the size needed for the 6 parameter registers. */
5598 args_size = current_function_outgoing_args_size + 24;
5599 total_size = var_size + args_size + extra_size;
5600 gp_reg_size = 0;
5601 fp_reg_size = 0;
5602 gmask = 0;
5603 fmask = 0;
5604 reg_offset = 0;
5605 need_aligned_p = 0;
5607 /* Calculate space needed for gp registers. */
/* Register 0 never needs saving, hence the loop starts at 1.  */
5608 for (regno = 1; regno <= 31; regno++)
5610 if (MUST_SAVE_REGISTER (regno))
5612 /* If we need to save two regs in a row, ensure there's room to bump
5613 up the address to align it to a doubleword boundary. */
5614 if ((regno & 0x1) == 0 && MUST_SAVE_REGISTER (regno+1))
5616 if (gp_reg_size % 8 != 0)
5617 gp_reg_size += 4;
5618 gp_reg_size += 2 * UNITS_PER_WORD;
/* NOTE(review): "3 << regno" touches bit 31 when regno == 30; this
   relies on 32-bit int shift behavior -- confirm, or widen to 1UL.  */
5619 gmask |= 3 << regno;
5620 regno++;
5621 need_aligned_p = 1;
5623 else
5625 gp_reg_size += UNITS_PER_WORD;
5626 gmask |= 1 << regno;
5631 /* Calculate space needed for fp registers. */
5632 for (regno = 32; regno <= 63; regno++)
5634 if (regs_ever_live[regno] && !call_used_regs[regno])
5636 fp_reg_size += UNITS_PER_WORD;
5637 fmask |= 1 << (regno - 32);
5641 if (gmask || fmask)
5643 int n;
5644 reg_offset = FIRST_PARM_OFFSET(0) + args_size;
5645 /* Ensure save area is 8 byte aligned if we need it. */
5646 n = reg_offset % 8;
5647 if (need_aligned_p && n != 0)
5649 total_size += 8 - n;
5650 reg_offset += 8 - n;
5652 total_size += gp_reg_size + fp_reg_size;
5655 /* ??? This looks a little suspicious. Clarify. */
5656 if (total_size == extra_size)
5657 total_size = extra_size = 0;
5659 total_size = SPARC_STACK_ALIGN (total_size);
5661 /* Save other computed information. */
5662 current_frame_info.total_size = total_size;
5663 current_frame_info.var_size = var_size;
5664 current_frame_info.args_size = args_size;
5665 current_frame_info.extra_size = extra_size;
5666 current_frame_info.gp_reg_size = gp_reg_size;
5667 current_frame_info.fp_reg_size = fp_reg_size;
5668 current_frame_info.gmask = gmask;
5669 current_frame_info.fmask = fmask;
5670 current_frame_info.reg_offset = reg_offset;
5671 current_frame_info.initialized = reload_completed;
5673 /* Ok, we're done. */
5674 return total_size;
5677 /* Save/restore registers in GMASK and FMASK at register BASE_REG plus offset
5678 OFFSET.
5680 BASE_REG must be 8 byte aligned. This allows us to test OFFSET for
5681 appropriate alignment and use DOUBLEWORD_OP when we can. We assume
5682 [BASE_REG+OFFSET] will always be a valid address.
5684 WORD_OP is either "st" for save, "ld" for restore.
5685 DOUBLEWORD_OP is either "std" for save, "ldd" for restore. */
5687 void
5688 sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
5689 doubleword_op, base_offset)
5690 FILE *file;
5691 char *base_reg;
5692 unsigned int offset;
5693 unsigned long gmask;
5694 unsigned long fmask;
5695 char *word_op;
5696 char *doubleword_op;
5697 unsigned long base_offset;
5699 int regno;
5701 if (gmask == 0 && fmask == 0)
5702 return;
5704 /* Save registers starting from high to low. We've already saved the
5705 previous frame pointer and previous return address for the debugger's
5706 sake. The debugger allows us to not need a nop in the epilog if at least
5707 one register is reloaded in addition to return address. */
5709 if (gmask)
5711 for (regno = 1; regno <= 31; regno++)
5713 if ((gmask & (1L << regno)) != 0)
5715 if ((regno & 0x1) == 0 && ((gmask & (1L << (regno+1))) != 0))
5717 /* We can save two registers in a row. If we're not at a
5718 double word boundary, move to one.
5719 sparc_flat_compute_frame_size ensures there's room to do
5720 this. */
5721 if (offset % 8 != 0)
5722 offset += UNITS_PER_WORD;
/* word_op[0] == 's' distinguishes "st"/"std" (save) from
   "ld"/"ldd" (restore); see the function comment.  */
5724 if (word_op[0] == 's')
5726 fprintf (file, "\t%s\t%s, [%s+%d]\n",
5727 doubleword_op, reg_names[regno],
5728 base_reg, offset);
/* Emit CFI for both halves of the doubleword store.  */
5729 if (dwarf2out_do_frame ())
5731 char *l = dwarf2out_cfi_label ();
5732 dwarf2out_reg_save (l, regno, offset + base_offset);
5733 dwarf2out_reg_save
5734 (l, regno+1, offset+base_offset + UNITS_PER_WORD);
5737 else
5738 fprintf (file, "\t%s\t[%s+%d], %s\n",
5739 doubleword_op, base_reg, offset,
5740 reg_names[regno]);
5742 offset += 2 * UNITS_PER_WORD;
5743 regno++;
5745 else
5747 if (word_op[0] == 's')
5749 fprintf (file, "\t%s\t%s, [%s+%d]\n",
5750 word_op, reg_names[regno],
5751 base_reg, offset);
5752 if (dwarf2out_do_frame ())
5753 dwarf2out_reg_save ("", regno, offset + base_offset);
5755 else
5756 fprintf (file, "\t%s\t[%s+%d], %s\n",
5757 word_op, base_reg, offset, reg_names[regno]);
5759 offset += UNITS_PER_WORD;
/* Floating point registers are always saved/restored one word at a
   time.  */
5765 if (fmask)
5767 for (regno = 32; regno <= 63; regno++)
5769 if ((fmask & (1L << (regno - 32))) != 0)
5771 if (word_op[0] == 's')
5773 fprintf (file, "\t%s\t%s, [%s+%d]\n",
5774 word_op, reg_names[regno],
5775 base_reg, offset);
5776 if (dwarf2out_do_frame ())
5777 dwarf2out_reg_save ("", regno, offset + base_offset);
5779 else
5780 fprintf (file, "\t%s\t[%s+%d], %s\n",
5781 word_op, base_reg, offset, reg_names[regno]);
5783 offset += UNITS_PER_WORD;
5789 /* Set up the stack and frame (if desired) for the function. */
5791 void
5792 sparc_flat_output_function_prologue (file, size)
5793 FILE *file;
5794 int size;
5796 char *sp_str = reg_names[STACK_POINTER_REGNUM];
5797 unsigned long gmask = current_frame_info.gmask;
5799 /* This is only for the human reader. */
5800 fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
5801 fprintf (file, "\t%s# vars= %ld, regs= %d/%d, args= %d, extra= %ld\n",
5802 ASM_COMMENT_START,
5803 current_frame_info.var_size,
5804 current_frame_info.gp_reg_size / 4,
5805 current_frame_info.fp_reg_size / 4,
5806 current_function_outgoing_args_size,
5807 current_frame_info.extra_size);
     /* Recompute the frame size unless reload already recorded it in
        current_frame_info. */
5809 size = SPARC_STACK_ALIGN (size);
5810 size = (! current_frame_info.initialized
5811 ? sparc_flat_compute_frame_size (size)
5812 : current_frame_info.total_size);
5814 /* These cases shouldn't happen. Catch them now. */
5815 if (size == 0 && (gmask || current_frame_info.fmask))
5816 abort ();
5818 /* Allocate our stack frame by decrementing %sp.
5819 At present, the only algorithm gdb can use to determine if this is a
5820 flat frame is if we always set %i7 if we set %sp. This can be optimized
5821 in the future by putting in some sort of debugging information that says
5822 this is a `flat' function. However, there is still the case of debugging
5823 code without such debugging information (including cases where most fns
5824 have such info, but there is one that doesn't). So, always do this now
5825 so we don't get a lot of code out there that gdb can't handle.
5826 If the frame pointer isn't needed then that's ok - gdb won't be able to
5827 distinguish us from a non-flat function but there won't (and shouldn't)
5828 be any differences anyway. The return pc is saved (if necessary) right
5829 after %i7 so gdb won't have to look too far to find it. */
5830 if (size > 0)
5832 unsigned int reg_offset = current_frame_info.reg_offset;
5833 char *fp_str = reg_names[FRAME_POINTER_REGNUM];
5834 char *t1_str = "%g1";
5836 /* Things get a little tricky if local variables take up more than ~4096
5837 bytes and outgoing arguments take up more than ~4096 bytes. When that
5838 happens, the register save area can't be accessed from either end of
5839 the frame. Handle this by decrementing %sp to the start of the gp
5840 register save area, save the regs, update %i7, and then set %sp to its
5841 final value. Given that we only have one scratch register to play
5842 with it is the cheapest solution, and it helps gdb out as it won't
5843 slow down recognition of flat functions.
5844 Don't change the order of insns emitted here without checking with
5845 the gdb folk first. */
5847 /* Is the entire register save area offsettable from %sp? */
5848 if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
     /* Frame fits a 13-bit signed immediate: decrement %sp directly;
        otherwise build the size in %g1 first. */
5850 if (size <= 4096)
5852 fprintf (file, "\tadd\t%s, %d, %s\n",
5853 sp_str, -size, sp_str);
5854 if (gmask & FRAME_POINTER_MASK)
5856 fprintf (file, "\tst\t%s, [%s+%d]\n",
5857 fp_str, sp_str, reg_offset);
5858 fprintf (file, "\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
5859 sp_str, -size, fp_str, ASM_COMMENT_START);
5860 reg_offset += 4;
5863 else
5865 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
5866 size, t1_str, sp_str, t1_str, sp_str);
5867 if (gmask & FRAME_POINTER_MASK)
5869 fprintf (file, "\tst\t%s, [%s+%d]\n",
5870 fp_str, sp_str, reg_offset);
5871 fprintf (file, "\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
5872 sp_str, t1_str, fp_str, ASM_COMMENT_START);
5873 reg_offset += 4;
5876 if (dwarf2out_do_frame ())
5878 char *l = dwarf2out_cfi_label ();
5879 if (gmask & FRAME_POINTER_MASK)
5881 dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
5882 reg_offset - 4 - size);
5883 dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
5885 else
5886 dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size);
5888 if (gmask & RETURN_ADDR_MASK)
5890 fprintf (file, "\tst\t%s, [%s+%d]\n",
5891 reg_names[RETURN_ADDR_REGNUM], sp_str, reg_offset);
5892 if (dwarf2out_do_frame ())
5893 dwarf2out_return_save ("", reg_offset - size);
5894 reg_offset += 4;
     /* Save the remaining registers; %i7 and the return address were
        handled above so they are masked out here. */
5896 sparc_flat_save_restore (file, sp_str, reg_offset,
5897 gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
5898 current_frame_info.fmask,
5899 "st", "std", -size);
5901 else
5903 /* Subtract %sp in two steps, but make sure there is always a
5904 64 byte register save area, and %sp is properly aligned. */
5905 /* Amount to decrement %sp by, the first time. */
5906 unsigned int size1 = ((size - reg_offset + 64) + 15) & -16;
5907 /* Offset to register save area from %sp. */
5908 unsigned int offset = size1 - (size - reg_offset);
5910 if (size1 <= 4096)
5912 fprintf (file, "\tadd\t%s, %d, %s\n",
5913 sp_str, -size1, sp_str);
5914 if (gmask & FRAME_POINTER_MASK)
5916 fprintf (file, "\tst\t%s, [%s+%d]\n\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
5917 fp_str, sp_str, offset, sp_str, -size1, fp_str,
5918 ASM_COMMENT_START);
5919 offset += 4;
5922 else
5924 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
5925 size1, t1_str, sp_str, t1_str, sp_str);
5926 if (gmask & FRAME_POINTER_MASK)
5928 fprintf (file, "\tst\t%s, [%s+%d]\n\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
5929 fp_str, sp_str, offset, sp_str, t1_str, fp_str,
5930 ASM_COMMENT_START);
5931 offset += 4;
5934 if (dwarf2out_do_frame ())
5936 char *l = dwarf2out_cfi_label ();
5937 if (gmask & FRAME_POINTER_MASK)
5939 dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
5940 offset - 4 - size1);
5941 dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
5943 else
5944 dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size1);
5946 if (gmask & RETURN_ADDR_MASK)
5948 fprintf (file, "\tst\t%s, [%s+%d]\n",
5949 reg_names[RETURN_ADDR_REGNUM], sp_str, offset);
5950 if (dwarf2out_do_frame ())
5951 /* offset - size1 == reg_offset - size
5952 if reg_offset were updated above like offset. */
5953 dwarf2out_return_save ("", offset - size1);
5954 offset += 4;
5956 sparc_flat_save_restore (file, sp_str, offset,
5957 gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
5958 current_frame_info.fmask,
5959 "st", "std", -size1);
     /* Second step: drop %sp the rest of the way to its final value. */
5960 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
5961 size - size1, t1_str, sp_str, t1_str, sp_str);
5962 if (dwarf2out_do_frame ())
5963 if (! (gmask & FRAME_POINTER_MASK))
5964 dwarf2out_def_cfa ("", STACK_POINTER_REGNUM, size);
5968 fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
5971 /* Do any necessary cleanup after a function to restore stack, frame,
5972 and regs. */
5974 void
5975 sparc_flat_output_function_epilogue (file, size)
5976 FILE *file;
5977 int size;
5979 rtx epilogue_delay = current_function_epilogue_delay_list;
5980 int noepilogue = FALSE;
5982 /* This is only for the human reader. */
5983 fprintf (file, "\t%s#EPILOGUE#\n", ASM_COMMENT_START);
5985 /* The epilogue does not depend on any registers, but the stack
5986 registers, so we assume that if we have 1 pending nop, it can be
5987 ignored, and 2 it must be filled (2 nops occur for integer
5988 multiply and divide). */
     /* Mirror the prologue's frame-size computation. */
5990 size = SPARC_STACK_ALIGN (size);
5991 size = (!current_frame_info.initialized
5992 ? sparc_flat_compute_frame_size (size)
5993 : current_frame_info.total_size);
5995 if (size == 0 && epilogue_delay == 0)
5997 rtx insn = get_last_insn ();
5999 /* If the last insn was a BARRIER, we don't have to write any code
6000 because a jump (aka return) was put there. */
6001 if (GET_CODE (insn) == NOTE)
6002 insn = prev_nonnote_insn (insn);
6003 if (insn && GET_CODE (insn) == BARRIER)
6004 noepilogue = TRUE;
6007 if (!noepilogue)
6009 unsigned int reg_offset = current_frame_info.reg_offset;
6010 unsigned int size1;
6011 char *sp_str = reg_names[STACK_POINTER_REGNUM];
6012 char *fp_str = reg_names[FRAME_POINTER_REGNUM];
6013 char *t1_str = "%g1";
6015 /* In the reload sequence, we don't need to fill the load delay
6016 slots for most of the loads, also see if we can fill the final
6017 delay slot if not otherwise filled by the reload sequence. */
     /* Sizes above 4095 don't fit a 13-bit signed immediate; stage them
        through %g1. */
6019 if (size > 4095)
6020 fprintf (file, "\tset\t%d, %s\n", size, t1_str);
6022 if (frame_pointer_needed)
6024 if (size > 4095)
6025 fprintf (file,"\tsub\t%s, %s, %s\t\t%s# sp not trusted here\n",
6026 fp_str, t1_str, sp_str, ASM_COMMENT_START);
6027 else
6028 fprintf (file,"\tsub\t%s, %d, %s\t\t%s# sp not trusted here\n",
6029 fp_str, size, sp_str, ASM_COMMENT_START);
6032 /* Is the entire register save area offsettable from %sp? */
6033 if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
6035 size1 = 0;
6037 else
6039 /* Restore %sp in two steps, but make sure there is always a
6040 64 byte register save area, and %sp is properly aligned. */
6041 /* Amount to increment %sp by, the first time. */
6042 size1 = ((reg_offset - 64 - 16) + 15) & -16;
6043 /* Offset to register save area from %sp. */
6044 reg_offset = size1 - reg_offset;
6046 fprintf (file, "\tset\t%d, %s\n\tadd\t%s, %s, %s\n",
6047 size1, t1_str, sp_str, t1_str, sp_str);
6050 /* We must restore the frame pointer and return address reg first
6051 because they are treated specially by the prologue output code. */
6052 if (current_frame_info.gmask & FRAME_POINTER_MASK)
6054 fprintf (file, "\tld\t[%s+%d], %s\n",
6055 sp_str, reg_offset, fp_str);
6056 reg_offset += 4;
6058 if (current_frame_info.gmask & RETURN_ADDR_MASK)
6060 fprintf (file, "\tld\t[%s+%d], %s\n",
6061 sp_str, reg_offset, reg_names[RETURN_ADDR_REGNUM]);
6062 reg_offset += 4;
6065 /* Restore any remaining saved registers. */
6066 sparc_flat_save_restore (file, sp_str, reg_offset,
6067 current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
6068 current_frame_info.fmask,
6069 "ld", "ldd", 0);
6071 /* If we had to increment %sp in two steps, record it so the second
6072 restoration in the epilogue finishes up. */
6073 if (size1 > 0)
6075 size -= size1;
6076 if (size > 4095)
6077 fprintf (file, "\tset\t%d, %s\n",
6078 size, t1_str);
     /* v8 struct-returning callers have an unimp word after the call;
        return past it. */
6081 if (current_function_returns_struct)
6082 fprintf (file, "\tjmp\t%%o7+12\n");
6083 else
6084 fprintf (file, "\tretl\n");
6086 /* If the only register saved is the return address, we need a
6087 nop, unless we have an instruction to put into it. Otherwise
6088 we don't since reloading multiple registers doesn't reference
6089 the register being loaded. */
     /* Fill the delay slot of the return: scheduled insn, final %sp
        adjustment, or a nop. */
6091 if (epilogue_delay)
6093 if (size)
6094 abort ();
6095 final_scan_insn (XEXP (epilogue_delay, 0), file, 1, -2, 1);
6098 else if (size > 4095)
6099 fprintf (file, "\tadd\t%s, %s, %s\n", sp_str, t1_str, sp_str);
6101 else if (size > 0)
6102 fprintf (file, "\tadd\t%s, %d, %s\n", sp_str, size, sp_str);
6104 else
6105 fprintf (file, "\tnop\n");
6108 /* Reset state info for each function. */
6109 current_frame_info = zero_frame_info;
6111 sparc_output_deferred_case_vectors ();
6114 /* Define the number of delay slots needed for the function epilogue.
6116 On the sparc, we need a slot if either no stack has been allocated,
6117 or the only register saved is the return register. */
6120 sparc_flat_epilogue_delay_slots ()
6122 if (!current_frame_info.initialized)
6123 (void) sparc_flat_compute_frame_size (get_frame_size ());
6125 if (current_frame_info.total_size == 0)
6126 return 1;
6128 return 0;
6131 /* Return true is TRIAL is a valid insn for the epilogue delay slot.
6132 Any single length instruction which doesn't reference the stack or frame
6133 pointer is OK. */
6136 sparc_flat_eligible_for_epilogue_delay (trial, slot)
6137 rtx trial;
6138 int slot ATTRIBUTE_UNUSED;
6140 rtx pat = PATTERN (trial);
6142 if (get_attr_length (trial) != 1)
6143 return 0;
6145 /* If %g0 is live, there are lots of things we can't handle.
6146 Rather than trying to find them all now, let's punt and only
6147 optimize things as necessary. */
6148 if (TARGET_LIVE_G0)
6149 return 0;
6151 if (! reg_mentioned_p (stack_pointer_rtx, pat)
6152 && ! reg_mentioned_p (frame_pointer_rtx, pat))
6153 return 1;
6155 return 0;
6158 /* Adjust the cost of a scheduling dependency. Return the new cost of
6159 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
6162 supersparc_adjust_cost (insn, link, dep_insn, cost)
6163 rtx insn;
6164 rtx link;
6165 rtx dep_insn;
6166 int cost;
6168 enum attr_type insn_type;
6170 if (! recog_memoized (insn))
6171 return 0;
6173 insn_type = get_attr_type (insn);
6175 if (REG_NOTE_KIND (link) == 0)
6177 /* Data dependency; DEP_INSN writes a register that INSN reads some
6178 cycles later. */
6180 /* if a load, then the dependence must be on the memory address;
6181 add an extra "cycle". Note that the cost could be two cycles
6182 if the reg was written late in an instruction group; we ca not tell
6183 here. */
6184 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
6185 return cost + 3;
6187 /* Get the delay only if the address of the store is the dependence. */
6188 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
6190 rtx pat = PATTERN(insn);
6191 rtx dep_pat = PATTERN (dep_insn);
6193 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6194 return cost; /* This should not happen! */
6196 /* The dependency between the two instructions was on the data that
6197 is being stored. Assume that this implies that the address of the
6198 store is not dependent. */
6199 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6200 return cost;
6202 return cost + 3; /* An approximation. */
6205 /* A shift instruction cannot receive its data from an instruction
6206 in the same cycle; add a one cycle penalty. */
6207 if (insn_type == TYPE_SHIFT)
6208 return cost + 3; /* Split before cascade into shift. */
6210 else
6212 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
6213 INSN writes some cycles later. */
6215 /* These are only significant for the fpu unit; writing a fp reg before
6216 the fpu has finished with it stalls the processor. */
6218 /* Reusing an integer register causes no problems. */
6219 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6220 return 0;
6223 return cost;
6226 /* This describes the state of the UltraSPARC pipeline during
6227 instruction scheduling. */
     /* TMASK converts an insn type attribute into a bit mask; UMASK does
        the same for an ultra_code.  They are identical and exist only to
        make the intent explicit at each use site.
        NOTE(review): `__x' is formally a reserved identifier; `_x' would
        be cleaner.  */
6229 #define TMASK(__x) ((unsigned)1 << ((int)(__x)))
6230 #define UMASK(__x) ((unsigned)1 << ((int)(__x)))
     /* The issue slot classes of the UltraSPARC, used to classify each
        candidate insn during grouping. */
6232 enum ultra_code { NONE=0, /* no insn at all */
6233 IEU0, /* shifts and conditional moves */
6234 IEU1, /* condition code setting insns, calls+jumps */
6235 IEUN, /* all other single cycle ieu insns */
6236 LSU, /* loads and stores */
6237 CTI, /* branches */
6238 FPM, /* FPU pipeline 1, multiplies and divides */
6239 FPA, /* FPU pipeline 2, all other operations */
6240 SINGLE, /* single issue instructions */
6241 NUM_ULTRA_CODES };
     /* Printable names for the codes above, used for scheduler dumps;
        must stay in sync with enum ultra_code. */
6243 static char *ultra_code_names[NUM_ULTRA_CODES] = {
6244 "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
6245 "FPM", "FPA", "SINGLE" };
6247 struct ultrasparc_pipeline_state {
6248 /* The insns in this group. */
6249 rtx group[4];
6251 /* The code for each insn. */
6252 enum ultra_code codes[4];
6254 /* Which insns in this group have been committed by the
6255 scheduler. This is how we determine how many more
6256 can issue this cycle. */
6257 char commit[4];
6259 /* How many insns in this group. */
6260 char group_size;
6262 /* Mask of free slots still in this group. */
6263 char free_slot_mask;
6265 /* The slotter uses the following to determine what other
6266 insn types can still make their way into this group. */
6267 char contents [NUM_ULTRA_CODES];
     /* Count of IEU0/IEU1/IEUN insns placed in this group; the slotter
        allows at most two per group. */
6268 char num_ieu_insns;
     /* Ring buffer of recently issued groups, the index of the current
        group within it, and the cycle count.  NOTE(review): the history
        walkers index with modulo (ULTRA_NUM_HIST - 1), so only 7 of the
        8 entries are ever cycled through. */
6271 #define ULTRA_NUM_HIST 8
6272 static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
6273 static int ultra_cur_hist;
6274 static int ultra_cycles_elapsed;
     /* Shorthand for the pipeline group currently being filled. */
6276 #define ultra_pipe (ultra_pipe_hist[ultra_cur_hist])
6278 /* Given TYPE_MASK compute the ultra_code it has. */
6279 static enum ultra_code
6280 ultra_code_from_mask (type_mask)
6281 int type_mask;
6283 if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
6284 return IEU0;
6285 else if (type_mask & (TMASK (TYPE_COMPARE) |
6286 TMASK (TYPE_CALL) |
6287 TMASK (TYPE_UNCOND_BRANCH)))
6288 return IEU1;
6289 else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
6290 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY)))
6291 return IEUN;
6292 else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
6293 TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
6294 TMASK (TYPE_FPSTORE)))
6295 return LSU;
6296 else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
6297 TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRT)))
6298 return FPM;
6299 else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
6300 TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
6301 return FPA;
6302 else if (type_mask & TMASK (TYPE_BRANCH))
6303 return CTI;
6305 return SINGLE;
6308 /* Check INSN (a conditional move) and make sure that it's
6309 results are available at this cycle. Return 1 if the
6310 results are in fact ready. */
6311 static int
6312 ultra_cmove_results_ready_p (insn)
6313 rtx insn;
6315 struct ultrasparc_pipeline_state *up;
6316 int entry, slot;
6318 /* If this got dispatched in the previous
6319 group, the results are not ready. */
6320 entry = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
6321 up = &ultra_pipe_hist[entry];
6322 slot = 4;
6323 while (--slot >= 0)
6324 if (up->group[slot] == insn)
6325 return 0;
6327 return 1;
6330 /* Walk backwards in pipeline history looking for FPU
6331 operations which use a mode different than FPMODE and
6332 will create a stall if an insn using FPMODE were to be
6333 dispatched this cycle.
     Returns nonzero if such a conflicting insn is found within the last
     (at most 4) cycles of history. */
6334 static int
6335 ultra_fpmode_conflict_exists (fpmode)
6336 enum machine_mode fpmode;
6338 int hist_ent;
6339 int hist_lim;
     /* NOTE(review): when ultra_cur_hist is 0 this remainder is -1, and
        the same can happen to hist_ent in the decrement at the loop
        bottom, giving a negative array index below -- confirm whether
        the cycle accounting prevents this in practice. */
6341 hist_ent = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
     /* Only the last 4 cycles of history matter, and never more cycles
        than have actually elapsed. */
6342 if (ultra_cycles_elapsed < 4)
6343 hist_lim = ultra_cycles_elapsed;
6344 else
6345 hist_lim = 4;
6346 while (hist_lim > 0)
6348 struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
6349 int slot = 4;
6351 while (--slot >= 0)
6353 rtx insn = up->group[slot];
6354 enum machine_mode this_mode;
6355 rtx pat;
     /* Only simple SETs with a float-mode destination are relevant. */
6357 if (! insn
6358 || GET_CODE (insn) != INSN
6359 || (pat = PATTERN (insn)) == 0
6360 || GET_CODE (pat) != SET)
6361 continue;
6363 this_mode = GET_MODE (SET_DEST (pat));
6364 if ((this_mode != SFmode
6365 && this_mode != DFmode)
6366 || this_mode == fpmode)
6367 continue;
6369 /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
6370 we will get a stall. Loads and stores are independent
6371 of these rules. */
6372 if (GET_CODE (SET_SRC (pat)) != ABS
6373 && GET_CODE (SET_SRC (pat)) != NEG
6374 && ((TMASK (get_attr_type (insn)) &
6375 (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
6376 TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRT) |
6377 TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
6378 return 1;
6380 hist_lim--;
6381 hist_ent = (hist_ent - 1) % (ULTRA_NUM_HIST - 1);
6384 /* No conflicts, safe to dispatch. */
6385 return 0;
6388 /* Find an instruction in LIST which has one of the
6389 type attributes enumerated in TYPE_MASK. START
6390 says where to begin the search.
     Returns a pointer into LIST at the matching insn, or 0 if no insn
     both matches TYPE_MASK and can be dispatched into the current
     pipeline group without a hazard.
6392 NOTE: This scheme depends upon the fact that we
6393 have less than 32 distinct type attributes. */
     /* Bit mask of insn types present in the current ready list; built
        by ultra_build_types_avail and used to short-circuit searches. */
6395 static int ultra_types_avail;
6397 static rtx *
6398 ultra_find_type (type_mask, list, start)
6399 int type_mask;
6400 rtx *list;
6401 int start;
6403 int i;
6405 /* Short circuit if no such insn exists in the ready
6406 at the moment. */
6407 if ((type_mask & ultra_types_avail) == 0)
6408 return 0;
     /* Scan from START down toward index 0. */
6410 for (i = start; i >= 0; i--)
6412 rtx insn = list[i];
6414 if (recog_memoized (insn) >= 0
6415 && (TMASK(get_attr_type (insn)) & type_mask))
6417 enum machine_mode fpmode = SFmode;
6418 rtx pat = 0;
6419 int slot;
6420 int check_depend = 0;
6421 int check_fpmode_conflict = 0;
     /* For simple non-store SETs, remember to check register
        dependencies against the current group, and fp mode conflicts
        when the destination is a float mode. */
6423 if (GET_CODE (insn) == INSN
6424 && (pat = PATTERN(insn)) != 0
6425 && GET_CODE (pat) == SET
6426 && !(type_mask & (TMASK (TYPE_STORE) |
6427 TMASK (TYPE_FPSTORE))))
6429 check_depend = 1;
6430 if (GET_MODE (SET_DEST (pat)) == SFmode
6431 || GET_MODE (SET_DEST (pat)) == DFmode)
6433 fpmode = GET_MODE (SET_DEST (pat));
6434 check_fpmode_conflict = 1;
     /* Examine every insn already placed in the current group. */
6438 slot = 4;
6439 while(--slot >= 0)
6441 rtx slot_insn = ultra_pipe.group[slot];
6442 rtx slot_pat;
6444 /* Already issued, bad dependency, or FPU
6445 mode conflict. */
6446 if (slot_insn != 0
6447 && (slot_pat = PATTERN (slot_insn)) != 0
6448 && ((insn == slot_insn)
6449 || (check_depend == 1
6450 && GET_CODE (slot_insn) == INSN
6451 && GET_CODE (slot_pat) == SET
6452 && ((GET_CODE (SET_DEST (slot_pat)) == REG
6453 && GET_CODE (SET_SRC (pat)) == REG
6454 && REGNO (SET_DEST (slot_pat)) ==
6455 REGNO (SET_SRC (pat)))
6456 || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
6457 && GET_CODE (SET_SRC (pat)) == SUBREG
6458 && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
6459 REGNO (SUBREG_REG (SET_SRC (pat)))
6460 && SUBREG_WORD (SET_DEST (slot_pat)) ==
6461 SUBREG_WORD (SET_SRC (pat)))))
6462 || (check_fpmode_conflict == 1
6463 && GET_CODE (slot_insn) == INSN
6464 && GET_CODE (slot_pat) == SET
6465 && (GET_MODE (SET_DEST (slot_pat)) == SFmode
6466 || GET_MODE (SET_DEST (slot_pat)) == DFmode)
6467 && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
6468 goto next;
6471 /* Check for peculiar result availability and dispatch
6472 interference situations. */
6473 if (pat != 0
6474 && ultra_cycles_elapsed > 0)
6476 rtx link;
     /* A conditional move feeding this insn must have its result
        ready, i.e. it must not have issued only last cycle. */
6478 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6480 rtx link_insn = XEXP (link, 0);
6481 if (GET_CODE (link_insn) == INSN
6482 && recog_memoized (link_insn) >= 0
6483 && (TMASK (get_attr_type (link_insn)) &
6484 (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
6485 && ! ultra_cmove_results_ready_p (link_insn))
6486 goto next;
6489 if (check_fpmode_conflict
6490 && ultra_fpmode_conflict_exists (fpmode))
6491 goto next;
     /* All hazard checks passed; this is our insn. */
6494 return &list[i];
6496 next:
     /* No dispatchable insn of the requested types. */
6499 return 0;
6502 static void
6503 ultra_build_types_avail (ready, n_ready)
6504 rtx *ready;
6505 int n_ready;
6507 int i = n_ready - 1;
6509 ultra_types_avail = 0;
6510 while(i >= 0)
6512 rtx insn = ready[i];
6514 if (recog_memoized (insn) >= 0)
6515 ultra_types_avail |= TMASK (get_attr_type (insn));
6517 i -= 1;
6521 /* Place insn pointed to my IP into the pipeline.
6522 Make element THIS of READY be that insn if it
6523 is not already. TYPE indicates the pipeline class
6524 this insn falls into. */
6525 static void
6526 ultra_schedule_insn (ip, ready, this, type)
6527 rtx *ip;
6528 rtx *ready;
6529 int this;
6530 enum ultra_code type;
6532 int pipe_slot;
6533 char mask = ultra_pipe.free_slot_mask;
6535 /* Obtain free slot. */
6536 for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
6537 if ((mask & (1 << pipe_slot)) != 0)
6538 break;
6539 if (pipe_slot == 4)
6540 abort ();
6542 /* In it goes, and it hasn't been committed yet. */
6543 ultra_pipe.group[pipe_slot] = *ip;
6544 ultra_pipe.codes[pipe_slot] = type;
6545 ultra_pipe.contents[type] = 1;
6546 if (UMASK (type) &
6547 (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
6548 ultra_pipe.num_ieu_insns += 1;
6550 ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
6551 ultra_pipe.group_size += 1;
6552 ultra_pipe.commit[pipe_slot] = 0;
6554 /* Update ready list. */
6555 if (ip != &ready[this])
6557 rtx temp = *ip;
6559 *ip = ready[this];
6560 ready[this] = temp;
6564 /* Advance to the next pipeline group. */
6565 static void
6566 ultra_flush_pipeline ()
6568 ultra_cur_hist = (ultra_cur_hist + 1) % (ULTRA_NUM_HIST - 1);
6569 ultra_cycles_elapsed += 1;
6570 bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
6571 ultra_pipe.free_slot_mask = 0xf;
6574 static int ultra_reorder_called_this_block;
6576 /* Init our data structures for this current block. */
6577 void
6578 ultrasparc_sched_init (dump, sched_verbose)
6579 FILE *dump ATTRIBUTE_UNUSED;
6580 int sched_verbose ATTRIBUTE_UNUSED;
6582 bzero ((char *) ultra_pipe_hist, sizeof ultra_pipe_hist);
6583 ultra_cur_hist = 0;
6584 ultra_cycles_elapsed = 0;
6585 ultra_reorder_called_this_block = 0;
6586 ultra_pipe.free_slot_mask = 0xf;
6589 /* INSN has been scheduled, update pipeline commit state
6590 and return how many instructions are still to be
6591 scheduled in this group. */
6593 ultrasparc_variable_issue (insn)
6594 rtx insn;
6596 struct ultrasparc_pipeline_state *up = &ultra_pipe;
6597 int i, left_to_fire;
6599 left_to_fire = 0;
6600 for (i = 0; i < 4; i++)
6602 if (up->group[i] == 0)
6603 continue;
6605 if (up->group[i] == insn)
6607 up->commit[i] = 1;
6609 else if (! up->commit[i])
6610 left_to_fire++;
6613 return left_to_fire;
6616 /* In actual_hazard_this_instance, we may have yanked some
6617 instructions from the ready list due to conflict cost
6618 adjustments. If so, and such an insn was in our pipeline
6619 group, remove it and update state. */
6620 static void
6621 ultra_rescan_pipeline_state (ready, n_ready)
6622 rtx *ready;
6623 int n_ready;
6625 struct ultrasparc_pipeline_state *up = &ultra_pipe;
6626 int i;
6628 for (i = 0; i < 4; i++)
6630 rtx insn = up->group[i];
6631 int j;
6633 if (! insn)
6634 continue;
6636 /* If it has been committed, then it was removed from
6637 the ready list because it was actually scheduled,
6638 and that is not the case we are searching for here. */
6639 if (up->commit[i] != 0)
6640 continue;
6642 for (j = n_ready - 1; j >= 0; j--)
6643 if (ready[j] == insn)
6644 break;
6646 /* If we didn't find it, toss it. */
6647 if (j < 0)
6649 enum ultra_code ucode = up->codes[i];
6651 up->group[i] = 0;
6652 up->codes[i] = NONE;
6653 up->contents[ucode] = 0;
6654 if (UMASK (ucode) &
6655 (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
6656 up->num_ieu_insns -= 1;
6658 up->free_slot_mask |= (1 << i);
6659 up->group_size -= 1;
6660 up->commit[i] = 0;
6665 void
6666 ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
6667 FILE *dump;
6668 int sched_verbose;
6669 rtx *ready;
6670 int n_ready;
6672 struct ultrasparc_pipeline_state *up = &ultra_pipe;
6673 int i, this_insn;
6675 /* We get called once unnecessarily per block of insns
6676 scheduled. */
6677 if (ultra_reorder_called_this_block == 0)
6679 ultra_reorder_called_this_block = 1;
6680 return;
6683 if (sched_verbose)
6685 int n;
6687 fprintf (dump, "\n;;\tUltraSPARC Looking at [");
6688 for (n = n_ready - 1; n >= 0; n--)
6690 rtx insn = ready[n];
6691 enum ultra_code ucode;
6693 if (recog_memoized (insn) < 0)
6694 continue;
6695 ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
6696 if (n != 0)
6697 fprintf (dump, "%s(%d) ",
6698 ultra_code_names[ucode],
6699 INSN_UID (insn));
6700 else
6701 fprintf (dump, "%s(%d)",
6702 ultra_code_names[ucode],
6703 INSN_UID (insn));
6705 fprintf (dump, "]\n");
6708 this_insn = n_ready - 1;
6710 /* Skip over junk we don't understand. */
6711 while ((this_insn >= 0)
6712 && recog_memoized (ready[this_insn]) < 0)
6713 this_insn--;
6715 ultra_build_types_avail (ready, this_insn + 1);
6717 while (this_insn >= 0) {
6718 int old_group_size = up->group_size;
6720 if (up->group_size != 0)
6722 int num_committed;
6724 num_committed = (up->commit[0] + up->commit[1] +
6725 up->commit[2] + up->commit[3]);
6726 /* If nothing has been commited from our group, or all of
6727 them have. Clear out the (current cycle's) pipeline
6728 state and start afresh. */
6729 if (num_committed == 0
6730 || num_committed == up->group_size)
6732 ultra_flush_pipeline ();
6733 up = &ultra_pipe;
6734 old_group_size = 0;
6736 else
6738 /* OK, some ready list insns got requeued and thus removed
6739 from the ready list. Account for this fact. */
6740 ultra_rescan_pipeline_state (ready, n_ready);
6742 /* Something "changed", make this look like a newly
6743 formed group so the code at the end of the loop
6744 knows that progress was in fact made. */
6745 if (up->group_size != old_group_size)
6746 old_group_size = 0;
6750 if (up->group_size == 0)
6752 /* If the pipeline is (still) empty and we have any single
6753 group insns, get them out now as this is a good time. */
6754 rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_ADDRESS) |
6755 TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
6756 TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
6757 ready, this_insn);
6758 if (ip)
6760 ultra_schedule_insn (ip, ready, this_insn, SINGLE);
6761 break;
6764 /* If we are not in the process of emptying out the pipe, try to
6765 obtain an instruction which must be the first in it's group. */
6766 ip = ultra_find_type ((TMASK (TYPE_CALL) |
6767 TMASK (TYPE_CALL_NO_DELAY_SLOT) |
6768 TMASK (TYPE_UNCOND_BRANCH)),
6769 ready, this_insn);
6770 if (ip)
6772 ultra_schedule_insn (ip, ready, this_insn, IEU1);
6773 this_insn--;
6775 else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
6776 TMASK (TYPE_FPDIVD) |
6777 TMASK (TYPE_FPSQRT)),
6778 ready, this_insn)) != 0)
6780 ultra_schedule_insn (ip, ready, this_insn, FPM);
6781 this_insn--;
6785 /* Try to fill the integer pipeline. First, look for an IEU0 specific
6786 operation. We can't do more IEU operations if the first 3 slots are
6787 all full or we have dispatched two IEU insns already. */
6788 if ((up->free_slot_mask & 0x7) != 0
6789 && up->num_ieu_insns < 2
6790 && up->contents[IEU0] == 0
6791 && up->contents[IEUN] == 0)
6793 rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
6794 if (ip)
6796 ultra_schedule_insn (ip, ready, this_insn, IEU0);
6797 this_insn--;
6801 /* If we can, try to find an IEU1 specific or an unnamed
6802 IEU instruction. */
6803 if ((up->free_slot_mask & 0x7) != 0
6804 && up->num_ieu_insns < 2)
6806 rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
6807 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY) |
6808 (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
6809 ready, this_insn);
6810 if (ip)
6812 rtx insn = *ip;
6814 ultra_schedule_insn (ip, ready, this_insn,
6815 (!up->contents[IEU1]
6816 && get_attr_type (insn) == TYPE_COMPARE)
6817 ? IEU1 : IEUN);
6818 this_insn--;
6822 /* If only one IEU insn has been found, try to find another unnamed
6823 IEU operation or an IEU1 specific one. */
6824 if ((up->free_slot_mask & 0x7) != 0
6825 && up->num_ieu_insns < 2)
6827 rtx *ip;
6828 int tmask = (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
6829 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY));
6831 if (!up->contents[IEU1])
6832 tmask |= TMASK (TYPE_COMPARE);
6833 ip = ultra_find_type (tmask, ready, this_insn);
6834 if (ip)
6836 rtx insn = *ip;
6838 ultra_schedule_insn (ip, ready, this_insn,
6839 (!up->contents[IEU1]
6840 && get_attr_type (insn) == TYPE_COMPARE)
6841 ? IEU1 : IEUN);
6842 this_insn--;
6846 /* Try for a load or store, but such an insn can only be issued
6847 if it is within' one of the first 3 slots. */
6848 if ((up->free_slot_mask & 0x7) != 0
6849 && up->contents[LSU] == 0)
6851 rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
6852 TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
6853 TMASK (TYPE_FPSTORE)), ready, this_insn);
6854 if (ip)
6856 ultra_schedule_insn (ip, ready, this_insn, LSU);
6857 this_insn--;
6861 /* Now find FPU operations, first FPM class. But not divisions or
6862 square-roots because those will break the group up. Unlike all
6863 the previous types, these can go in any slot. */
6864 if (up->free_slot_mask != 0
6865 && up->contents[FPM] == 0)
6867 rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
6868 if (ip)
6870 ultra_schedule_insn (ip, ready, this_insn, FPM);
6871 this_insn--;
6875 /* Continue on with FPA class if we have not filled the group already. */
6876 if (up->free_slot_mask != 0
6877 && up->contents[FPA] == 0)
6879 rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
6880 TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
6881 ready, this_insn);
6882 if (ip)
6884 ultra_schedule_insn (ip, ready, this_insn, FPA);
6885 this_insn--;
6889 /* Finally, maybe stick a branch in here. */
6890 if (up->free_slot_mask != 0
6891 && up->contents[CTI] == 0)
6893 rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
6895 /* Try to slip in a branch only if it is one of the
6896 next 2 in the ready list. */
6897 if (ip && ((&ready[this_insn] - ip) < 2))
6899 ultra_schedule_insn (ip, ready, this_insn, CTI);
6900 this_insn--;
6904 up->group_size = 0;
6905 for (i = 0; i < 4; i++)
6906 if ((up->free_slot_mask & (1 << i)) == 0)
6907 up->group_size++;
6909 /* See if we made any progress... */
6910 if (old_group_size != up->group_size)
6911 break;
6913 /* Clean out the (current cycle's) pipeline state
6914 and try once more. If we placed no instructions
6915 into the pipeline at all, it means a real hard
6916 conflict exists with some earlier issued instruction
6917 so we must advance to the next cycle to clear it up. */
6918 if (up->group_size == 0)
6920 ultra_flush_pipeline ();
6921 up = &ultra_pipe;
6923 else
6925 bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
6926 ultra_pipe.free_slot_mask = 0xf;
6930 if (sched_verbose)
6932 int n, gsize;
6934 fprintf (dump, ";;\tUltraSPARC Launched [");
6935 gsize = up->group_size;
6936 for (n = 0; n < 4; n++)
6938 rtx insn = up->group[n];
6940 if (! insn)
6941 continue;
6943 gsize -= 1;
6944 if (gsize != 0)
6945 fprintf (dump, "%s(%d) ",
6946 ultra_code_names[up->codes[n]],
6947 INSN_UID (insn));
6948 else
6949 fprintf (dump, "%s(%d)",
6950 ultra_code_names[up->codes[n]],
6951 INSN_UID (insn));
6953 fprintf (dump, "]\n");
/* Adjust COST, the scheduler's latency (in cycles) of the dependency LINK
   between DEP_INSN (producer) and INSN (consumer), for UltraSPARC.
   REG_NOTE_KIND (link) distinguishes true data dependencies (0) from
   anti- (REG_DEP_ANTI) and output (REG_DEP_OUTPUT) dependencies.
   NOTE(review): lines below keep their blob line-number prefixes and the
   brace-only lines lost in extraction; the code text itself is untouched.  */
6958 ultrasparc_adjust_cost (insn, link, dep_insn, cost)
6959 rtx insn;
6960 rtx link;
6961 rtx dep_insn;
6962 int cost;
6964 enum attr_type insn_type, dep_type;
6965 rtx pat = PATTERN(insn);
6966 rtx dep_pat = PATTERN (dep_insn);
/* Unrecognized insns have no type attribute; leave their cost alone.  */
6968 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6969 return cost;
6971 insn_type = get_attr_type (insn);
6972 dep_type = get_attr_type (dep_insn);
6974 /* Nothing issues in parallel with integer multiplies, so
6975 mark as zero cost since the scheduler can not do anything
6976 about it. */
6977 if (insn_type == TYPE_IMUL)
6978 return 0;
/* True when DEP produces its result in the long-latency FP divide/sqrt
   unit; used both for FP stores below and for anti-dependencies.  */
6980 #define SLOW_FP(dep_type) \
6981 (dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
6983 switch (REG_NOTE_KIND (link))
6985 case 0:
6986 /* Data dependency; DEP_INSN writes a register that INSN reads some
6987 cycles later. */
6989 if (dep_type == TYPE_CMOVE)
6991 /* Instructions that read the result of conditional moves cannot
6992 be in the same group or the following group. */
6993 return cost + 1;
6996 switch (insn_type)
6998 /* UltraSPARC can dual issue a store and an instruction setting
6999 the value stored, except for divide and square root. */
7000 case TYPE_FPSTORE:
7001 if (! SLOW_FP (dep_type))
7002 return 0;
7003 return cost;
7005 case TYPE_STORE:
7006 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7007 return cost;
7009 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7010 /* The dependency between the two instructions is on the data
7011 that is being stored. Assume that the address of the store
7012 is not also dependent. */
7013 return 0;
7014 return cost;
7016 case TYPE_LOAD:
7017 case TYPE_SLOAD:
7018 case TYPE_FPLOAD:
7019 /* A load does not return data until at least 11 cycles after
7020 a store to the same location. 3 cycles are accounted for
7021 in the load latency; add the other 8 here. */
7022 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
7024 /* If the addresses are not equal this may be a false
7025 dependency because pointer aliasing could not be
7026 determined. Add only 2 cycles in that case. 2 is
7027 an arbitrary compromise between 8, which would cause
7028 the scheduler to generate worse code elsewhere to
7029 compensate for a dependency which might not really
7030 exist, and 0. */
7031 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
7032 || GET_CODE (SET_SRC (pat)) != MEM
7033 || GET_CODE (SET_DEST (dep_pat)) != MEM
7034 || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
7035 XEXP (SET_DEST (dep_pat), 0)))
7036 return cost + 2;
7038 return cost + 8;
7040 return cost;
7042 case TYPE_BRANCH:
7043 /* Compare to branch latency is 0. There is no benefit from
7044 separating compare and branch. */
7045 if (dep_type == TYPE_COMPARE)
7046 return 0;
7047 /* Floating point compare to branch latency is less than
7048 compare to conditional move. */
7049 if (dep_type == TYPE_FPCMP)
7050 return cost - 1;
7051 return cost;
7053 case TYPE_FPCMOVE:
7054 /* FMOVR class instructions can not issue in the same cycle
7055 or the cycle after an instruction which writes any
7056 integer register. Model this as cost 2 for dependent
7057 instructions. */
7058 if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
7059 || dep_type == TYPE_BINARY)
7060 && cost < 2)
7061 return 2;
7062 /* Otherwise check as for integer conditional moves. */
/* Deliberate fallthrough into TYPE_CMOVE below.  */
7064 case TYPE_CMOVE:
7065 /* Conditional moves involving integer registers wait until
7066 3 cycles after loads return data. The interlock applies
7067 to all loads, not just dependent loads, but that is hard
7068 to model. */
7069 if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
7070 return cost + 3;
7071 return cost;
7073 default:
7074 break;
7076 break;
7078 case REG_DEP_ANTI:
7079 /* Divide and square root lock destination registers for full latency. */
7080 if (! SLOW_FP (dep_type))
7081 return 0;
7082 break;
7084 case REG_DEP_OUTPUT:
7085 /* IEU and FPU instruction that have the same destination
7086 register cannot be grouped together. */
7087 return cost + 1;
7089 default:
7090 break;
7093 /* Other costs not accounted for:
7094 - Single precision floating point loads lock the other half of
7095 the even/odd register pair.
7096 - Several hazards associated with ldd/std are ignored because these
7097 instructions are rarely generated for V9.
7098 - The floating point pipeline can not have both a single and double
7099 precision operation active at the same time. Format conversions
7100 and graphics instructions are given honorary double precision status.
7101 - call and jmpl are always the first instruction in a group. */
7103 return cost;
7106 int
7107 sparc_issue_rate ()
7109 switch (sparc_cpu)
7111 default:
7112 return 1;
7113 case PROCESSOR_V9:
7114 /* Assume V9 processors are capable of at least dual-issue. */
7115 return 2;
7116 case PROCESSOR_SUPERSPARC:
7117 return 3;
7118 case PROCESSOR_ULTRASPARC:
7119 return 4;
/* Classify how the SET in INSN affects the upper 32 bits of its result:
   return 1 if the high bits are known zero (zero-extension), -1 if the
   value is sign-extended from SImode, 0 if unknown.  X is the tracked
   value, consulted only for the CONST_INT/CONST_DOUBLE cases.
   Helper for sparc_check_64.
   NOTE(review): lines keep their blob line-number prefixes; brace-only
   lines were lost in extraction.  */
7123 static int
7124 set_extends(x, insn)
7125 rtx x, insn;
7127 register rtx pat = PATTERN (insn);
7129 switch (GET_CODE (SET_SRC (pat)))
7131 /* Load and some shift instructions zero extend. */
7132 case MEM:
7133 case ZERO_EXTEND:
7134 /* sethi clears the high bits */
7135 case HIGH:
7136 /* LO_SUM is used with sethi. sethi cleared the high
7137 bits and the values used with lo_sum are positive */
7138 case LO_SUM:
7139 /* Store flag stores 0 or 1 */
7140 case LT: case LTU:
7141 case GT: case GTU:
7142 case LE: case LEU:
7143 case GE: case GEU:
7144 case EQ:
7145 case NE:
7146 return 1;
7147 case AND:
7149 rtx op1 = XEXP (SET_SRC (pat), 1);
/* AND with a non-negative constant, or with an operand already known
   zero-extended (recursively via sparc_check_64), keeps high bits zero.  */
7150 if (GET_CODE (op1) == CONST_INT)
7151 return INTVAL (op1) >= 0;
7152 if (GET_CODE (XEXP (SET_SRC (pat), 0)) == REG
7153 && sparc_check_64 (XEXP (SET_SRC (pat), 0), insn) == 1)
7154 return 1;
7155 if (GET_CODE (op1) == REG
7156 && sparc_check_64 ((op1), insn) == 1)
7157 return 1;
/* NOTE(review): when none of the AND tests fire, control appears to fall
   through to the shift cases below (no break in the original; brace
   lines are missing here) — confirm against the unmangled source.  */
7159 case ASHIFT:
7160 case LSHIFTRT:
7161 return GET_MODE (SET_SRC (pat)) == SImode;
7162 /* Positive integers leave the high bits zero. */
7163 case CONST_DOUBLE:
7164 return ! (CONST_DOUBLE_LOW (x) & 0x80000000);
7165 case CONST_INT:
7166 return ! (INTVAL (x) & 0x80000000);
7167 case ASHIFTRT:
7168 case SIGN_EXTEND:
/* Yields -1 for SImode (sign-extended), 0 otherwise.  */
7169 return - (GET_MODE (SET_SRC (pat)) == SImode);
7170 default:
7171 return 0;
/* Case vectors deferred by sparc_defer_case_vector and emitted later by
   sparc_output_deferred_case_vectors: one EXPR_LIST chain for
   ADDR_DIFF_VECs, one for plain ADDR_VECs.  */
7175 /* We _ought_ to have only one kind per function, but... */
7176 static rtx sparc_addr_diff_list;
7177 static rtx sparc_addr_list;
7179 void
7180 sparc_defer_case_vector (lab, vec, diff)
7181 rtx lab, vec;
7182 int diff;
7184 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7185 if (diff)
7186 sparc_addr_diff_list
7187 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7188 else
7189 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7192 static void
7193 sparc_output_addr_vec (vec)
7194 rtx vec;
7196 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7197 int idx, vlen = XVECLEN (body, 0);
7199 #ifdef ASM_OUTPUT_ADDR_VEC_START
7200 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7201 #endif
7203 #ifdef ASM_OUTPUT_CASE_LABEL
7204 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7205 NEXT_INSN (lab));
7206 #else
7207 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7208 #endif
7210 for (idx = 0; idx < vlen; idx++)
7212 ASM_OUTPUT_ADDR_VEC_ELT
7213 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
7216 #ifdef ASM_OUTPUT_ADDR_VEC_END
7217 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7218 #endif
7221 static void
7222 sparc_output_addr_diff_vec (vec)
7223 rtx vec;
7225 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7226 rtx base = XEXP (XEXP (body, 0), 0);
7227 int idx, vlen = XVECLEN (body, 1);
7229 #ifdef ASM_OUTPUT_ADDR_VEC_START
7230 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7231 #endif
7233 #ifdef ASM_OUTPUT_CASE_LABEL
7234 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7235 NEXT_INSN (lab));
7236 #else
7237 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7238 #endif
7240 for (idx = 0; idx < vlen; idx++)
7242 ASM_OUTPUT_ADDR_DIFF_ELT
7243 (asm_out_file,
7244 body,
7245 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
7246 CODE_LABEL_NUMBER (base));
7249 #ifdef ASM_OUTPUT_ADDR_VEC_END
7250 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7251 #endif
7254 static void
7255 sparc_output_deferred_case_vectors ()
7257 rtx t;
7258 int align;
7260 if (sparc_addr_list == NULL_RTX
7261 && sparc_addr_diff_list == NULL_RTX)
7262 return;
7264 /* Align to cache line in the function's code section. */
7265 function_section (current_function_decl);
7267 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7268 if (align > 0)
7269 ASM_OUTPUT_ALIGN (asm_out_file, align);
7271 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7272 sparc_output_addr_vec (XEXP (t, 0));
7273 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7274 sparc_output_addr_diff_vec (XEXP (t, 0));
7276 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7279 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
7280 unknown. Return 1 if the high bits are zero, -1 if the register is
7281 sign extended. */
/* X is the register (or value) being tracked; INSN is where to start the
   backward scan, or 0 to start from the last insn when X is set once.
   Classification of each candidate set is done by set_extends above.
   NOTE(review): lines keep their blob line-number prefixes; brace-only
   lines were lost in extraction.  */
7283 sparc_check_64 (x, insn)
7284 rtx x, insn;
7286 /* If a register is set only once it is safe to ignore insns this
7287 code does not know how to handle. The loop will either recognize
7288 the single set and return the correct value or fail to recognize
7289 it and return 0. */
7290 int set_once = 0;
7292 if (GET_CODE (x) == REG
7293 && flag_expensive_optimizations
7294 && REG_N_SETS (REGNO (x)) == 1)
7295 set_once = 1;
7297 if (insn == 0)
7299 if (set_once)
7300 insn = get_last_insn_anywhere ();
7301 else
7302 return 0;
/* Walk backwards looking for the insn that sets X.  */
7305 while ((insn = PREV_INSN (insn)))
7307 switch (GET_CODE (insn))
7309 case JUMP_INSN:
7310 case NOTE:
7311 break;
7312 case CODE_LABEL:
7313 case CALL_INSN:
7314 default:
/* Labels, calls and anything unrecognized could change X along another
   path; conservatively give up unless X is known to be set only once.  */
7315 if (! set_once)
7316 return 0;
7317 break;
7318 case INSN:
7320 rtx pat = PATTERN (insn);
7321 if (GET_CODE (pat) != SET)
7322 return 0;
7323 if (rtx_equal_p (x, SET_DEST (pat)))
7324 return set_extends (x, insn);
/* A partial overlap (e.g. a subreg store) leaves the high bits unknown.  */
7325 if (reg_overlap_mentioned_p (SET_DEST (pat), x))
7326 return 0;
7330 return 0;
7333 char *
7334 sparc_v8plus_shift (operands, insn, opcode)
7335 rtx *operands;
7336 rtx insn;
7337 char *opcode;
7339 static char asm_code[60];
7341 if (GET_CODE (operands[3]) == SCRATCH)
7342 operands[3] = operands[0];
7343 if (GET_CODE (operands[1]) == CONST_INT)
7345 output_asm_insn ("mov %1,%3", operands);
7347 else
7349 output_asm_insn ("sllx %H1,32,%3", operands);
7350 if (sparc_check_64 (operands[1], insn) <= 0)
7351 output_asm_insn ("srl %L1,0,%L1", operands);
7352 output_asm_insn ("or %L1,%3,%3", operands);
7355 strcpy(asm_code, opcode);
7356 if (which_alternative != 2)
7357 return strcat (asm_code, " %0,%2,%L0\n\tsrlx %L0,32,%H0");
7358 else
7359 return strcat (asm_code, " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0");
7363 /* Return 1 if DEST and SRC reference only global and in registers. */
/* Predicate for the V9 return peephole: the move feeding a return may be
   folded only when both operands live in %i or %g registers (or SRC is a
   constant).  Never applies to leaf functions or pre-V9 targets.
   NOTE(review): lines keep their blob line-number prefixes; the `int`
   return-type line and braces were lost in extraction.  */
7366 sparc_return_peephole_ok (dest, src)
7367 rtx dest, src;
7369 if (! TARGET_V9)
7370 return 0;
7371 if (leaf_function)
7372 return 0;
7373 if (GET_CODE (src) != CONST_INT
7374 && (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src)))
7375 return 0;
7376 return IN_OR_GLOBAL_P (dest);