* config/arc/arc.c: Include "df.h".
[official-gcc.git] / gcc / config / sparc / sparc.c
blobaf67a46eaf798ab52a2f626774a0d444036555e6
1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Michael Tiemann (tiemann@cygnus.com)
6 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
7 at Cygnus Support.
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3, or (at your option)
14 any later version.
16 GCC is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "tree.h"
30 #include "rtl.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "real.h"
34 #include "insn-config.h"
35 #include "insn-codes.h"
36 #include "conditions.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "recog.h"
44 #include "toplev.h"
45 #include "ggc.h"
46 #include "tm_p.h"
47 #include "debug.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "cfglayout.h"
51 #include "gimple.h"
52 #include "langhooks.h"
53 #include "params.h"
54 #include "df.h"
56 /* Processor costs */
57 static const
58 struct processor_costs cypress_costs = {
59 COSTS_N_INSNS (2), /* int load */
60 COSTS_N_INSNS (2), /* int signed load */
61 COSTS_N_INSNS (2), /* int zeroed load */
62 COSTS_N_INSNS (2), /* float load */
63 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
64 COSTS_N_INSNS (5), /* fadd, fsub */
65 COSTS_N_INSNS (1), /* fcmp */
66 COSTS_N_INSNS (1), /* fmov, fmovr */
67 COSTS_N_INSNS (7), /* fmul */
68 COSTS_N_INSNS (37), /* fdivs */
69 COSTS_N_INSNS (37), /* fdivd */
70 COSTS_N_INSNS (63), /* fsqrts */
71 COSTS_N_INSNS (63), /* fsqrtd */
72 COSTS_N_INSNS (1), /* imul */
73 COSTS_N_INSNS (1), /* imulX */
74 0, /* imul bit factor */
75 COSTS_N_INSNS (1), /* idiv */
76 COSTS_N_INSNS (1), /* idivX */
77 COSTS_N_INSNS (1), /* movcc/movr */
78 0, /* shift penalty */
81 static const
82 struct processor_costs supersparc_costs = {
83 COSTS_N_INSNS (1), /* int load */
84 COSTS_N_INSNS (1), /* int signed load */
85 COSTS_N_INSNS (1), /* int zeroed load */
86 COSTS_N_INSNS (0), /* float load */
87 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
88 COSTS_N_INSNS (3), /* fadd, fsub */
89 COSTS_N_INSNS (3), /* fcmp */
90 COSTS_N_INSNS (1), /* fmov, fmovr */
91 COSTS_N_INSNS (3), /* fmul */
92 COSTS_N_INSNS (6), /* fdivs */
93 COSTS_N_INSNS (9), /* fdivd */
94 COSTS_N_INSNS (12), /* fsqrts */
95 COSTS_N_INSNS (12), /* fsqrtd */
96 COSTS_N_INSNS (4), /* imul */
97 COSTS_N_INSNS (4), /* imulX */
98 0, /* imul bit factor */
99 COSTS_N_INSNS (4), /* idiv */
100 COSTS_N_INSNS (4), /* idivX */
101 COSTS_N_INSNS (1), /* movcc/movr */
102 1, /* shift penalty */
105 static const
106 struct processor_costs hypersparc_costs = {
107 COSTS_N_INSNS (1), /* int load */
108 COSTS_N_INSNS (1), /* int signed load */
109 COSTS_N_INSNS (1), /* int zeroed load */
110 COSTS_N_INSNS (1), /* float load */
111 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
112 COSTS_N_INSNS (1), /* fadd, fsub */
113 COSTS_N_INSNS (1), /* fcmp */
114 COSTS_N_INSNS (1), /* fmov, fmovr */
115 COSTS_N_INSNS (1), /* fmul */
116 COSTS_N_INSNS (8), /* fdivs */
117 COSTS_N_INSNS (12), /* fdivd */
118 COSTS_N_INSNS (17), /* fsqrts */
119 COSTS_N_INSNS (17), /* fsqrtd */
120 COSTS_N_INSNS (17), /* imul */
121 COSTS_N_INSNS (17), /* imulX */
122 0, /* imul bit factor */
123 COSTS_N_INSNS (17), /* idiv */
124 COSTS_N_INSNS (17), /* idivX */
125 COSTS_N_INSNS (1), /* movcc/movr */
126 0, /* shift penalty */
129 static const
130 struct processor_costs sparclet_costs = {
131 COSTS_N_INSNS (3), /* int load */
132 COSTS_N_INSNS (3), /* int signed load */
133 COSTS_N_INSNS (1), /* int zeroed load */
134 COSTS_N_INSNS (1), /* float load */
135 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
136 COSTS_N_INSNS (1), /* fadd, fsub */
137 COSTS_N_INSNS (1), /* fcmp */
138 COSTS_N_INSNS (1), /* fmov, fmovr */
139 COSTS_N_INSNS (1), /* fmul */
140 COSTS_N_INSNS (1), /* fdivs */
141 COSTS_N_INSNS (1), /* fdivd */
142 COSTS_N_INSNS (1), /* fsqrts */
143 COSTS_N_INSNS (1), /* fsqrtd */
144 COSTS_N_INSNS (5), /* imul */
145 COSTS_N_INSNS (5), /* imulX */
146 0, /* imul bit factor */
147 COSTS_N_INSNS (5), /* idiv */
148 COSTS_N_INSNS (5), /* idivX */
149 COSTS_N_INSNS (1), /* movcc/movr */
150 0, /* shift penalty */
153 static const
154 struct processor_costs ultrasparc_costs = {
155 COSTS_N_INSNS (2), /* int load */
156 COSTS_N_INSNS (3), /* int signed load */
157 COSTS_N_INSNS (2), /* int zeroed load */
158 COSTS_N_INSNS (2), /* float load */
159 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
160 COSTS_N_INSNS (4), /* fadd, fsub */
161 COSTS_N_INSNS (1), /* fcmp */
162 COSTS_N_INSNS (2), /* fmov, fmovr */
163 COSTS_N_INSNS (4), /* fmul */
164 COSTS_N_INSNS (13), /* fdivs */
165 COSTS_N_INSNS (23), /* fdivd */
166 COSTS_N_INSNS (13), /* fsqrts */
167 COSTS_N_INSNS (23), /* fsqrtd */
168 COSTS_N_INSNS (4), /* imul */
169 COSTS_N_INSNS (4), /* imulX */
170 2, /* imul bit factor */
171 COSTS_N_INSNS (37), /* idiv */
172 COSTS_N_INSNS (68), /* idivX */
173 COSTS_N_INSNS (2), /* movcc/movr */
174 2, /* shift penalty */
177 static const
178 struct processor_costs ultrasparc3_costs = {
179 COSTS_N_INSNS (2), /* int load */
180 COSTS_N_INSNS (3), /* int signed load */
181 COSTS_N_INSNS (3), /* int zeroed load */
182 COSTS_N_INSNS (2), /* float load */
183 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
184 COSTS_N_INSNS (4), /* fadd, fsub */
185 COSTS_N_INSNS (5), /* fcmp */
186 COSTS_N_INSNS (3), /* fmov, fmovr */
187 COSTS_N_INSNS (4), /* fmul */
188 COSTS_N_INSNS (17), /* fdivs */
189 COSTS_N_INSNS (20), /* fdivd */
190 COSTS_N_INSNS (20), /* fsqrts */
191 COSTS_N_INSNS (29), /* fsqrtd */
192 COSTS_N_INSNS (6), /* imul */
193 COSTS_N_INSNS (6), /* imulX */
194 0, /* imul bit factor */
195 COSTS_N_INSNS (40), /* idiv */
196 COSTS_N_INSNS (71), /* idivX */
197 COSTS_N_INSNS (2), /* movcc/movr */
198 0, /* shift penalty */
201 static const
202 struct processor_costs niagara_costs = {
203 COSTS_N_INSNS (3), /* int load */
204 COSTS_N_INSNS (3), /* int signed load */
205 COSTS_N_INSNS (3), /* int zeroed load */
206 COSTS_N_INSNS (9), /* float load */
207 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
208 COSTS_N_INSNS (8), /* fadd, fsub */
209 COSTS_N_INSNS (26), /* fcmp */
210 COSTS_N_INSNS (8), /* fmov, fmovr */
211 COSTS_N_INSNS (29), /* fmul */
212 COSTS_N_INSNS (54), /* fdivs */
213 COSTS_N_INSNS (83), /* fdivd */
214 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
215 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
216 COSTS_N_INSNS (11), /* imul */
217 COSTS_N_INSNS (11), /* imulX */
218 0, /* imul bit factor */
219 COSTS_N_INSNS (72), /* idiv */
220 COSTS_N_INSNS (72), /* idivX */
221 COSTS_N_INSNS (1), /* movcc/movr */
222 0, /* shift penalty */
225 static const
226 struct processor_costs niagara2_costs = {
227 COSTS_N_INSNS (3), /* int load */
228 COSTS_N_INSNS (3), /* int signed load */
229 COSTS_N_INSNS (3), /* int zeroed load */
230 COSTS_N_INSNS (3), /* float load */
231 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
232 COSTS_N_INSNS (6), /* fadd, fsub */
233 COSTS_N_INSNS (6), /* fcmp */
234 COSTS_N_INSNS (6), /* fmov, fmovr */
235 COSTS_N_INSNS (6), /* fmul */
236 COSTS_N_INSNS (19), /* fdivs */
237 COSTS_N_INSNS (33), /* fdivd */
238 COSTS_N_INSNS (19), /* fsqrts */
239 COSTS_N_INSNS (33), /* fsqrtd */
240 COSTS_N_INSNS (5), /* imul */
241 COSTS_N_INSNS (5), /* imulX */
242 0, /* imul bit factor */
243 COSTS_N_INSNS (31), /* idiv, average of 12 - 41 cycle range */
244 COSTS_N_INSNS (31), /* idivX, average of 12 - 41 cycle range */
245 COSTS_N_INSNS (1), /* movcc/movr */
246 0, /* shift penalty */
249 const struct processor_costs *sparc_costs = &cypress_costs;
251 #ifdef HAVE_AS_RELAX_OPTION
252 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
253 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
254 With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
255 somebody does not branch between the sethi and jmp. */
256 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
257 #else
258 #define LEAF_SIBCALL_SLOT_RESERVED_P \
259 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
260 #endif
262 /* Global variables for machine-dependent things. */
264 /* Size of frame. Need to know this to emit return insns from leaf procedures.
265 ACTUAL_FSIZE is set by sparc_compute_frame_size() which is called during the
266 reload pass. This is important as the value is later used for scheduling
267 (to see what can go in a delay slot).
268 APPARENT_FSIZE is the size of the stack less the register save area and less
269 the outgoing argument area. It is used when saving call preserved regs. */
270 static HOST_WIDE_INT apparent_fsize;
271 static HOST_WIDE_INT actual_fsize;
273 /* Number of live general or floating point registers needed to be
274 saved (as 4-byte quantities). */
275 static int num_gfregs;
277 /* The alias set for prologue/epilogue register save/restore. */
278 static GTY(()) alias_set_type sparc_sr_alias_set;
280 /* The alias set for the structure return value. */
281 static GTY(()) alias_set_type struct_value_alias_set;
283 /* Vector to say how input registers are mapped to output registers.
284 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
285 eliminate it. You must use -fomit-frame-pointer to get that. */
286 char leaf_reg_remap[] =
287 { 0, 1, 2, 3, 4, 5, 6, 7,
288 -1, -1, -1, -1, -1, -1, 14, -1,
289 -1, -1, -1, -1, -1, -1, -1, -1,
290 8, 9, 10, 11, 12, 13, -1, 15,
292 32, 33, 34, 35, 36, 37, 38, 39,
293 40, 41, 42, 43, 44, 45, 46, 47,
294 48, 49, 50, 51, 52, 53, 54, 55,
295 56, 57, 58, 59, 60, 61, 62, 63,
296 64, 65, 66, 67, 68, 69, 70, 71,
297 72, 73, 74, 75, 76, 77, 78, 79,
298 80, 81, 82, 83, 84, 85, 86, 87,
299 88, 89, 90, 91, 92, 93, 94, 95,
300 96, 97, 98, 99, 100};
302 /* Vector, indexed by hard register number, which contains 1
303 for a register that is allowable in a candidate for leaf
304 function treatment. */
305 char sparc_leaf_regs[] =
306 { 1, 1, 1, 1, 1, 1, 1, 1,
307 0, 0, 0, 0, 0, 0, 1, 0,
308 0, 0, 0, 0, 0, 0, 0, 0,
309 1, 1, 1, 1, 1, 1, 0, 1,
310 1, 1, 1, 1, 1, 1, 1, 1,
311 1, 1, 1, 1, 1, 1, 1, 1,
312 1, 1, 1, 1, 1, 1, 1, 1,
313 1, 1, 1, 1, 1, 1, 1, 1,
314 1, 1, 1, 1, 1, 1, 1, 1,
315 1, 1, 1, 1, 1, 1, 1, 1,
316 1, 1, 1, 1, 1, 1, 1, 1,
317 1, 1, 1, 1, 1, 1, 1, 1,
318 1, 1, 1, 1, 1};
320 struct GTY(()) machine_function
322 /* Some local-dynamic TLS symbol name. */
323 const char *some_ld_name;
325 /* True if the current function is leaf and uses only leaf regs,
326 so that the SPARC leaf function optimization can be applied.
327 Private version of current_function_uses_only_leaf_regs, see
328 sparc_expand_prologue for the rationale. */
329 int leaf_function_p;
331 /* True if the data calculated by sparc_expand_prologue are valid. */
332 bool prologue_data_valid_p;
335 #define sparc_leaf_function_p cfun->machine->leaf_function_p
336 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
338 /* Register we pretend to think the frame pointer is allocated to.
339 Normally, this is %fp, but if we are in a leaf procedure, this
340 is %sp+"something". We record "something" separately as it may
341 be too big for reg+constant addressing. */
342 static rtx frame_base_reg;
343 static HOST_WIDE_INT frame_base_offset;
345 /* 1 if the next opcode is to be specially indented. */
346 int sparc_indent_opcode = 0;
348 static bool sparc_handle_option (size_t, const char *, int);
349 static void sparc_init_modes (void);
350 static void scan_record_type (tree, int *, int *, int *);
351 static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
352 tree, int, int, int *, int *);
354 static int supersparc_adjust_cost (rtx, rtx, rtx, int);
355 static int hypersparc_adjust_cost (rtx, rtx, rtx, int);
357 static void sparc_output_addr_vec (rtx);
358 static void sparc_output_addr_diff_vec (rtx);
359 static void sparc_output_deferred_case_vectors (void);
360 static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
361 static rtx sparc_builtin_saveregs (void);
362 static int epilogue_renumber (rtx *, int);
363 static bool sparc_assemble_integer (rtx, unsigned int, int);
364 static int set_extends (rtx);
365 static void emit_pic_helper (void);
366 static void load_pic_register (bool);
367 static int save_or_restore_regs (int, int, rtx, int, int);
368 static void emit_save_or_restore_regs (int);
369 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
370 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
371 #ifdef OBJECT_FORMAT_ELF
372 static void sparc_elf_asm_named_section (const char *, unsigned int, tree);
373 #endif
375 static int sparc_adjust_cost (rtx, rtx, rtx, int);
376 static int sparc_issue_rate (void);
377 static void sparc_sched_init (FILE *, int, int);
378 static int sparc_use_sched_lookahead (void);
380 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
381 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
382 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
383 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
384 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
386 static bool sparc_function_ok_for_sibcall (tree, tree);
387 static void sparc_init_libfuncs (void);
388 static void sparc_init_builtins (void);
389 static void sparc_vis_init_builtins (void);
390 static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
391 static tree sparc_fold_builtin (tree, tree, bool);
392 static int sparc_vis_mul8x16 (int, int);
393 static tree sparc_handle_vis_mul8x16 (int, tree, tree, tree);
394 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
395 HOST_WIDE_INT, tree);
396 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
397 HOST_WIDE_INT, const_tree);
398 static struct machine_function * sparc_init_machine_status (void);
399 static bool sparc_cannot_force_const_mem (rtx);
400 static rtx sparc_tls_get_addr (void);
401 static rtx sparc_tls_got (void);
402 static const char *get_some_local_dynamic_name (void);
403 static int get_some_local_dynamic_name_1 (rtx *, void *);
404 static bool sparc_rtx_costs (rtx, int, int, int *, bool);
405 static bool sparc_promote_prototypes (const_tree);
406 static rtx sparc_struct_value_rtx (tree, int);
407 static bool sparc_return_in_memory (const_tree, const_tree);
408 static bool sparc_strict_argument_naming (CUMULATIVE_ARGS *);
409 static void sparc_va_start (tree, rtx);
410 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
411 static bool sparc_vector_mode_supported_p (enum machine_mode);
412 static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
413 static bool sparc_pass_by_reference (CUMULATIVE_ARGS *,
414 enum machine_mode, const_tree, bool);
415 static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *,
416 enum machine_mode, tree, bool);
417 static void sparc_dwarf_handle_frame_unspec (const char *, rtx, int);
418 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
419 static void sparc_file_end (void);
420 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
421 static const char *sparc_mangle_type (const_tree);
422 #endif
424 #ifdef SUBTARGET_ATTRIBUTE_TABLE
425 /* Table of valid machine attributes. */
426 static const struct attribute_spec sparc_attribute_table[] =
428 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
429 SUBTARGET_ATTRIBUTE_TABLE,
430 { NULL, 0, 0, false, false, false, NULL }
432 #endif
434 /* Option handling. */
436 /* Parsed value. */
437 enum cmodel sparc_cmodel;
439 char sparc_hard_reg_printed[8];
441 struct sparc_cpu_select sparc_select[] =
443 /* switch name, tune arch */
444 { (char *)0, "default", 1, 1 },
445 { (char *)0, "-mcpu=", 1, 1 },
446 { (char *)0, "-mtune=", 1, 0 },
447 { 0, 0, 0, 0 }
450 /* CPU type. This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx. */
451 enum processor_type sparc_cpu;
453 /* Whether\fan FPU option was specified. */
454 static bool fpu_option_set = false;
456 /* Initialize the GCC target structure. */
458 /* The sparc default is to use .half rather than .short for aligned
459 HI objects. Use .word instead of .long on non-ELF systems. */
460 #undef TARGET_ASM_ALIGNED_HI_OP
461 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
462 #ifndef OBJECT_FORMAT_ELF
463 #undef TARGET_ASM_ALIGNED_SI_OP
464 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
465 #endif
467 #undef TARGET_ASM_UNALIGNED_HI_OP
468 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
469 #undef TARGET_ASM_UNALIGNED_SI_OP
470 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
471 #undef TARGET_ASM_UNALIGNED_DI_OP
472 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
474 /* The target hook has to handle DI-mode values. */
475 #undef TARGET_ASM_INTEGER
476 #define TARGET_ASM_INTEGER sparc_assemble_integer
478 #undef TARGET_ASM_FUNCTION_PROLOGUE
479 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
480 #undef TARGET_ASM_FUNCTION_EPILOGUE
481 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
483 #undef TARGET_SCHED_ADJUST_COST
484 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
485 #undef TARGET_SCHED_ISSUE_RATE
486 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
487 #undef TARGET_SCHED_INIT
488 #define TARGET_SCHED_INIT sparc_sched_init
489 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
490 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
495 #undef TARGET_INIT_LIBFUNCS
496 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
497 #undef TARGET_INIT_BUILTINS
498 #define TARGET_INIT_BUILTINS sparc_init_builtins
500 #undef TARGET_LEGITIMIZE_ADDRESS
501 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
503 #undef TARGET_EXPAND_BUILTIN
504 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
505 #undef TARGET_FOLD_BUILTIN
506 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
508 #if TARGET_TLS
509 #undef TARGET_HAVE_TLS
510 #define TARGET_HAVE_TLS true
511 #endif
513 #undef TARGET_CANNOT_FORCE_CONST_MEM
514 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
516 #undef TARGET_ASM_OUTPUT_MI_THUNK
517 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
518 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
519 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
521 #undef TARGET_RTX_COSTS
522 #define TARGET_RTX_COSTS sparc_rtx_costs
523 #undef TARGET_ADDRESS_COST
524 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
526 /* This is only needed for TARGET_ARCH64, but since PROMOTE_FUNCTION_MODE is a
527 no-op for TARGET_ARCH32 this is ok. Otherwise we'd need to add a runtime
528 test for this value. */
529 #undef TARGET_PROMOTE_FUNCTION_ARGS
530 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
532 /* This is only needed for TARGET_ARCH64, but since PROMOTE_FUNCTION_MODE is a
533 no-op for TARGET_ARCH32 this is ok. Otherwise we'd need to add a runtime
534 test for this value. */
535 #undef TARGET_PROMOTE_FUNCTION_RETURN
536 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
538 #undef TARGET_PROMOTE_PROTOTYPES
539 #define TARGET_PROMOTE_PROTOTYPES sparc_promote_prototypes
541 #undef TARGET_STRUCT_VALUE_RTX
542 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
543 #undef TARGET_RETURN_IN_MEMORY
544 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
545 #undef TARGET_MUST_PASS_IN_STACK
546 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
547 #undef TARGET_PASS_BY_REFERENCE
548 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
549 #undef TARGET_ARG_PARTIAL_BYTES
550 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
552 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
553 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
554 #undef TARGET_STRICT_ARGUMENT_NAMING
555 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
557 #undef TARGET_EXPAND_BUILTIN_VA_START
558 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
559 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
560 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
562 #undef TARGET_VECTOR_MODE_SUPPORTED_P
563 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
565 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
566 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC sparc_dwarf_handle_frame_unspec
568 #ifdef SUBTARGET_INSERT_ATTRIBUTES
569 #undef TARGET_INSERT_ATTRIBUTES
570 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
571 #endif
573 #ifdef SUBTARGET_ATTRIBUTE_TABLE
574 #undef TARGET_ATTRIBUTE_TABLE
575 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
576 #endif
578 #undef TARGET_RELAXED_ORDERING
579 #define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
581 #undef TARGET_DEFAULT_TARGET_FLAGS
582 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
583 #undef TARGET_HANDLE_OPTION
584 #define TARGET_HANDLE_OPTION sparc_handle_option
586 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
587 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
588 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
589 #endif
591 #undef TARGET_ASM_FILE_END
592 #define TARGET_ASM_FILE_END sparc_file_end
594 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
595 #undef TARGET_MANGLE_TYPE
596 #define TARGET_MANGLE_TYPE sparc_mangle_type
597 #endif
599 #undef TARGET_LEGITIMATE_ADDRESS_P
600 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
602 struct gcc_target targetm = TARGET_INITIALIZER;
604 /* Implement TARGET_HANDLE_OPTION. */
606 static bool
607 sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
609 switch (code)
611 case OPT_mfpu:
612 case OPT_mhard_float:
613 case OPT_msoft_float:
614 fpu_option_set = true;
615 break;
617 case OPT_mcpu_:
618 sparc_select[1].string = arg;
619 break;
621 case OPT_mtune_:
622 sparc_select[2].string = arg;
623 break;
626 return true;
629 /* Validate and override various options, and do some machine dependent
630 initialization. */
632 void
633 sparc_override_options (void)
635 static struct code_model {
636 const char *const name;
637 const enum cmodel value;
638 } const cmodels[] = {
639 { "32", CM_32 },
640 { "medlow", CM_MEDLOW },
641 { "medmid", CM_MEDMID },
642 { "medany", CM_MEDANY },
643 { "embmedany", CM_EMBMEDANY },
644 { NULL, (enum cmodel) 0 }
646 const struct code_model *cmodel;
647 /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=. */
648 static struct cpu_default {
649 const int cpu;
650 const char *const name;
651 } const cpu_default[] = {
652 /* There must be one entry here for each TARGET_CPU value. */
653 { TARGET_CPU_sparc, "cypress" },
654 { TARGET_CPU_sparclet, "tsc701" },
655 { TARGET_CPU_sparclite, "f930" },
656 { TARGET_CPU_v8, "v8" },
657 { TARGET_CPU_hypersparc, "hypersparc" },
658 { TARGET_CPU_sparclite86x, "sparclite86x" },
659 { TARGET_CPU_supersparc, "supersparc" },
660 { TARGET_CPU_v9, "v9" },
661 { TARGET_CPU_ultrasparc, "ultrasparc" },
662 { TARGET_CPU_ultrasparc3, "ultrasparc3" },
663 { TARGET_CPU_niagara, "niagara" },
664 { TARGET_CPU_niagara2, "niagara2" },
665 { 0, 0 }
667 const struct cpu_default *def;
668 /* Table of values for -m{cpu,tune}=. */
669 static struct cpu_table {
670 const char *const name;
671 const enum processor_type processor;
672 const int disable;
673 const int enable;
674 } const cpu_table[] = {
675 { "v7", PROCESSOR_V7, MASK_ISA, 0 },
676 { "cypress", PROCESSOR_CYPRESS, MASK_ISA, 0 },
677 { "v8", PROCESSOR_V8, MASK_ISA, MASK_V8 },
678 /* TI TMS390Z55 supersparc */
679 { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
680 { "sparclite", PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
681 /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
682 The Fujitsu MB86934 is the recent sparclite chip, with an fpu. */
683 { "f930", PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
684 { "f934", PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
685 { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
686 { "sparclite86x", PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
687 MASK_SPARCLITE },
688 { "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
689 /* TEMIC sparclet */
690 { "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
691 { "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 },
692 /* TI ultrasparc I, II, IIi */
693 { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
694 /* Although insns using %y are deprecated, it is a clear win on current
695 ultrasparcs. */
696 |MASK_DEPRECATED_V8_INSNS},
697 /* TI ultrasparc III */
698 /* ??? Check if %y issue still holds true in ultra3. */
699 { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
700 /* UltraSPARC T1 */
701 { "niagara", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
702 { "niagara2", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9},
703 { 0, (enum processor_type) 0, 0, 0 }
705 const struct cpu_table *cpu;
706 const struct sparc_cpu_select *sel;
707 int fpu;
709 #ifndef SPARC_BI_ARCH
710 /* Check for unsupported architecture size. */
711 if (! TARGET_64BIT != DEFAULT_ARCH32_P)
712 error ("%s is not supported by this configuration",
713 DEFAULT_ARCH32_P ? "-m64" : "-m32");
714 #endif
716 /* We force all 64bit archs to use 128 bit long double */
717 if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
719 error ("-mlong-double-64 not allowed with -m64");
720 target_flags |= MASK_LONG_DOUBLE_128;
723 /* Code model selection. */
724 sparc_cmodel = SPARC_DEFAULT_CMODEL;
726 #ifdef SPARC_BI_ARCH
727 if (TARGET_ARCH32)
728 sparc_cmodel = CM_32;
729 #endif
731 if (sparc_cmodel_string != NULL)
733 if (TARGET_ARCH64)
735 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
736 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
737 break;
738 if (cmodel->name == NULL)
739 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
740 else
741 sparc_cmodel = cmodel->value;
743 else
744 error ("-mcmodel= is not supported on 32 bit systems");
747 fpu = target_flags & MASK_FPU; /* save current -mfpu status */
749 /* Set the default CPU. */
750 for (def = &cpu_default[0]; def->name; ++def)
751 if (def->cpu == TARGET_CPU_DEFAULT)
752 break;
753 gcc_assert (def->name);
754 sparc_select[0].string = def->name;
756 for (sel = &sparc_select[0]; sel->name; ++sel)
758 if (sel->string)
760 for (cpu = &cpu_table[0]; cpu->name; ++cpu)
761 if (! strcmp (sel->string, cpu->name))
763 if (sel->set_tune_p)
764 sparc_cpu = cpu->processor;
766 if (sel->set_arch_p)
768 target_flags &= ~cpu->disable;
769 target_flags |= cpu->enable;
771 break;
774 if (! cpu->name)
775 error ("bad value (%s) for %s switch", sel->string, sel->name);
779 /* If -mfpu or -mno-fpu was explicitly used, don't override with
780 the processor default. */
781 if (fpu_option_set)
782 target_flags = (target_flags & ~MASK_FPU) | fpu;
784 /* Don't allow -mvis if FPU is disabled. */
785 if (! TARGET_FPU)
786 target_flags &= ~MASK_VIS;
788 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
789 are available.
790 -m64 also implies v9. */
791 if (TARGET_VIS || TARGET_ARCH64)
793 target_flags |= MASK_V9;
794 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
797 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
798 if (TARGET_V9 && TARGET_ARCH32)
799 target_flags |= MASK_DEPRECATED_V8_INSNS;
801 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
802 if (! TARGET_V9 || TARGET_ARCH64)
803 target_flags &= ~MASK_V8PLUS;
805 /* Don't use stack biasing in 32 bit mode. */
806 if (TARGET_ARCH32)
807 target_flags &= ~MASK_STACK_BIAS;
809 /* Supply a default value for align_functions. */
810 if (align_functions == 0
811 && (sparc_cpu == PROCESSOR_ULTRASPARC
812 || sparc_cpu == PROCESSOR_ULTRASPARC3
813 || sparc_cpu == PROCESSOR_NIAGARA
814 || sparc_cpu == PROCESSOR_NIAGARA2))
815 align_functions = 32;
817 /* Validate PCC_STRUCT_RETURN. */
818 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
819 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
821 /* Only use .uaxword when compiling for a 64-bit target. */
822 if (!TARGET_ARCH64)
823 targetm.asm_out.unaligned_op.di = NULL;
825 /* Do various machine dependent initializations. */
826 sparc_init_modes ();
828 /* Acquire unique alias sets for our private stuff. */
829 sparc_sr_alias_set = new_alias_set ();
830 struct_value_alias_set = new_alias_set ();
832 /* Set up function hooks. */
833 init_machine_status = sparc_init_machine_status;
835 switch (sparc_cpu)
837 case PROCESSOR_V7:
838 case PROCESSOR_CYPRESS:
839 sparc_costs = &cypress_costs;
840 break;
841 case PROCESSOR_V8:
842 case PROCESSOR_SPARCLITE:
843 case PROCESSOR_SUPERSPARC:
844 sparc_costs = &supersparc_costs;
845 break;
846 case PROCESSOR_F930:
847 case PROCESSOR_F934:
848 case PROCESSOR_HYPERSPARC:
849 case PROCESSOR_SPARCLITE86X:
850 sparc_costs = &hypersparc_costs;
851 break;
852 case PROCESSOR_SPARCLET:
853 case PROCESSOR_TSC701:
854 sparc_costs = &sparclet_costs;
855 break;
856 case PROCESSOR_V9:
857 case PROCESSOR_ULTRASPARC:
858 sparc_costs = &ultrasparc_costs;
859 break;
860 case PROCESSOR_ULTRASPARC3:
861 sparc_costs = &ultrasparc3_costs;
862 break;
863 case PROCESSOR_NIAGARA:
864 sparc_costs = &niagara_costs;
865 break;
866 case PROCESSOR_NIAGARA2:
867 sparc_costs = &niagara2_costs;
868 break;
871 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
872 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
873 target_flags |= MASK_LONG_DOUBLE_128;
874 #endif
876 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
877 set_param_value ("simultaneous-prefetches",
878 ((sparc_cpu == PROCESSOR_ULTRASPARC
879 || sparc_cpu == PROCESSOR_NIAGARA
880 || sparc_cpu == PROCESSOR_NIAGARA2)
882 : (sparc_cpu == PROCESSOR_ULTRASPARC3
883 ? 8 : 3)));
884 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
885 set_param_value ("l1-cache-line-size",
886 ((sparc_cpu == PROCESSOR_ULTRASPARC
887 || sparc_cpu == PROCESSOR_ULTRASPARC3
888 || sparc_cpu == PROCESSOR_NIAGARA
889 || sparc_cpu == PROCESSOR_NIAGARA2)
890 ? 64 : 32));
893 /* Miscellaneous utilities. */
895 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
896 or branch on register contents instructions. */
899 v9_regcmp_p (enum rtx_code code)
901 return (code == EQ || code == NE || code == GE || code == LT
902 || code == LE || code == GT);
905 /* Nonzero if OP is a floating point constant which can
906 be loaded into an integer register using a single
907 sethi instruction. */
910 fp_sethi_p (rtx op)
912 if (GET_CODE (op) == CONST_DOUBLE)
914 REAL_VALUE_TYPE r;
915 long i;
917 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
918 REAL_VALUE_TO_TARGET_SINGLE (r, i);
919 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
922 return 0;
925 /* Nonzero if OP is a floating point constant which can
926 be loaded into an integer register using a single
927 mov instruction. */
930 fp_mov_p (rtx op)
932 if (GET_CODE (op) == CONST_DOUBLE)
934 REAL_VALUE_TYPE r;
935 long i;
937 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
938 REAL_VALUE_TO_TARGET_SINGLE (r, i);
939 return SPARC_SIMM13_P (i);
942 return 0;
945 /* Nonzero if OP is a floating point constant which can
946 be loaded into an integer register using a high/losum
947 instruction sequence. */
950 fp_high_losum_p (rtx op)
952 /* The constraints calling this should only be in
953 SFmode move insns, so any constant which cannot
954 be moved using a single insn will do. */
955 if (GET_CODE (op) == CONST_DOUBLE)
957 REAL_VALUE_TYPE r;
958 long i;
960 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
961 REAL_VALUE_TO_TARGET_SINGLE (r, i);
962 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
965 return 0;
968 /* Expand a move instruction. Return true if all work is done. */
970 bool
971 sparc_expand_move (enum machine_mode mode, rtx *operands)
973 /* Handle sets of MEM first. */
974 if (GET_CODE (operands[0]) == MEM)
976 /* 0 is a register (or a pair of registers) on SPARC. */
977 if (register_or_zero_operand (operands[1], mode))
978 return false;
980 if (!reload_in_progress)
982 operands[0] = validize_mem (operands[0]);
983 operands[1] = force_reg (mode, operands[1]);
987 /* Fixup TLS cases. */
988 if (TARGET_HAVE_TLS
989 && CONSTANT_P (operands[1])
990 && GET_CODE (operands[1]) != HIGH
991 && sparc_tls_referenced_p (operands [1]))
993 rtx sym = operands[1];
994 rtx addend = NULL;
996 if (GET_CODE (sym) == CONST && GET_CODE (XEXP (sym, 0)) == PLUS)
998 addend = XEXP (XEXP (sym, 0), 1);
999 sym = XEXP (XEXP (sym, 0), 0);
1002 gcc_assert (SPARC_SYMBOL_REF_TLS_P (sym));
1004 sym = legitimize_tls_address (sym);
1005 if (addend)
1007 sym = gen_rtx_PLUS (mode, sym, addend);
1008 sym = force_operand (sym, operands[0]);
1010 operands[1] = sym;
1013 /* Fixup PIC cases. */
1014 if (flag_pic && CONSTANT_P (operands[1]))
1016 if (pic_address_needs_scratch (operands[1]))
1017 operands[1] = legitimize_pic_address (operands[1], mode, 0);
1019 /* VxWorks does not impose a fixed gap between segments; the run-time
1020 gap can be different from the object-file gap. We therefore can't
1021 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1022 are absolutely sure that X is in the same segment as the GOT.
1023 Unfortunately, the flexibility of linker scripts means that we
1024 can't be sure of that in general, so assume that _G_O_T_-relative
1025 accesses are never valid on VxWorks. */
1026 if (GET_CODE (operands[1]) == LABEL_REF && !TARGET_VXWORKS_RTP)
1028 if (mode == SImode)
1030 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1031 return true;
1034 if (mode == DImode)
1036 gcc_assert (TARGET_ARCH64);
1037 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1038 return true;
1042 if (symbolic_operand (operands[1], mode))
1044 operands[1] = legitimize_pic_address (operands[1],
1045 mode,
1046 (reload_in_progress ?
1047 operands[0] :
1048 NULL_RTX));
1049 return false;
1053 /* If we are trying to toss an integer constant into FP registers,
1054 or loading a FP or vector constant, force it into memory. */
1055 if (CONSTANT_P (operands[1])
1056 && REG_P (operands[0])
1057 && (SPARC_FP_REG_P (REGNO (operands[0]))
1058 || SCALAR_FLOAT_MODE_P (mode)
1059 || VECTOR_MODE_P (mode)))
1061 /* emit_group_store will send such bogosity to us when it is
1062 not storing directly into memory. So fix this up to avoid
1063 crashes in output_constant_pool. */
1064 if (operands [1] == const0_rtx)
1065 operands[1] = CONST0_RTX (mode);
1067 /* We can clear FP registers if TARGET_VIS, and always other regs. */
1068 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1069 && const_zero_operand (operands[1], mode))
1070 return false;
1072 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1073 /* We are able to build any SF constant in integer registers
1074 with at most 2 instructions. */
1075 && (mode == SFmode
1076 /* And any DF constant in integer registers. */
1077 || (mode == DFmode
1078 && (reload_completed || reload_in_progress))))
1079 return false;
1081 operands[1] = force_const_mem (mode, operands[1]);
1082 if (!reload_in_progress)
1083 operands[1] = validize_mem (operands[1]);
1084 return false;
1087 /* Accept non-constants and valid constants unmodified. */
1088 if (!CONSTANT_P (operands[1])
1089 || GET_CODE (operands[1]) == HIGH
1090 || input_operand (operands[1], mode))
1091 return false;
1093 switch (mode)
1095 case QImode:
1096 /* All QImode constants require only one insn, so proceed. */
1097 break;
1099 case HImode:
1100 case SImode:
1101 sparc_emit_set_const32 (operands[0], operands[1]);
1102 return true;
1104 case DImode:
1105 /* input_operand should have filtered out 32-bit mode. */
1106 sparc_emit_set_const64 (operands[0], operands[1]);
1107 return true;
1109 default:
1110 gcc_unreachable ();
1113 return false;
1116 /* Load OP1, a 32-bit constant, into OP0, a register.
1117 We know it can't be done in one insn when we get
1118 here, the move expander guarantees this. */
1120 void
1121 sparc_emit_set_const32 (rtx op0, rtx op1)
1123 enum machine_mode mode = GET_MODE (op0);
1124 rtx temp;
1126 if (reload_in_progress || reload_completed)
1127 temp = op0;
1128 else
1129 temp = gen_reg_rtx (mode);
1131 if (GET_CODE (op1) == CONST_INT)
1133 gcc_assert (!small_int_operand (op1, mode)
1134 && !const_high_operand (op1, mode));
1136 /* Emit them as real moves instead of a HIGH/LO_SUM,
1137 this way CSE can see everything and reuse intermediate
1138 values if it wants. */
1139 emit_insn (gen_rtx_SET (VOIDmode, temp,
1140 GEN_INT (INTVAL (op1)
1141 & ~(HOST_WIDE_INT)0x3ff)));
1143 emit_insn (gen_rtx_SET (VOIDmode,
1144 op0,
1145 gen_rtx_IOR (mode, temp,
1146 GEN_INT (INTVAL (op1) & 0x3ff))));
1148 else
1150 /* A symbol, emit in the traditional way. */
1151 emit_insn (gen_rtx_SET (VOIDmode, temp,
1152 gen_rtx_HIGH (mode, op1)));
1153 emit_insn (gen_rtx_SET (VOIDmode,
1154 op0, gen_rtx_LO_SUM (mode, temp, op1)));
1158 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1159 If TEMP is nonzero, we are forbidden to use any other scratch
1160 registers. Otherwise, we are allowed to generate them as needed.
1162 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1163 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1165 void
1166 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1168 rtx temp1, temp2, temp3, temp4, temp5;
1169 rtx ti_temp = 0;
1171 if (temp && GET_MODE (temp) == TImode)
1173 ti_temp = temp;
1174 temp = gen_rtx_REG (DImode, REGNO (temp));
1177 /* SPARC-V9 code-model support. */
1178 switch (sparc_cmodel)
1180 case CM_MEDLOW:
1181 /* The range spanned by all instructions in the object is less
1182 than 2^31 bytes (2GB) and the distance from any instruction
1183 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1184 than 2^31 bytes (2GB).
1186 The executable must be in the low 4TB of the virtual address
1187 space.
1189 sethi %hi(symbol), %temp1
1190 or %temp1, %lo(symbol), %reg */
1191 if (temp)
1192 temp1 = temp; /* op0 is allowed. */
1193 else
1194 temp1 = gen_reg_rtx (DImode);
1196 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1197 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1198 break;
1200 case CM_MEDMID:
1201 /* The range spanned by all instructions in the object is less
1202 than 2^31 bytes (2GB) and the distance from any instruction
1203 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1204 than 2^31 bytes (2GB).
1206 The executable must be in the low 16TB of the virtual address
1207 space.
1209 sethi %h44(symbol), %temp1
1210 or %temp1, %m44(symbol), %temp2
1211 sllx %temp2, 12, %temp3
1212 or %temp3, %l44(symbol), %reg */
1213 if (temp)
1215 temp1 = op0;
1216 temp2 = op0;
1217 temp3 = temp; /* op0 is allowed. */
1219 else
1221 temp1 = gen_reg_rtx (DImode);
1222 temp2 = gen_reg_rtx (DImode);
1223 temp3 = gen_reg_rtx (DImode);
1226 emit_insn (gen_seth44 (temp1, op1));
1227 emit_insn (gen_setm44 (temp2, temp1, op1));
1228 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1229 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1230 emit_insn (gen_setl44 (op0, temp3, op1));
1231 break;
1233 case CM_MEDANY:
1234 /* The range spanned by all instructions in the object is less
1235 than 2^31 bytes (2GB) and the distance from any instruction
1236 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1237 than 2^31 bytes (2GB).
1239 The executable can be placed anywhere in the virtual address
1240 space.
1242 sethi %hh(symbol), %temp1
1243 sethi %lm(symbol), %temp2
1244 or %temp1, %hm(symbol), %temp3
1245 sllx %temp3, 32, %temp4
1246 or %temp4, %temp2, %temp5
1247 or %temp5, %lo(symbol), %reg */
1248 if (temp)
1250 /* It is possible that one of the registers we got for operands[2]
1251 might coincide with that of operands[0] (which is why we made
1252 it TImode). Pick the other one to use as our scratch. */
1253 if (rtx_equal_p (temp, op0))
1255 gcc_assert (ti_temp);
1256 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1258 temp1 = op0;
1259 temp2 = temp; /* op0 is _not_ allowed, see above. */
1260 temp3 = op0;
1261 temp4 = op0;
1262 temp5 = op0;
1264 else
1266 temp1 = gen_reg_rtx (DImode);
1267 temp2 = gen_reg_rtx (DImode);
1268 temp3 = gen_reg_rtx (DImode);
1269 temp4 = gen_reg_rtx (DImode);
1270 temp5 = gen_reg_rtx (DImode);
1273 emit_insn (gen_sethh (temp1, op1));
1274 emit_insn (gen_setlm (temp2, op1));
1275 emit_insn (gen_sethm (temp3, temp1, op1));
1276 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1277 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1278 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1279 gen_rtx_PLUS (DImode, temp4, temp2)));
1280 emit_insn (gen_setlo (op0, temp5, op1));
1281 break;
1283 case CM_EMBMEDANY:
1284 /* Old old old backwards compatibility kruft here.
1285 Essentially it is MEDLOW with a fixed 64-bit
1286 virtual base added to all data segment addresses.
1287 Text-segment stuff is computed like MEDANY, we can't
1288 reuse the code above because the relocation knobs
1289 look different.
1291 Data segment: sethi %hi(symbol), %temp1
1292 add %temp1, EMBMEDANY_BASE_REG, %temp2
1293 or %temp2, %lo(symbol), %reg */
1294 if (data_segment_operand (op1, GET_MODE (op1)))
1296 if (temp)
1298 temp1 = temp; /* op0 is allowed. */
1299 temp2 = op0;
1301 else
1303 temp1 = gen_reg_rtx (DImode);
1304 temp2 = gen_reg_rtx (DImode);
1307 emit_insn (gen_embmedany_sethi (temp1, op1));
1308 emit_insn (gen_embmedany_brsum (temp2, temp1));
1309 emit_insn (gen_embmedany_losum (op0, temp2, op1));
1312 /* Text segment: sethi %uhi(symbol), %temp1
1313 sethi %hi(symbol), %temp2
1314 or %temp1, %ulo(symbol), %temp3
1315 sllx %temp3, 32, %temp4
1316 or %temp4, %temp2, %temp5
1317 or %temp5, %lo(symbol), %reg */
1318 else
1320 if (temp)
1322 /* It is possible that one of the registers we got for operands[2]
1323 might coincide with that of operands[0] (which is why we made
1324 it TImode). Pick the other one to use as our scratch. */
1325 if (rtx_equal_p (temp, op0))
1327 gcc_assert (ti_temp);
1328 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1330 temp1 = op0;
1331 temp2 = temp; /* op0 is _not_ allowed, see above. */
1332 temp3 = op0;
1333 temp4 = op0;
1334 temp5 = op0;
1336 else
1338 temp1 = gen_reg_rtx (DImode);
1339 temp2 = gen_reg_rtx (DImode);
1340 temp3 = gen_reg_rtx (DImode);
1341 temp4 = gen_reg_rtx (DImode);
1342 temp5 = gen_reg_rtx (DImode);
1345 emit_insn (gen_embmedany_textuhi (temp1, op1));
1346 emit_insn (gen_embmedany_texthi (temp2, op1));
1347 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
1348 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1349 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1350 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1351 gen_rtx_PLUS (DImode, temp4, temp2)));
1352 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
1354 break;
1356 default:
1357 gcc_unreachable ();
1361 #if HOST_BITS_PER_WIDE_INT == 32
1362 void
1363 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
1365 gcc_unreachable ();
1367 #else
1368 /* These avoid problems when cross compiling. If we do not
1369 go through all this hair then the optimizer will see
1370 invalid REG_EQUAL notes or in some cases none at all. */
1371 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
1372 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
1373 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
1374 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
1376 /* The optimizer is not to assume anything about exactly
1377 which bits are set for a HIGH, they are unspecified.
1378 Unfortunately this leads to many missed optimizations
1379 during CSE. We mask out the non-HIGH bits, and matches
1380 a plain movdi, to alleviate this problem. */
1381 static rtx
1382 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
1384 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
1387 static rtx
1388 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
1390 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
1393 static rtx
1394 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
1396 return gen_rtx_IOR (DImode, src, GEN_INT (val));
1399 static rtx
1400 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
1402 return gen_rtx_XOR (DImode, src, GEN_INT (val));
1405 /* Worker routines for 64-bit constant formation on arch64.
1406 One of the key things to be doing in these emissions is
1407 to create as many temp REGs as possible. This makes it
1408 possible for half-built constants to be used later when
1409 such values are similar to something required later on.
1410 Without doing this, the optimizer cannot see such
1411 opportunities. */
1413 static void sparc_emit_set_const64_quick1 (rtx, rtx,
1414 unsigned HOST_WIDE_INT, int);
1416 static void
1417 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
1418 unsigned HOST_WIDE_INT low_bits, int is_neg)
1420 unsigned HOST_WIDE_INT high_bits;
1422 if (is_neg)
1423 high_bits = (~low_bits) & 0xffffffff;
1424 else
1425 high_bits = low_bits;
1427 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1428 if (!is_neg)
1430 emit_insn (gen_rtx_SET (VOIDmode, op0,
1431 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1433 else
1435 /* If we are XOR'ing with -1, then we should emit a one's complement
1436 instead. This way the combiner will notice logical operations
1437 such as ANDN later on and substitute. */
1438 if ((low_bits & 0x3ff) == 0x3ff)
1440 emit_insn (gen_rtx_SET (VOIDmode, op0,
1441 gen_rtx_NOT (DImode, temp)));
1443 else
1445 emit_insn (gen_rtx_SET (VOIDmode, op0,
1446 gen_safe_XOR64 (temp,
1447 (-(HOST_WIDE_INT)0x400
1448 | (low_bits & 0x3ff)))));
1453 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
1454 unsigned HOST_WIDE_INT, int);
1456 static void
1457 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
1458 unsigned HOST_WIDE_INT high_bits,
1459 unsigned HOST_WIDE_INT low_immediate,
1460 int shift_count)
1462 rtx temp2 = op0;
1464 if ((high_bits & 0xfffffc00) != 0)
1466 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1467 if ((high_bits & ~0xfffffc00) != 0)
1468 emit_insn (gen_rtx_SET (VOIDmode, op0,
1469 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1470 else
1471 temp2 = temp;
1473 else
1475 emit_insn (gen_safe_SET64 (temp, high_bits));
1476 temp2 = temp;
1479 /* Now shift it up into place. */
1480 emit_insn (gen_rtx_SET (VOIDmode, op0,
1481 gen_rtx_ASHIFT (DImode, temp2,
1482 GEN_INT (shift_count))));
1484 /* If there is a low immediate part piece, finish up by
1485 putting that in as well. */
1486 if (low_immediate != 0)
1487 emit_insn (gen_rtx_SET (VOIDmode, op0,
1488 gen_safe_OR64 (op0, low_immediate)));
1491 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
1492 unsigned HOST_WIDE_INT);
1494 /* Full 64-bit constant decomposition. Even though this is the
1495 'worst' case, we still optimize a few things away. */
1496 static void
1497 sparc_emit_set_const64_longway (rtx op0, rtx temp,
1498 unsigned HOST_WIDE_INT high_bits,
1499 unsigned HOST_WIDE_INT low_bits)
1501 rtx sub_temp;
1503 if (reload_in_progress || reload_completed)
1504 sub_temp = op0;
1505 else
1506 sub_temp = gen_reg_rtx (DImode);
1508 if ((high_bits & 0xfffffc00) != 0)
1510 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1511 if ((high_bits & ~0xfffffc00) != 0)
1512 emit_insn (gen_rtx_SET (VOIDmode,
1513 sub_temp,
1514 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1515 else
1516 sub_temp = temp;
1518 else
1520 emit_insn (gen_safe_SET64 (temp, high_bits));
1521 sub_temp = temp;
1524 if (!reload_in_progress && !reload_completed)
1526 rtx temp2 = gen_reg_rtx (DImode);
1527 rtx temp3 = gen_reg_rtx (DImode);
1528 rtx temp4 = gen_reg_rtx (DImode);
1530 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1531 gen_rtx_ASHIFT (DImode, sub_temp,
1532 GEN_INT (32))));
1534 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
1535 if ((low_bits & ~0xfffffc00) != 0)
1537 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1538 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
1539 emit_insn (gen_rtx_SET (VOIDmode, op0,
1540 gen_rtx_PLUS (DImode, temp4, temp3)));
1542 else
1544 emit_insn (gen_rtx_SET (VOIDmode, op0,
1545 gen_rtx_PLUS (DImode, temp4, temp2)));
1548 else
1550 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
1551 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
1552 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
1553 int to_shift = 12;
1555 /* We are in the middle of reload, so this is really
1556 painful. However we do still make an attempt to
1557 avoid emitting truly stupid code. */
1558 if (low1 != const0_rtx)
1560 emit_insn (gen_rtx_SET (VOIDmode, op0,
1561 gen_rtx_ASHIFT (DImode, sub_temp,
1562 GEN_INT (to_shift))));
1563 emit_insn (gen_rtx_SET (VOIDmode, op0,
1564 gen_rtx_IOR (DImode, op0, low1)));
1565 sub_temp = op0;
1566 to_shift = 12;
1568 else
1570 to_shift += 12;
1572 if (low2 != const0_rtx)
1574 emit_insn (gen_rtx_SET (VOIDmode, op0,
1575 gen_rtx_ASHIFT (DImode, sub_temp,
1576 GEN_INT (to_shift))));
1577 emit_insn (gen_rtx_SET (VOIDmode, op0,
1578 gen_rtx_IOR (DImode, op0, low2)));
1579 sub_temp = op0;
1580 to_shift = 8;
1582 else
1584 to_shift += 8;
1586 emit_insn (gen_rtx_SET (VOIDmode, op0,
1587 gen_rtx_ASHIFT (DImode, sub_temp,
1588 GEN_INT (to_shift))));
1589 if (low3 != const0_rtx)
1590 emit_insn (gen_rtx_SET (VOIDmode, op0,
1591 gen_rtx_IOR (DImode, op0, low3)));
1592 /* phew... */
1596 /* Analyze a 64-bit constant for certain properties. */
1597 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
1598 unsigned HOST_WIDE_INT,
1599 int *, int *, int *);
1601 static void
1602 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
1603 unsigned HOST_WIDE_INT low_bits,
1604 int *hbsp, int *lbsp, int *abbasp)
1606 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1607 int i;
1609 lowest_bit_set = highest_bit_set = -1;
1610 i = 0;
1613 if ((lowest_bit_set == -1)
1614 && ((low_bits >> i) & 1))
1615 lowest_bit_set = i;
1616 if ((highest_bit_set == -1)
1617 && ((high_bits >> (32 - i - 1)) & 1))
1618 highest_bit_set = (64 - i - 1);
1620 while (++i < 32
1621 && ((highest_bit_set == -1)
1622 || (lowest_bit_set == -1)));
1623 if (i == 32)
1625 i = 0;
1628 if ((lowest_bit_set == -1)
1629 && ((high_bits >> i) & 1))
1630 lowest_bit_set = i + 32;
1631 if ((highest_bit_set == -1)
1632 && ((low_bits >> (32 - i - 1)) & 1))
1633 highest_bit_set = 32 - i - 1;
1635 while (++i < 32
1636 && ((highest_bit_set == -1)
1637 || (lowest_bit_set == -1)));
1639 /* If there are no bits set this should have gone out
1640 as one instruction! */
1641 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
1642 all_bits_between_are_set = 1;
1643 for (i = lowest_bit_set; i <= highest_bit_set; i++)
1645 if (i < 32)
1647 if ((low_bits & (1 << i)) != 0)
1648 continue;
1650 else
1652 if ((high_bits & (1 << (i - 32))) != 0)
1653 continue;
1655 all_bits_between_are_set = 0;
1656 break;
1658 *hbsp = highest_bit_set;
1659 *lbsp = lowest_bit_set;
1660 *abbasp = all_bits_between_are_set;
1663 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
1665 static int
1666 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
1667 unsigned HOST_WIDE_INT low_bits)
1669 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1671 if (high_bits == 0
1672 || high_bits == 0xffffffff)
1673 return 1;
1675 analyze_64bit_constant (high_bits, low_bits,
1676 &highest_bit_set, &lowest_bit_set,
1677 &all_bits_between_are_set);
1679 if ((highest_bit_set == 63
1680 || lowest_bit_set == 0)
1681 && all_bits_between_are_set != 0)
1682 return 1;
1684 if ((highest_bit_set - lowest_bit_set) < 21)
1685 return 1;
1687 return 0;
1690 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
1691 unsigned HOST_WIDE_INT,
1692 int, int);
1694 static unsigned HOST_WIDE_INT
1695 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
1696 unsigned HOST_WIDE_INT low_bits,
1697 int lowest_bit_set, int shift)
1699 HOST_WIDE_INT hi, lo;
1701 if (lowest_bit_set < 32)
1703 lo = (low_bits >> lowest_bit_set) << shift;
1704 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1706 else
1708 lo = 0;
1709 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1711 gcc_assert (! (hi & lo));
1712 return (hi | lo);
1715 /* Here we are sure to be arch64 and this is an integer constant
1716 being loaded into a register. Emit the most efficient
1717 insn sequence possible. Detection of all the 1-insn cases
1718 has been done already. */
1719 void
1720 sparc_emit_set_const64 (rtx op0, rtx op1)
1722 unsigned HOST_WIDE_INT high_bits, low_bits;
1723 int lowest_bit_set, highest_bit_set;
1724 int all_bits_between_are_set;
1725 rtx temp = 0;
1727 /* Sanity check that we know what we are working with. */
1728 gcc_assert (TARGET_ARCH64
1729 && (GET_CODE (op0) == SUBREG
1730 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
1732 if (reload_in_progress || reload_completed)
1733 temp = op0;
1735 if (GET_CODE (op1) != CONST_INT)
1737 sparc_emit_set_symbolic_const64 (op0, op1, temp);
1738 return;
1741 if (! temp)
1742 temp = gen_reg_rtx (DImode);
1744 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1745 low_bits = (INTVAL (op1) & 0xffffffff);
1747 /* low_bits bits 0 --> 31
1748 high_bits bits 32 --> 63 */
1750 analyze_64bit_constant (high_bits, low_bits,
1751 &highest_bit_set, &lowest_bit_set,
1752 &all_bits_between_are_set);
1754 /* First try for a 2-insn sequence. */
1756 /* These situations are preferred because the optimizer can
1757 * do more things with them:
1758 * 1) mov -1, %reg
1759 * sllx %reg, shift, %reg
1760 * 2) mov -1, %reg
1761 * srlx %reg, shift, %reg
1762 * 3) mov some_small_const, %reg
1763 * sllx %reg, shift, %reg
1765 if (((highest_bit_set == 63
1766 || lowest_bit_set == 0)
1767 && all_bits_between_are_set != 0)
1768 || ((highest_bit_set - lowest_bit_set) < 12))
1770 HOST_WIDE_INT the_const = -1;
1771 int shift = lowest_bit_set;
1773 if ((highest_bit_set != 63
1774 && lowest_bit_set != 0)
1775 || all_bits_between_are_set == 0)
1777 the_const =
1778 create_simple_focus_bits (high_bits, low_bits,
1779 lowest_bit_set, 0);
1781 else if (lowest_bit_set == 0)
1782 shift = -(63 - highest_bit_set);
1784 gcc_assert (SPARC_SIMM13_P (the_const));
1785 gcc_assert (shift != 0);
1787 emit_insn (gen_safe_SET64 (temp, the_const));
1788 if (shift > 0)
1789 emit_insn (gen_rtx_SET (VOIDmode,
1790 op0,
1791 gen_rtx_ASHIFT (DImode,
1792 temp,
1793 GEN_INT (shift))));
1794 else if (shift < 0)
1795 emit_insn (gen_rtx_SET (VOIDmode,
1796 op0,
1797 gen_rtx_LSHIFTRT (DImode,
1798 temp,
1799 GEN_INT (-shift))));
1800 return;
1803 /* Now a range of 22 or less bits set somewhere.
1804 * 1) sethi %hi(focus_bits), %reg
1805 * sllx %reg, shift, %reg
1806 * 2) sethi %hi(focus_bits), %reg
1807 * srlx %reg, shift, %reg
1809 if ((highest_bit_set - lowest_bit_set) < 21)
1811 unsigned HOST_WIDE_INT focus_bits =
1812 create_simple_focus_bits (high_bits, low_bits,
1813 lowest_bit_set, 10);
1815 gcc_assert (SPARC_SETHI_P (focus_bits));
1816 gcc_assert (lowest_bit_set != 10);
1818 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
1820 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
1821 if (lowest_bit_set < 10)
1822 emit_insn (gen_rtx_SET (VOIDmode,
1823 op0,
1824 gen_rtx_LSHIFTRT (DImode, temp,
1825 GEN_INT (10 - lowest_bit_set))));
1826 else if (lowest_bit_set > 10)
1827 emit_insn (gen_rtx_SET (VOIDmode,
1828 op0,
1829 gen_rtx_ASHIFT (DImode, temp,
1830 GEN_INT (lowest_bit_set - 10))));
1831 return;
1834 /* 1) sethi %hi(low_bits), %reg
1835 * or %reg, %lo(low_bits), %reg
1836 * 2) sethi %hi(~low_bits), %reg
1837 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1839 if (high_bits == 0
1840 || high_bits == 0xffffffff)
1842 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1843 (high_bits == 0xffffffff));
1844 return;
1847 /* Now, try 3-insn sequences. */
1849 /* 1) sethi %hi(high_bits), %reg
1850 * or %reg, %lo(high_bits), %reg
1851 * sllx %reg, 32, %reg
1853 if (low_bits == 0)
1855 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1856 return;
1859 /* We may be able to do something quick
1860 when the constant is negated, so try that. */
1861 if (const64_is_2insns ((~high_bits) & 0xffffffff,
1862 (~low_bits) & 0xfffffc00))
1864 /* NOTE: The trailing bits get XOR'd so we need the
1865 non-negated bits, not the negated ones. */
1866 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1868 if ((((~high_bits) & 0xffffffff) == 0
1869 && ((~low_bits) & 0x80000000) == 0)
1870 || (((~high_bits) & 0xffffffff) == 0xffffffff
1871 && ((~low_bits) & 0x80000000) != 0))
1873 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
1875 if ((SPARC_SETHI_P (fast_int)
1876 && (~high_bits & 0xffffffff) == 0)
1877 || SPARC_SIMM13_P (fast_int))
1878 emit_insn (gen_safe_SET64 (temp, fast_int));
1879 else
1880 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
1882 else
1884 rtx negated_const;
1885 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1886 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
1887 sparc_emit_set_const64 (temp, negated_const);
1890 /* If we are XOR'ing with -1, then we should emit a one's complement
1891 instead. This way the combiner will notice logical operations
1892 such as ANDN later on and substitute. */
1893 if (trailing_bits == 0x3ff)
1895 emit_insn (gen_rtx_SET (VOIDmode, op0,
1896 gen_rtx_NOT (DImode, temp)));
1898 else
1900 emit_insn (gen_rtx_SET (VOIDmode,
1901 op0,
1902 gen_safe_XOR64 (temp,
1903 (-0x400 | trailing_bits))));
1905 return;
1908 /* 1) sethi %hi(xxx), %reg
1909 * or %reg, %lo(xxx), %reg
1910 * sllx %reg, yyy, %reg
1912 * ??? This is just a generalized version of the low_bits==0
1913 * thing above, FIXME...
1914 */
1915 if ((highest_bit_set - lowest_bit_set) < 32)
1917 unsigned HOST_WIDE_INT focus_bits =
1918 create_simple_focus_bits (high_bits, low_bits,
1919 lowest_bit_set, 0);
1921 /* We can't get here in this state. */
1922 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
1924 /* So what we know is that the set bits straddle the
1925 middle of the 64-bit word. */
1926 sparc_emit_set_const64_quick2 (op0, temp,
1927 focus_bits, 0,
1928 lowest_bit_set);
1929 return;
1932 /* 1) sethi %hi(high_bits), %reg
1933 * or %reg, %lo(high_bits), %reg
1934 * sllx %reg, 32, %reg
1935 * or %reg, low_bits, %reg
1936 */
1937 if (SPARC_SIMM13_P(low_bits)
1938 && ((int)low_bits > 0))
1940 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
1941 return;
1944 /* The easiest way when all else fails, is full decomposition. */
1945 #if 0
1946 printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
1947 high_bits, low_bits, ~high_bits, ~low_bits);
1948 #endif
1949 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
1951 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
1953 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1954 return the mode to be used for the comparison. For floating-point,
1955 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
1956 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
1957 processing is needed. */
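/* For example (illustrative): comparing (plus:SI a b) against zero
   selects CC_NOOVmode, matching the addcc patterns whose overflow bit
   is not meaningful; a plain DImode compare under TARGET_ARCH64 selects
   CCXmode; and a float LT/LE/GT/GE comparison selects CCFPEmode, the
   signaling variant that may trap on unordered operands.  */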
1959 enum machine_mode
1960 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
1962 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1964 switch (op)
1966 case EQ:
1967 case NE:
1968 case UNORDERED:
1969 case ORDERED:
1970 case UNLT:
1971 case UNLE:
1972 case UNGT:
1973 case UNGE:
1974 case UNEQ:
1975 case LTGT:
1976 return CCFPmode;
1978 case LT:
1979 case LE:
1980 case GT:
1981 case GE:
1982 return CCFPEmode;
1984 default:
1985 gcc_unreachable ();
1988 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
1989 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
1991 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
1992 return CCX_NOOVmode;
1993 else
1994 return CC_NOOVmode;
1996 else
1998 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
1999 return CCXmode;
2000 else
2001 return CCmode;
2005 /* Emit the compare insn and return the CC reg for a CODE comparison
2006 with operands X and Y. */
2008 static rtx
2009 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2011 enum machine_mode mode;
2012 rtx cc_reg;
2014 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2015 return x;
2017 mode = SELECT_CC_MODE (code, x, y);
2019 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2020 fcc regs (cse can't tell they're really call clobbered regs and will
2021 remove a duplicate comparison even if there is an intervening function
2022 call - it will then try to reload the cc reg via an int reg which is why
2023 we need the movcc patterns). It is possible to provide the movcc
2024 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2025 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2026 to tell cse that CCFPE mode registers (even pseudos) are call
2027 clobbered. */
2029 /* ??? This is an experiment. Rather than making changes to cse which may
2030 or may not be easy/clean, we do our own cse. This is possible because
2031 we will generate hard registers. Cse knows they're call clobbered (it
2032 doesn't know the same thing about pseudos). If we guess wrong, no big
2033 deal, but if we win, great! */
2035 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2036 #if 1 /* experiment */
2038 int reg;
2039 /* We cycle through the registers to ensure they're all exercised. */
2040 static int next_fcc_reg = 0;
2041 /* Previous x,y for each fcc reg. */
2042 static rtx prev_args[4][2];
2044 /* Scan prev_args for x,y. */
2045 for (reg = 0; reg < 4; reg++)
2046 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2047 break;
2048 if (reg == 4)
2050 reg = next_fcc_reg;
2051 prev_args[reg][0] = x;
2052 prev_args[reg][1] = y;
2053 next_fcc_reg = (next_fcc_reg + 1) & 3;
2055 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2057 #else
2058 cc_reg = gen_reg_rtx (mode);
2059 #endif /* ! experiment */
2060 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2061 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2062 else
2063 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2065 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2066 will only result in an unrecognizable insn so no point in asserting. */
2067 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2069 return cc_reg;
2073 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2075 rtx
2076 gen_compare_reg (rtx cmp)
2078 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2081 /* This function is used for v9 only.
2082 DEST is the target of the Scc insn.
2083 CODE is the code for an Scc's comparison.
2084 X and Y are the values we compare.
2086 This function is needed to turn
2088 (set (reg:SI 110)
2089 (gt (reg:CCX 100 %icc)
2090 (const_int 0)))
2091 into
2092 (set (reg:SI 110)
2093 (gt:DI (reg:CCX 100 %icc)
2094 (const_int 0)))
2096 I.e., the instruction recognizer needs to see the mode of the comparison to
2097 find the right instruction. We could use "gt:DI" right in the
2098 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2100 static int
2101 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2103 if (! TARGET_ARCH64
2104 && (GET_MODE (x) == DImode
2105 || GET_MODE (dest) == DImode))
2106 return 0;
2108 /* Try to use the movrCC insns. */
2109 if (TARGET_ARCH64
2110 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2111 && y == const0_rtx
2112 && v9_regcmp_p (compare_code))
2114 rtx op0 = x;
2115 rtx temp;
2117 /* Special case for op0 != 0. This can be done with one instruction if
2118 dest == x. */
2120 if (compare_code == NE
2121 && GET_MODE (dest) == DImode
2122 && rtx_equal_p (op0, dest))
2124 emit_insn (gen_rtx_SET (VOIDmode, dest,
2125 gen_rtx_IF_THEN_ELSE (DImode,
2126 gen_rtx_fmt_ee (compare_code, DImode,
2127 op0, const0_rtx),
2128 const1_rtx,
2129 dest)));
2130 return 1;
2133 if (reg_overlap_mentioned_p (dest, op0))
2135 /* Handle the case where dest == x.
2136 We "early clobber" the result. */
2137 op0 = gen_reg_rtx (GET_MODE (x));
2138 emit_move_insn (op0, x);
2141 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2142 if (GET_MODE (op0) != DImode)
2144 temp = gen_reg_rtx (DImode);
2145 convert_move (temp, op0, 0);
2147 else
2148 temp = op0;
2149 emit_insn (gen_rtx_SET (VOIDmode, dest,
2150 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2151 gen_rtx_fmt_ee (compare_code, DImode,
2152 temp, const0_rtx),
2153 const1_rtx,
2154 dest)));
2155 return 1;
2157 else
2159 x = gen_compare_reg_1 (compare_code, x, y);
2160 y = const0_rtx;
2162 gcc_assert (GET_MODE (x) != CC_NOOVmode
2163 && GET_MODE (x) != CCX_NOOVmode);
2165 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2166 emit_insn (gen_rtx_SET (VOIDmode, dest,
2167 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2168 gen_rtx_fmt_ee (compare_code,
2169 GET_MODE (x), x, y),
2170 const1_rtx, dest)));
2171 return 1;
2176 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2177 without jumps using the addx/subx instructions. */
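/* The carry-flag idiom referred to here is roughly (illustrative
   assembly, not literal output):

        cmp     %o0, %o1        ! subcc: carry set iff %o0 <u %o1
        addx    %g0, 0, %o2     ! %o2 = carry, i.e. sltu

   and sgeu uses "subx %g0, -1, %o2" to obtain 1 - carry.  */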
2179 bool
2180 emit_scc_insn (rtx operands[])
2182 rtx tem;
2183 rtx x;
2184 rtx y;
2185 enum rtx_code code;
2187 /* The quad-word fp compare library routines all return nonzero to indicate
2188 true, which is different from the equivalent libgcc routines, so we must
2189 handle them specially here. */
2190 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2192 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2193 GET_CODE (operands[1]));
2194 operands[2] = XEXP (operands[1], 0);
2195 operands[3] = XEXP (operands[1], 1);
2198 code = GET_CODE (operands[1]);
2199 x = operands[2];
2200 y = operands[3];
2202 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2203 more applications). The exception to this is "reg != 0" which can
2204 be done in one instruction on v9 (so we do it). */
2205 if (code == EQ)
2207 if (GET_MODE (x) == SImode)
2209 rtx pat = gen_seqsi_special (operands[0], x, y);
2210 emit_insn (pat);
2211 return true;
2213 else if (GET_MODE (x) == DImode)
2215 rtx pat = gen_seqdi_special (operands[0], x, y);
2216 emit_insn (pat);
2217 return true;
2221 if (code == NE)
2223 if (GET_MODE (x) == SImode)
2225 rtx pat = gen_snesi_special (operands[0], x, y);
2226 emit_insn (pat);
2227 return true;
2229 else if (GET_MODE (x) == DImode)
2231 rtx pat = gen_snedi_special (operands[0], x, y);
2232 emit_insn (pat);
2233 return true;
2237 /* For the rest, on v9 we can use conditional moves. */
2239 if (TARGET_V9)
2241 if (gen_v9_scc (operands[0], code, x, y))
2242 return true;
2245 /* We can do LTU and GEU using the addx/subx instructions too. And
2246 for GTU/LEU, if both operands are registers swap them and fall
2247 back to the easy case. */
2248 if (code == GTU || code == LEU)
2250 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2251 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2253 tem = x;
2254 x = y;
2255 y = tem;
2256 code = swap_condition (code);
2260 if (code == LTU || code == GEU)
2262 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2263 gen_rtx_fmt_ee (code, SImode,
2264 gen_compare_reg_1 (code, x, y),
2265 const0_rtx)));
2266 return true;
2269 /* Nope, do branches. */
2270 return false;
2273 /* Emit a conditional jump insn for the v9 architecture using comparison code
2274 CODE and jump target LABEL.
2275 This function exists to take advantage of the v9 brxx insns. */
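/* E.g. (illustrative) "if (x != 0) goto lab" on a DImode register can
   be emitted as a single

        brnz,pt %o0, lab

   instead of a compare against %g0 followed by a conditional branch.  */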
2277 static void
2278 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2280 emit_jump_insn (gen_rtx_SET (VOIDmode,
2281 pc_rtx,
2282 gen_rtx_IF_THEN_ELSE (VOIDmode,
2283 gen_rtx_fmt_ee (code, GET_MODE (op0),
2284 op0, const0_rtx),
2285 gen_rtx_LABEL_REF (VOIDmode, label),
2286 pc_rtx)));
2289 void
2290 emit_conditional_branch_insn (rtx operands[])
2292 /* The quad-word fp compare library routines all return nonzero to indicate
2293 true, which is different from the equivalent libgcc routines, so we must
2294 handle them specially here. */
2295 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2297 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2298 GET_CODE (operands[0]));
2299 operands[1] = XEXP (operands[0], 0);
2300 operands[2] = XEXP (operands[0], 1);
2303 if (TARGET_ARCH64 && operands[2] == const0_rtx
2304 && GET_CODE (operands[1]) == REG
2305 && GET_MODE (operands[1]) == DImode)
2307 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2308 return;
2311 operands[1] = gen_compare_reg (operands[0]);
2312 operands[2] = const0_rtx;
2313 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2314 operands[1], operands[2]);
2315 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2316 operands[3]));
2320 /* Generate a DFmode part of a hard TFmode register.
2321 REG is the TFmode hard register, LOW is 1 for the
2322 low 64 bits of the register and 0 otherwise.
2323 */
2324 rtx
2325 gen_df_reg (rtx reg, int low)
2327 int regno = REGNO (reg);
2329 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2330 regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
2331 return gen_rtx_REG (DFmode, regno);
2334 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
2335 Unlike normal calls, TFmode operands are passed by reference. It is
2336 assumed that no more than 3 operands are required. */
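/* Under the quad-float ABI used here this means, for instance (sketch),
   that a TFmode addition becomes roughly _Qp_add (&result, &x, &y):
   the result slot is passed as a hidden pointer argument and both
   operands by reference, which is what the loop below arranges.  */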
2338 static void
2339 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2341 rtx ret_slot = NULL, arg[3], func_sym;
2342 int i;
2344 /* We only expect to be called for conversions, unary, and binary ops. */
2345 gcc_assert (nargs == 2 || nargs == 3);
2347 for (i = 0; i < nargs; ++i)
2349 rtx this_arg = operands[i];
2350 rtx this_slot;
2352 /* TFmode arguments and return values are passed by reference. */
2353 if (GET_MODE (this_arg) == TFmode)
2355 int force_stack_temp;
2357 force_stack_temp = 0;
2358 if (TARGET_BUGGY_QP_LIB && i == 0)
2359 force_stack_temp = 1;
2361 if (GET_CODE (this_arg) == MEM
2362 && ! force_stack_temp)
2363 this_arg = XEXP (this_arg, 0);
2364 else if (CONSTANT_P (this_arg)
2365 && ! force_stack_temp)
2367 this_slot = force_const_mem (TFmode, this_arg);
2368 this_arg = XEXP (this_slot, 0);
2370 else
2372 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
2374 /* Operand 0 is the return value. We'll copy it out later. */
2375 if (i > 0)
2376 emit_move_insn (this_slot, this_arg);
2377 else
2378 ret_slot = this_slot;
2380 this_arg = XEXP (this_slot, 0);
2384 arg[i] = this_arg;
2387 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2389 if (GET_MODE (operands[0]) == TFmode)
2391 if (nargs == 2)
2392 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2393 arg[0], GET_MODE (arg[0]),
2394 arg[1], GET_MODE (arg[1]));
2395 else
2396 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2397 arg[0], GET_MODE (arg[0]),
2398 arg[1], GET_MODE (arg[1]),
2399 arg[2], GET_MODE (arg[2]));
2401 if (ret_slot)
2402 emit_move_insn (operands[0], ret_slot);
2404 else
2406 rtx ret;
2408 gcc_assert (nargs == 2);
2410 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2411 GET_MODE (operands[0]), 1,
2412 arg[1], GET_MODE (arg[1]));
2414 if (ret != operands[0])
2415 emit_move_insn (operands[0], ret);
2419 /* Expand soft-float TFmode calls to sparc abi routines. */
2421 static void
2422 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
2424 const char *func;
2426 switch (code)
2428 case PLUS:
2429 func = "_Qp_add";
2430 break;
2431 case MINUS:
2432 func = "_Qp_sub";
2433 break;
2434 case MULT:
2435 func = "_Qp_mul";
2436 break;
2437 case DIV:
2438 func = "_Qp_div";
2439 break;
2440 default:
2441 gcc_unreachable ();
2444 emit_soft_tfmode_libcall (func, 3, operands);
2447 static void
2448 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
2450 const char *func;
2452 gcc_assert (code == SQRT);
2453 func = "_Qp_sqrt";
2455 emit_soft_tfmode_libcall (func, 2, operands);
2458 static void
2459 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
2461 const char *func;
2463 switch (code)
2465 case FLOAT_EXTEND:
2466 switch (GET_MODE (operands[1]))
2468 case SFmode:
2469 func = "_Qp_stoq";
2470 break;
2471 case DFmode:
2472 func = "_Qp_dtoq";
2473 break;
2474 default:
2475 gcc_unreachable ();
2477 break;
2479 case FLOAT_TRUNCATE:
2480 switch (GET_MODE (operands[0]))
2482 case SFmode:
2483 func = "_Qp_qtos";
2484 break;
2485 case DFmode:
2486 func = "_Qp_qtod";
2487 break;
2488 default:
2489 gcc_unreachable ();
2491 break;
2493 case FLOAT:
2494 switch (GET_MODE (operands[1]))
2496 case SImode:
2497 func = "_Qp_itoq";
2498 if (TARGET_ARCH64)
2499 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
2500 break;
2501 case DImode:
2502 func = "_Qp_xtoq";
2503 break;
2504 default:
2505 gcc_unreachable ();
2507 break;
2509 case UNSIGNED_FLOAT:
2510 switch (GET_MODE (operands[1]))
2512 case SImode:
2513 func = "_Qp_uitoq";
2514 if (TARGET_ARCH64)
2515 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
2516 break;
2517 case DImode:
2518 func = "_Qp_uxtoq";
2519 break;
2520 default:
2521 gcc_unreachable ();
2523 break;
2525 case FIX:
2526 switch (GET_MODE (operands[0]))
2528 case SImode:
2529 func = "_Qp_qtoi";
2530 break;
2531 case DImode:
2532 func = "_Qp_qtox";
2533 break;
2534 default:
2535 gcc_unreachable ();
2537 break;
2539 case UNSIGNED_FIX:
2540 switch (GET_MODE (operands[0]))
2542 case SImode:
2543 func = "_Qp_qtoui";
2544 break;
2545 case DImode:
2546 func = "_Qp_qtoux";
2547 break;
2548 default:
2549 gcc_unreachable ();
2551 break;
2553 default:
2554 gcc_unreachable ();
2557 emit_soft_tfmode_libcall (func, 2, operands);
2560 /* Expand a hard-float tfmode operation. All arguments must be in
2561 registers. */
2563 static void
2564 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
2566 rtx op, dest;
2568 if (GET_RTX_CLASS (code) == RTX_UNARY)
2570 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2571 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2573 else
2575 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2576 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2577 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2578 operands[1], operands[2]);
2581 if (register_operand (operands[0], VOIDmode))
2582 dest = operands[0];
2583 else
2584 dest = gen_reg_rtx (GET_MODE (operands[0]));
2586 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
2588 if (dest != operands[0])
2589 emit_move_insn (operands[0], dest);
2592 void
2593 emit_tfmode_binop (enum rtx_code code, rtx *operands)
2595 if (TARGET_HARD_QUAD)
2596 emit_hard_tfmode_operation (code, operands);
2597 else
2598 emit_soft_tfmode_binop (code, operands);
2601 void
2602 emit_tfmode_unop (enum rtx_code code, rtx *operands)
2604 if (TARGET_HARD_QUAD)
2605 emit_hard_tfmode_operation (code, operands);
2606 else
2607 emit_soft_tfmode_unop (code, operands);
2610 void
2611 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
2613 if (TARGET_HARD_QUAD)
2614 emit_hard_tfmode_operation (code, operands);
2615 else
2616 emit_soft_tfmode_cvt (code, operands);
2619 /* Return nonzero if a branch/jump/call instruction will be emitting
2620 a nop into its delay slot. */
2622 int
2623 empty_delay_slot (rtx insn)
2625 rtx seq;
2627 /* If no previous instruction (should not happen), return true. */
2628 if (PREV_INSN (insn) == NULL)
2629 return 1;
2631 seq = NEXT_INSN (PREV_INSN (insn));
2632 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
2633 return 0;
2635 return 1;
2638 /* Return nonzero if TRIAL can go into the call delay slot. */
2640 int
2641 tls_call_delay (rtx trial)
2643 rtx pat;
2645 /* Binutils allows
2646 call __tls_get_addr, %tgd_call (foo)
2647 add %l7, %o0, %o0, %tgd_add (foo)
2648 while Sun as/ld does not. */
2649 if (TARGET_GNU_TLS || !TARGET_TLS)
2650 return 1;
2652 pat = PATTERN (trial);
2654 /* We must reject tgd_add{32|64}, i.e.
2655 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
2656 and tldm_add{32|64}, i.e.
2657 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
2658 for Sun as/ld. */
2659 if (GET_CODE (pat) == SET
2660 && GET_CODE (SET_SRC (pat)) == PLUS)
2662 rtx unspec = XEXP (SET_SRC (pat), 1);
2664 if (GET_CODE (unspec) == UNSPEC
2665 && (XINT (unspec, 1) == UNSPEC_TLSGD
2666 || XINT (unspec, 1) == UNSPEC_TLSLDM))
2667 return 0;
2670 return 1;
2673 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
2674 instruction. RETURN_P is true if the v9 variant 'return' is to be
2675 considered in the test too.
2677 TRIAL must be a SET whose destination is a REG appropriate for the
2678 'restore' instruction or, if RETURN_P is true, for the 'return'
2679 instruction. */
2681 static int
2682 eligible_for_restore_insn (rtx trial, bool return_p)
2684 rtx pat = PATTERN (trial);
2685 rtx src = SET_SRC (pat);
2687 /* The 'restore src,%g0,dest' pattern for word mode and below. */
2688 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2689 && arith_operand (src, GET_MODE (src)))
2691 if (TARGET_ARCH64)
2692 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2693 else
2694 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
2697 /* The 'restore src,%g0,dest' pattern for double-word mode. */
2698 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2699 && arith_double_operand (src, GET_MODE (src)))
2700 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2702 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
2703 else if (! TARGET_FPU && register_operand (src, SFmode))
2704 return 1;
2706 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
2707 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
2708 return 1;
2710 /* If we have the 'return' instruction, anything that does not use
2711 local or output registers and can go into a delay slot wins. */
2712 else if (return_p && TARGET_V9 && ! epilogue_renumber (&pat, 1)
2713 && (get_attr_in_uncond_branch_delay (trial)
2714 == IN_UNCOND_BRANCH_DELAY_TRUE))
2715 return 1;
2717 /* The 'restore src1,src2,dest' pattern for SImode. */
2718 else if (GET_CODE (src) == PLUS
2719 && register_operand (XEXP (src, 0), SImode)
2720 && arith_operand (XEXP (src, 1), SImode))
2721 return 1;
2723 /* The 'restore src1,src2,dest' pattern for DImode. */
2724 else if (GET_CODE (src) == PLUS
2725 && register_operand (XEXP (src, 0), DImode)
2726 && arith_double_operand (XEXP (src, 1), DImode))
2727 return 1;
2729 /* The 'restore src1,%lo(src2),dest' pattern. */
2730 else if (GET_CODE (src) == LO_SUM
2731 && ! TARGET_CM_MEDMID
2732 && ((register_operand (XEXP (src, 0), SImode)
2733 && immediate_operand (XEXP (src, 1), SImode))
2734 || (TARGET_ARCH64
2735 && register_operand (XEXP (src, 0), DImode)
2736 && immediate_operand (XEXP (src, 1), DImode))))
2737 return 1;
2739 /* The 'restore src,src,dest' pattern. */
2740 else if (GET_CODE (src) == ASHIFT
2741 && (register_operand (XEXP (src, 0), SImode)
2742 || register_operand (XEXP (src, 0), DImode))
2743 && XEXP (src, 1) == const1_rtx)
2744 return 1;
2746 return 0;
2749 /* Return nonzero if TRIAL can go into the function return's
2750 delay slot. */
2752 int
2753 eligible_for_return_delay (rtx trial)
2755 rtx pat;
2757 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2758 return 0;
2760 if (get_attr_length (trial) != 1)
2761 return 0;
2763 /* If there are any call-saved registers, we should scan TRIAL to check
2764 that it does not reference them. For now just make it easy. */
2765 if (num_gfregs)
2766 return 0;
2768 /* If the function uses __builtin_eh_return, the eh_return machinery
2769 occupies the delay slot. */
2770 if (crtl->calls_eh_return)
2771 return 0;
2773 /* In the case of a true leaf function, anything can go into the slot. */
2774 if (sparc_leaf_function_p)
2775 return get_attr_in_uncond_branch_delay (trial)
2776 == IN_UNCOND_BRANCH_DELAY_TRUE;
2778 pat = PATTERN (trial);
2780 /* Otherwise, only operations which can be done in tandem with
2781 a `restore' or `return' insn can go into the delay slot. */
2782 if (GET_CODE (SET_DEST (pat)) != REG
2783 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24))
2784 return 0;
2786 /* If this instruction sets up a floating-point register and we have a return
2787 instruction, it can probably go in. But restore will not work
2788 with FP_REGS. */
2789 if (REGNO (SET_DEST (pat)) >= 32)
2790 return (TARGET_V9
2791 && ! epilogue_renumber (&pat, 1)
2792 && (get_attr_in_uncond_branch_delay (trial)
2793 == IN_UNCOND_BRANCH_DELAY_TRUE));
2795 return eligible_for_restore_insn (trial, true);
2798 /* Return nonzero if TRIAL can go into the sibling call's
2799 delay slot. */
2801 int
2802 eligible_for_sibcall_delay (rtx trial)
2804 rtx pat;
2806 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2807 return 0;
2809 if (get_attr_length (trial) != 1)
2810 return 0;
2812 pat = PATTERN (trial);
2814 if (sparc_leaf_function_p)
2816 /* If the tail call is done using the call instruction,
2817 we have to restore %o7 in the delay slot. */
2818 if (LEAF_SIBCALL_SLOT_RESERVED_P)
2819 return 0;
2821 /* %g1 is used to build the function address. */
2822 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
2823 return 0;
2825 return 1;
2828 /* Otherwise, only operations which can be done in tandem with
2829 a `restore' insn can go into the delay slot. */
2830 if (GET_CODE (SET_DEST (pat)) != REG
2831 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
2832 || REGNO (SET_DEST (pat)) >= 32)
2833 return 0;
2835 /* If it mentions %o7, it can't go in, because sibcall will clobber it
2836 in most cases. */
2837 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
2838 return 0;
2840 return eligible_for_restore_insn (trial, false);
2843 int
2844 short_branch (int uid1, int uid2)
2846 int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);
2848 /* Leave a few words of "slop". */
2849 if (delta >= -1023 && delta <= 1022)
2850 return 1;
2852 return 0;
2855 /* Return nonzero if REG is not used after INSN.
2856 We assume REG is a reload reg, and therefore does
2857 not live past labels or calls or jumps. */
2858 int
2859 reg_unused_after (rtx reg, rtx insn)
2861 enum rtx_code code, prev_code = UNKNOWN;
2863 while ((insn = NEXT_INSN (insn)))
2865 if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2866 return 1;
2868 code = GET_CODE (insn);
2869 if (GET_CODE (insn) == CODE_LABEL)
2870 return 1;
2872 if (INSN_P (insn))
2874 rtx set = single_set (insn);
2875 int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2876 if (set && in_src)
2877 return 0;
2878 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2879 return 1;
2880 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2881 return 0;
2883 prev_code = code;
2885 return 1;
2888 /* Determine if it's legal to put X into the constant pool. This
2889 is not possible if X contains the address of a symbol that is
2890 not constant (TLS) or not known at final link time (PIC). */
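/* E.g. the address of a __thread variable can never be spilled to the
   constant pool, since its value differs per thread, and under -fpic
   even a plain &global_var cannot, since pool entries are fixed at
   static link time.  */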
2892 static bool
2893 sparc_cannot_force_const_mem (rtx x)
2895 switch (GET_CODE (x))
2897 case CONST_INT:
2898 case CONST_DOUBLE:
2899 case CONST_VECTOR:
2900 /* Accept all non-symbolic constants. */
2901 return false;
2903 case LABEL_REF:
2904 /* Labels are OK iff we are non-PIC. */
2905 return flag_pic != 0;
2907 case SYMBOL_REF:
2908 /* 'Naked' TLS symbol references are never OK,
2909 non-TLS symbols are OK iff we are non-PIC. */
2910 if (SYMBOL_REF_TLS_MODEL (x))
2911 return true;
2912 else
2913 return flag_pic != 0;
2915 case CONST:
2916 return sparc_cannot_force_const_mem (XEXP (x, 0));
2917 case PLUS:
2918 case MINUS:
2919 return sparc_cannot_force_const_mem (XEXP (x, 0))
2920 || sparc_cannot_force_const_mem (XEXP (x, 1));
2921 case UNSPEC:
2922 return true;
2923 default:
2924 gcc_unreachable ();
2928 /* PIC support. */
2929 static GTY(()) char pic_helper_symbol_name[256];
2930 static GTY(()) rtx pic_helper_symbol;
2931 static GTY(()) bool pic_helper_emitted_p = false;
2932 static GTY(()) rtx global_offset_table;
2934 /* Ensure that we are not using patterns that are not OK with PIC. */
2936 int
2937 check_pic (int i)
2939 switch (flag_pic)
2941 case 1:
2942 gcc_assert (GET_CODE (recog_data.operand[i]) != SYMBOL_REF
2943 && (GET_CODE (recog_data.operand[i]) != CONST
2944 || (GET_CODE (XEXP (recog_data.operand[i], 0)) == MINUS
2945 && (XEXP (XEXP (recog_data.operand[i], 0), 0)
2946 == global_offset_table)
2947 && (GET_CODE (XEXP (XEXP (recog_data.operand[i], 0), 1))
2948 == CONST))));
2949 case 2:
2950 default:
2951 return 1;
2955 /* Return true if X is an address which needs a temporary register when
2956 reloaded while generating PIC code. */
2958 int
2959 pic_address_needs_scratch (rtx x)
2961 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
2962 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2963 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2964 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2965 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2966 return 1;
2968 return 0;
2971 /* Determine if a given RTX is a valid constant. We already know this
2972 satisfies CONSTANT_P. */
2974 bool
2975 legitimate_constant_p (rtx x)
2977 rtx inner;
2979 switch (GET_CODE (x))
2981 case SYMBOL_REF:
2982 /* TLS symbols are not constant. */
2983 if (SYMBOL_REF_TLS_MODEL (x))
2984 return false;
2985 break;
2987 case CONST:
2988 inner = XEXP (x, 0);
2990 /* Offsets of TLS symbols are never valid.
2991 Discourage CSE from creating them. */
2992 if (GET_CODE (inner) == PLUS
2993 && SPARC_SYMBOL_REF_TLS_P (XEXP (inner, 0)))
2994 return false;
2995 break;
2997 case CONST_DOUBLE:
2998 if (GET_MODE (x) == VOIDmode)
2999 return true;
3001 /* Floating point constants are generally not ok.
3002 The only exception is 0.0 in VIS. */
3003 if (TARGET_VIS
3004 && SCALAR_FLOAT_MODE_P (GET_MODE (x))
3005 && const_zero_operand (x, GET_MODE (x)))
3006 return true;
3008 return false;
3010 case CONST_VECTOR:
3011 /* Vector constants are generally not ok.
3012 The only exception is 0 in VIS. */
3013 if (TARGET_VIS
3014 && const_zero_operand (x, GET_MODE (x)))
3015 return true;
3017 return false;
3019 default:
3020 break;
3023 return true;
3026 /* Determine if a given RTX is a valid constant address. */
3028 bool
3029 constant_address_p (rtx x)
3031 switch (GET_CODE (x))
3033 case LABEL_REF:
3034 case CONST_INT:
3035 case HIGH:
3036 return true;
3038 case CONST:
3039 if (flag_pic && pic_address_needs_scratch (x))
3040 return false;
3041 return legitimate_constant_p (x);
3043 case SYMBOL_REF:
3044 return !flag_pic && legitimate_constant_p (x);
3046 default:
3047 return false;
3051 /* Nonzero if the constant value X is a legitimate general operand
3052 when generating PIC code. It is given that flag_pic is on and
3053 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3055 bool
3056 legitimate_pic_operand_p (rtx x)
3058 if (pic_address_needs_scratch (x))
3059 return false;
3060 if (SPARC_SYMBOL_REF_TLS_P (x)
3061 || (GET_CODE (x) == CONST
3062 && GET_CODE (XEXP (x, 0)) == PLUS
3063 && SPARC_SYMBOL_REF_TLS_P (XEXP (XEXP (x, 0), 0))))
3064 return false;
3065 return true;
3068 /* Return nonzero if ADDR is a valid memory address.
3069 STRICT specifies whether strict register checking applies. */
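/* Informally, the accepted shapes are (a summary sketch):

        reg                   ld  [%i0], ...
        reg + reg             ld  [%i0 + %i1], ...
        reg + simm13          ld  [%i0 + 42], ...
        lo_sum (reg, imm)     ld  [%i0 + %lo(sym)], ...

   with REG+REG further restricted for TFmode and, on 32-bit without
   optimization, for DFmode/DImode, as spelled out below.  */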
3071 static bool
3072 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3074 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3076 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3077 rs1 = addr;
3078 else if (GET_CODE (addr) == PLUS)
3080 rs1 = XEXP (addr, 0);
3081 rs2 = XEXP (addr, 1);
3083 /* Canonicalize. REG comes first, if there are no regs,
3084 LO_SUM comes first. */
3085 if (!REG_P (rs1)
3086 && GET_CODE (rs1) != SUBREG
3087 && (REG_P (rs2)
3088 || GET_CODE (rs2) == SUBREG
3089 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3091 rs1 = XEXP (addr, 1);
3092 rs2 = XEXP (addr, 0);
3095 if ((flag_pic == 1
3096 && rs1 == pic_offset_table_rtx
3097 && !REG_P (rs2)
3098 && GET_CODE (rs2) != SUBREG
3099 && GET_CODE (rs2) != LO_SUM
3100 && GET_CODE (rs2) != MEM
3101 && ! SPARC_SYMBOL_REF_TLS_P (rs2)
3102 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3103 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3104 || ((REG_P (rs1)
3105 || GET_CODE (rs1) == SUBREG)
3106 && RTX_OK_FOR_OFFSET_P (rs2)))
3108 imm1 = rs2;
3109 rs2 = NULL;
3111 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3112 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3114 /* We prohibit REG + REG for TFmode when there are no quad move insns
3115 and we consequently need to split. We do this because REG+REG
3116 is not an offsettable address. If we get the situation in reload
3117 where source and destination of a movtf pattern are both MEMs with
3118 REG+REG address, then only one of them gets converted to an
3119 offsettable address. */
3120 if (mode == TFmode
3121 && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
3122 return 0;
3124 /* We prohibit REG + REG on ARCH32 if not optimizing for
3125 DFmode/DImode because then mem_min_alignment is likely to be zero
3126 after reload and the forced split would lack a matching splitter
3127 pattern. */
3128 if (TARGET_ARCH32 && !optimize
3129 && (mode == DFmode || mode == DImode))
3130 return 0;
3132 else if (USE_AS_OFFSETABLE_LO10
3133 && GET_CODE (rs1) == LO_SUM
3134 && TARGET_ARCH64
3135 && ! TARGET_CM_MEDMID
3136 && RTX_OK_FOR_OLO10_P (rs2))
3138 rs2 = NULL;
3139 imm1 = XEXP (rs1, 1);
3140 rs1 = XEXP (rs1, 0);
3141 if (! CONSTANT_P (imm1) || SPARC_SYMBOL_REF_TLS_P (rs1))
3142 return 0;
3145 else if (GET_CODE (addr) == LO_SUM)
3147 rs1 = XEXP (addr, 0);
3148 imm1 = XEXP (addr, 1);
3150 if (! CONSTANT_P (imm1) || SPARC_SYMBOL_REF_TLS_P (rs1))
3151 return 0;
3153 /* We can't allow TFmode in 32-bit mode, because an offset greater
3154 than the alignment (8) may cause the LO_SUM to overflow. */
3155 if (mode == TFmode && TARGET_ARCH32)
3156 return 0;
3158 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3159 return 1;
3160 else
3161 return 0;
3163 if (GET_CODE (rs1) == SUBREG)
3164 rs1 = SUBREG_REG (rs1);
3165 if (!REG_P (rs1))
3166 return 0;
3168 if (rs2)
3170 if (GET_CODE (rs2) == SUBREG)
3171 rs2 = SUBREG_REG (rs2);
3172 if (!REG_P (rs2))
3173 return 0;
3176 if (strict)
3178 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3179 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3180 return 0;
3182 else
3184 if ((REGNO (rs1) >= 32
3185 && REGNO (rs1) != FRAME_POINTER_REGNUM
3186 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3187 || (rs2
3188 && (REGNO (rs2) >= 32
3189 && REGNO (rs2) != FRAME_POINTER_REGNUM
3190 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3191 return 0;
3193 return 1;
3196 /* Construct the SYMBOL_REF for the __tls_get_addr function. */
3198 static GTY(()) rtx sparc_tls_symbol;
3200 static rtx
3201 sparc_tls_get_addr (void)
3203 if (!sparc_tls_symbol)
3204 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3206 return sparc_tls_symbol;
3209 static rtx
3210 sparc_tls_got (void)
3212 rtx temp;
3213 if (flag_pic)
3215 crtl->uses_pic_offset_table = 1;
3216 return pic_offset_table_rtx;
3219 if (!global_offset_table)
3220 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3221 temp = gen_reg_rtx (Pmode);
3222 emit_move_insn (temp, global_offset_table);
3223 return temp;
3226 /* Return 1 if *X is a thread-local symbol. */
3228 static int
3229 sparc_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
3231 return SPARC_SYMBOL_REF_TLS_P (*x);
3234 /* Return 1 if X contains a thread-local symbol. */
3236 bool
3237 sparc_tls_referenced_p (rtx x)
3239 if (!TARGET_HAVE_TLS)
3240 return false;
3242 return for_each_rtx (&x, &sparc_tls_symbol_ref_1, 0);
3245 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3246 this (thread-local) address. */
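/* For the global-dynamic model, for instance, the sequence built below
   corresponds roughly to (32-bit shown, illustrative):

        sethi   %tgd_hi22(sym), %t1
        add     %t1, %tgd_lo10(sym), %t2
        add     %l7, %t2, %o0, %tgd_add(sym)
        call    __tls_get_addr, %tgd_call(sym)
         nop  */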
3248 rtx
3249 legitimize_tls_address (rtx addr)
3251 rtx temp1, temp2, temp3, ret, o0, got, insn;
3253 gcc_assert (can_create_pseudo_p ());
3255 if (GET_CODE (addr) == SYMBOL_REF)
3256 switch (SYMBOL_REF_TLS_MODEL (addr))
3258 case TLS_MODEL_GLOBAL_DYNAMIC:
3259 start_sequence ();
3260 temp1 = gen_reg_rtx (SImode);
3261 temp2 = gen_reg_rtx (SImode);
3262 ret = gen_reg_rtx (Pmode);
3263 o0 = gen_rtx_REG (Pmode, 8);
3264 got = sparc_tls_got ();
3265 emit_insn (gen_tgd_hi22 (temp1, addr));
3266 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3267 if (TARGET_ARCH32)
3269 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3270 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3271 addr, const1_rtx));
3273 else
3275 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3276 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3277 addr, const1_rtx));
3279 CALL_INSN_FUNCTION_USAGE (insn)
3280 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3281 CALL_INSN_FUNCTION_USAGE (insn));
3282 insn = get_insns ();
3283 end_sequence ();
3284 emit_libcall_block (insn, ret, o0, addr);
3285 break;
3287 case TLS_MODEL_LOCAL_DYNAMIC:
3288 start_sequence ();
3289 temp1 = gen_reg_rtx (SImode);
3290 temp2 = gen_reg_rtx (SImode);
3291 temp3 = gen_reg_rtx (Pmode);
3292 ret = gen_reg_rtx (Pmode);
3293 o0 = gen_rtx_REG (Pmode, 8);
3294 got = sparc_tls_got ();
3295 emit_insn (gen_tldm_hi22 (temp1));
3296 emit_insn (gen_tldm_lo10 (temp2, temp1));
3297 if (TARGET_ARCH32)
3299 emit_insn (gen_tldm_add32 (o0, got, temp2));
3300 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
3301 const1_rtx));
3303 else
3305 emit_insn (gen_tldm_add64 (o0, got, temp2));
3306 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
3307 const1_rtx));
3309 CALL_INSN_FUNCTION_USAGE (insn)
3310 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3311 CALL_INSN_FUNCTION_USAGE (insn));
3312 insn = get_insns ();
3313 end_sequence ();
3314 emit_libcall_block (insn, temp3, o0,
3315 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
3316 UNSPEC_TLSLD_BASE));
3317 temp1 = gen_reg_rtx (SImode);
3318 temp2 = gen_reg_rtx (SImode);
3319 emit_insn (gen_tldo_hix22 (temp1, addr));
3320 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
3321 if (TARGET_ARCH32)
3322 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
3323 else
3324 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
3325 break;
3327 case TLS_MODEL_INITIAL_EXEC:
3328 temp1 = gen_reg_rtx (SImode);
3329 temp2 = gen_reg_rtx (SImode);
3330 temp3 = gen_reg_rtx (Pmode);
3331 got = sparc_tls_got ();
3332 emit_insn (gen_tie_hi22 (temp1, addr));
3333 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
3334 if (TARGET_ARCH32)
3335 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
3336 else
3337 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
3338 if (TARGET_SUN_TLS)
3340 ret = gen_reg_rtx (Pmode);
3341 if (TARGET_ARCH32)
3342 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
3343 temp3, addr));
3344 else
3345 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
3346 temp3, addr));
3348 else
3349 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
3350 break;
3352 case TLS_MODEL_LOCAL_EXEC:
3353 temp1 = gen_reg_rtx (Pmode);
3354 temp2 = gen_reg_rtx (Pmode);
3355 if (TARGET_ARCH32)
3357 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
3358 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
3360 else
3362 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
3363 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
3365 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
3366 break;
3368 default:
3369 gcc_unreachable ();
3372 else
3373 gcc_unreachable (); /* for now ... */
3375 return ret;
3379 /* Legitimize PIC addresses. If the address is already position-independent,
3380 we return ORIG. Newly generated position-independent addresses go into a
3381 reg. This is REG if nonzero, otherwise we allocate register(s) as
3382 necessary. */
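/* Under -fPIC (flag_pic == 2) a global symbol access built here comes
   out roughly as (illustrative):

        sethi   %hi(sym), %t        ! kept inside an UNSPEC so that cse
        or      %t, %lo(sym), %t    ! does not see a plain address
        ld      [%l7 + %t], reg     ! load the GOT entry

   whereas -fpic (flag_pic == 1) folds the small GOT offset directly
   into the load.  */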
3384 rtx
3385 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
3386 rtx reg)
3388 if (GET_CODE (orig) == SYMBOL_REF
3389 /* See the comment in sparc_expand_move. */
3390 || (TARGET_VXWORKS_RTP && GET_CODE (orig) == LABEL_REF))
3392 rtx pic_ref, address;
3393 rtx insn;
3395 if (reg == 0)
3397 gcc_assert (! reload_in_progress && ! reload_completed);
3398 reg = gen_reg_rtx (Pmode);
3401 if (flag_pic == 2)
3403 /* If not during reload, allocate another temp reg here for loading
3404 in the address, so that these instructions can be optimized
3405 properly. */
3406 rtx temp_reg = ((reload_in_progress || reload_completed)
3407 ? reg : gen_reg_rtx (Pmode));
3409 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
3410 won't get confused into thinking that these two instructions
3411 are loading in the true address of the symbol. If in the
3412 future a PIC rtx exists, that should be used instead. */
3413 if (TARGET_ARCH64)
3415 emit_insn (gen_movdi_high_pic (temp_reg, orig));
3416 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
3418 else
3420 emit_insn (gen_movsi_high_pic (temp_reg, orig));
3421 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
3423 address = temp_reg;
3425 else
3426 address = orig;
3428 pic_ref = gen_const_mem (Pmode,
3429 gen_rtx_PLUS (Pmode,
3430 pic_offset_table_rtx, address));
3431 crtl->uses_pic_offset_table = 1;
3432 insn = emit_move_insn (reg, pic_ref);
3433 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3434 by loop. */
3435 set_unique_reg_note (insn, REG_EQUAL, orig);
3436 return reg;
3438 else if (GET_CODE (orig) == CONST)
3440 rtx base, offset;
3442 if (GET_CODE (XEXP (orig, 0)) == PLUS
3443 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3444 return orig;
3446 if (reg == 0)
3448 gcc_assert (! reload_in_progress && ! reload_completed);
3449 reg = gen_reg_rtx (Pmode);
3452 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3453 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3454 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3455 base == reg ? 0 : reg);
3457 if (GET_CODE (offset) == CONST_INT)
3459 if (SMALL_INT (offset))
3460 return plus_constant (base, INTVAL (offset));
3461 else if (! reload_in_progress && ! reload_completed)
3462 offset = force_reg (Pmode, offset);
3463 else
3464 /* If we reach here, then something is seriously wrong. */
3465 gcc_unreachable ();
3467 return gen_rtx_PLUS (Pmode, base, offset);
3469 else if (GET_CODE (orig) == LABEL_REF)
3470 /* ??? Why do we do this? */
3471 /* Now movsi_pic_label_ref uses it, but we ought to be checking that
3472 the register is live instead, in case it is eliminated. */
3473 crtl->uses_pic_offset_table = 1;
3475 return orig;
3478 /* Try machine-dependent ways of modifying an illegitimate address X
3479 to be legitimate. If we find one, return the new, valid address.
3481 OLDX is the address as it was before break_out_memory_refs was called.
3482 In some cases it is useful to look at this to decide what needs to be done.
3484 MODE is the mode of the operand pointed to by X.
3486 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
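/* E.g. (illustrative) (plus reg 0x12345), whose offset does not fit in
   a simm13, is rewritten as (plus reg reg') with the constant forced
   into reg'; likewise (plus reg (mult x y)) gets the product forced
   into a register first.  */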
3488 rtx
3489 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3490 enum machine_mode mode)
3492 rtx orig_x = x;
3494 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
3495 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3496 force_operand (XEXP (x, 0), NULL_RTX));
3497 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
3498 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3499 force_operand (XEXP (x, 1), NULL_RTX));
3500 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
3501 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
3502 XEXP (x, 1));
3503 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
3504 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3505 force_operand (XEXP (x, 1), NULL_RTX));
3507 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
3508 return x;
3510 if (SPARC_SYMBOL_REF_TLS_P (x))
3511 x = legitimize_tls_address (x);
3512 else if (flag_pic)
3513 x = legitimize_pic_address (x, mode, 0);
3514 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
3515 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3516 copy_to_mode_reg (Pmode, XEXP (x, 1)));
3517 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
3518 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3519 copy_to_mode_reg (Pmode, XEXP (x, 0)));
3520 else if (GET_CODE (x) == SYMBOL_REF
3521 || GET_CODE (x) == CONST
3522 || GET_CODE (x) == LABEL_REF)
3523 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
3524 return x;
3527 /* Emit the special PIC helper function. */
3529 static void
3530 emit_pic_helper (void)
3532 const char *pic_name = reg_names[REGNO (pic_offset_table_rtx)];
3533 int align;
3535 switch_to_section (text_section);
3537 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
3538 if (align > 0)
3539 ASM_OUTPUT_ALIGN (asm_out_file, align);
3540 ASM_OUTPUT_LABEL (asm_out_file, pic_helper_symbol_name);
3541 if (flag_delayed_branch)
3542 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
3543 pic_name, pic_name);
3544 else
3545 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
3546 pic_name, pic_name);
3548 pic_helper_emitted_p = true;
3551 /* Emit code to load the PIC register. */
3553 static void
3554 load_pic_register (bool delay_pic_helper)
3556 int orig_flag_pic = flag_pic;
3558 if (TARGET_VXWORKS_RTP)
3560 emit_insn (gen_vxworks_load_got ());
3561 emit_use (pic_offset_table_rtx);
3562 return;
3565 /* If we haven't initialized the special PIC symbols, do so now. */
3566 if (!pic_helper_symbol_name[0])
3568 ASM_GENERATE_INTERNAL_LABEL (pic_helper_symbol_name, "LADDPC", 0);
3569 pic_helper_symbol = gen_rtx_SYMBOL_REF (Pmode, pic_helper_symbol_name);
3570 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3573 /* If we haven't emitted the special PIC helper function, do so now unless
3574 we are requested to delay it. */
3575 if (!delay_pic_helper && !pic_helper_emitted_p)
3576 emit_pic_helper ();
3578 flag_pic = 0;
3579 if (TARGET_ARCH64)
3580 emit_insn (gen_load_pcrel_symdi (pic_offset_table_rtx, global_offset_table,
3581 pic_helper_symbol));
3582 else
3583 emit_insn (gen_load_pcrel_symsi (pic_offset_table_rtx, global_offset_table,
3584 pic_helper_symbol));
3585 flag_pic = orig_flag_pic;
3587 /* Need to emit this whether or not we obey regdecls,
3588 since setjmp/longjmp can invalidate the liveness info.
3589 ??? In the case where we don't obey regdecls, this is not sufficient
3590 since we may not fall out the bottom. */
3591 emit_use (pic_offset_table_rtx);
3594 /* Emit a call instruction with the pattern given by PAT. ADDR is the
3595 address of the call target. */
3597 void
3598 sparc_emit_call_insn (rtx pat, rtx addr)
3600 rtx insn;
3602 insn = emit_call_insn (pat);
3604 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
3605 if (TARGET_VXWORKS_RTP
3606 && flag_pic
3607 && GET_CODE (addr) == SYMBOL_REF
3608 && (SYMBOL_REF_DECL (addr)
3609 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
3610 : !SYMBOL_REF_LOCAL_P (addr)))
3612 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
3613 crtl->uses_pic_offset_table = 1;
3617 /* Return 1 if RTX is a MEM which is known to be aligned to at
3618 least a DESIRED byte boundary. */
3620 int
3621 mem_min_alignment (rtx mem, int desired)
3623 rtx addr, base, offset;
3625 /* If it's not a MEM we can't accept it. */
3626 if (GET_CODE (mem) != MEM)
3627 return 0;
3629 /* Obviously... */
3630 if (!TARGET_UNALIGNED_DOUBLES
3631 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
3632 return 1;
3634 /* ??? The rest of the function predates MEM_ALIGN so
3635 there is probably a bit of redundancy. */
3636 addr = XEXP (mem, 0);
3637 base = offset = NULL_RTX;
3638 if (GET_CODE (addr) == PLUS)
3640 if (GET_CODE (XEXP (addr, 0)) == REG)
3642 base = XEXP (addr, 0);
3644 /* The point here is that if the base REG is
3645 properly aligned, the compiler will ensure that
3646 any REG-based index computed from it is
3647 aligned as well. */
3648 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
3649 offset = XEXP (addr, 1);
3650 else
3651 offset = const0_rtx;
3654 else if (GET_CODE (addr) == REG)
3656 base = addr;
3657 offset = const0_rtx;
3660 if (base != NULL_RTX)
3662 int regno = REGNO (base);
3664 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
3666 /* Check if the compiler has recorded some information
3667 about the alignment of the base REG. If reload has
3668 completed, we already matched with proper alignments.
3669 If not running global_alloc, reload might give us
3670 an unaligned pointer to the local stack, though. */
3671 if (((cfun != 0
3672 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
3673 || (optimize && reload_completed))
3674 && (INTVAL (offset) & (desired - 1)) == 0)
3675 return 1;
3677 else
3679 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
3680 return 1;
3683 else if (! TARGET_UNALIGNED_DOUBLES
3684 || CONSTANT_P (addr)
3685 || GET_CODE (addr) == LO_SUM)
3687 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
3688 is true, in which case we can only assume that an access is aligned if
3689 it is to a constant address, or the address involves a LO_SUM. */
3690 return 1;
3693 /* An obviously unaligned address. */
3694 return 0;
3698 /* Vectors to keep interesting information about registers where it can easily
3699 be got. We used to use the actual mode value as the bit number, but there
3700 are more than 32 modes now. Instead we use two tables: one indexed by
3701 hard register number, and one indexed by mode. */
3703 /* The purpose of sparc_mode_class is to shrink the range of modes so that
3704 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
3705 mapped into one sparc_mode_class mode. */
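/* Illustratively, DImode falls in D_MODE, so a hard register may hold
   a DImode value iff its hard_regno_mode_classes entry has the bit
   that sparc_mode_class[DImode] carries -- the bit test that
   HARD_REGNO_MODE_OK reduces to.  */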
3707 enum sparc_mode_class {
3708 S_MODE, D_MODE, T_MODE, O_MODE,
3709 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
3710 CC_MODE, CCFP_MODE
3713 /* Modes for single-word and smaller quantities. */
3714 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
3716 /* Modes for double-word and smaller quantities. */
3717 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
3719 /* Modes for quad-word and smaller quantities. */
3720 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
3722 /* Modes for 8-word and smaller quantities. */
3723 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
3725 /* Modes for single-float quantities. We must allow any single word or
3726 smaller quantity. This is because the fix/float conversion instructions
3727 take integer inputs/outputs from the float registers. */
3728 #define SF_MODES (S_MODES)
3730 /* Modes for double-float and smaller quantities. */
3731 #define DF_MODES (S_MODES | D_MODES)
3733 /* Modes for double-float only quantities. */
3734 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
3736 /* Modes for quad-float only quantities. */
3737 #define TF_ONLY_MODES (1 << (int) TF_MODE)
3739 /* Modes for quad-float and smaller quantities. */
3740 #define TF_MODES (DF_MODES | TF_ONLY_MODES)
3742 /* Modes for quad-float and double-float quantities. */
3743 #define TF_MODES_NO_S (DF_MODES_NO_S | TF_ONLY_MODES)
3745 /* Modes for quad-float pair only quantities. */
3746 #define OF_ONLY_MODES (1 << (int) OF_MODE)
3748 /* Modes for quad-float pairs and smaller quantities. */
3749 #define OF_MODES (TF_MODES | OF_ONLY_MODES)
3751 #define OF_MODES_NO_S (TF_MODES_NO_S | OF_ONLY_MODES)
3753 /* Modes for condition codes. */
3754 #define CC_MODES (1 << (int) CC_MODE)
3755 #define CCFP_MODES (1 << (int) CCFP_MODE)
3757 /* Value is 1 if register/mode pair is acceptable on sparc.
3758 The funny mixture of D and T modes is because integer operations
3759 do not specially operate on tetra quantities, so non-quad-aligned
3760 registers can hold quadword quantities (except %o4 and %i4 because
3761 they cross fixed registers). */
3763 /* This points to either the 32 bit or the 64 bit version. */
3764 const int *hard_regno_mode_classes;
3766 static const int hard_32bit_mode_classes[] = {
3767 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3768 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3769 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3770 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3772 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3773 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3774 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3775 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3777 /* FP regs f32 to f63. Only the even numbered registers actually exist,
3778 and none can hold SFmode/SImode values. */
3779 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3780 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3781 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3782 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3784 /* %fcc[0123] */
3785 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3787 /* %icc */
3788 CC_MODES
3791 static const int hard_64bit_mode_classes[] = {
3792 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3793 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3794 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3795 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3797 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3798 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3799 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3800 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3802 /* FP regs f32 to f63. Only the even numbered registers actually exist,
3803 and none can hold SFmode/SImode values. */
3804 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3805 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3806 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3807 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3809 /* %fcc[0123] */
3810 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3812 /* %icc */
3813 CC_MODES
3816 int sparc_mode_class [NUM_MACHINE_MODES];
3818 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
3820 static void
3821 sparc_init_modes (void)
3823 int i;
3825 for (i = 0; i < NUM_MACHINE_MODES; i++)
3827 switch (GET_MODE_CLASS (i))
3829 case MODE_INT:
3830 case MODE_PARTIAL_INT:
3831 case MODE_COMPLEX_INT:
3832 if (GET_MODE_SIZE (i) <= 4)
3833 sparc_mode_class[i] = 1 << (int) S_MODE;
3834 else if (GET_MODE_SIZE (i) == 8)
3835 sparc_mode_class[i] = 1 << (int) D_MODE;
3836 else if (GET_MODE_SIZE (i) == 16)
3837 sparc_mode_class[i] = 1 << (int) T_MODE;
3838 else if (GET_MODE_SIZE (i) == 32)
3839 sparc_mode_class[i] = 1 << (int) O_MODE;
3840 else
3841 sparc_mode_class[i] = 0;
3842 break;
3843 case MODE_VECTOR_INT:
3844 if (GET_MODE_SIZE (i) <= 4)
3845 sparc_mode_class[i] = 1 << (int)SF_MODE;
3846 else if (GET_MODE_SIZE (i) == 8)
3847 sparc_mode_class[i] = 1 << (int)DF_MODE;
3848 break;
3849 case MODE_FLOAT:
3850 case MODE_COMPLEX_FLOAT:
3851 if (GET_MODE_SIZE (i) <= 4)
3852 sparc_mode_class[i] = 1 << (int) SF_MODE;
3853 else if (GET_MODE_SIZE (i) == 8)
3854 sparc_mode_class[i] = 1 << (int) DF_MODE;
3855 else if (GET_MODE_SIZE (i) == 16)
3856 sparc_mode_class[i] = 1 << (int) TF_MODE;
3857 else if (GET_MODE_SIZE (i) == 32)
3858 sparc_mode_class[i] = 1 << (int) OF_MODE;
3859 else
3860 sparc_mode_class[i] = 0;
3861 break;
3862 case MODE_CC:
3863 if (i == (int) CCFPmode || i == (int) CCFPEmode)
3864 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
3865 else
3866 sparc_mode_class[i] = 1 << (int) CC_MODE;
3867 break;
3868 default:
3869 sparc_mode_class[i] = 0;
3870 break;
3874 if (TARGET_ARCH64)
3875 hard_regno_mode_classes = hard_64bit_mode_classes;
3876 else
3877 hard_regno_mode_classes = hard_32bit_mode_classes;
3879 /* Initialize the array used by REGNO_REG_CLASS. */
3880 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3882 if (i < 16 && TARGET_V8PLUS)
3883 sparc_regno_reg_class[i] = I64_REGS;
3884 else if (i < 32 || i == FRAME_POINTER_REGNUM)
3885 sparc_regno_reg_class[i] = GENERAL_REGS;
3886 else if (i < 64)
3887 sparc_regno_reg_class[i] = FP_REGS;
3888 else if (i < 96)
3889 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
3890 else if (i < 100)
3891 sparc_regno_reg_class[i] = FPCC_REGS;
3892 else
3893 sparc_regno_reg_class[i] = NO_REGS;
3897 /* Compute the frame size required by the function. This function is called
3898 during the reload pass and also by sparc_expand_prologue. */
3900 HOST_WIDE_INT
3901 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function_p)
3903 int outgoing_args_size = (crtl->outgoing_args_size
3904 + REG_PARM_STACK_SPACE (current_function_decl));
3905 int n_regs = 0; /* N_REGS is the number of 4-byte regs saved thus far. */
3906 int i;
3908 if (TARGET_ARCH64)
3910 for (i = 0; i < 8; i++)
3911 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3912 n_regs += 2;
3914 else
3916 for (i = 0; i < 8; i += 2)
3917 if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
3918 || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
3919 n_regs += 2;
3922 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
3923 if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
3924 || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
3925 n_regs += 2;
3927 /* Set up values for use in prologue and epilogue. */
3928 num_gfregs = n_regs;
3930 if (leaf_function_p
3931 && n_regs == 0
3932 && size == 0
3933 && crtl->outgoing_args_size == 0)
3934 actual_fsize = apparent_fsize = 0;
3935 else
/* We subtract STARTING_FRAME_OFFSET; remember that it's negative.  */
3938 apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8;
3939 apparent_fsize += n_regs * 4;
3940 actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
3943 /* Make sure nothing can clobber our register windows.
3944 If a SAVE must be done, or there is a stack-local variable,
3945 the register window area must be allocated. */
3946 if (! leaf_function_p || size > 0)
3947 actual_fsize += FIRST_PARM_OFFSET (current_function_decl);
3949 return SPARC_STACK_ALIGN (actual_fsize);
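/* Worked example (illustrative): for a non-leaf 32-bit function with
   40 bytes of locals, no call-saved FP/global registers live and no
   outgoing arguments, the computation above yields

     apparent_fsize = round-up-to-8 (40 - STARTING_FRAME_OFFSET)
     actual_fsize   = apparent_fsize + FIRST_PARM_OFFSET (decl)

   i.e. the register window save area and hidden parameter slots are
   reserved as soon as the function is not a leaf, and the result is
   finally rounded by SPARC_STACK_ALIGN.  */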
3952 /* Output any necessary .register pseudo-ops. */
3954 void
3955 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
3957 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
3958 int i;
3960 if (TARGET_ARCH32)
3961 return;
/* Check whether %g[2367] were used without a .register
   directive having been printed for them already.  */
3965 for (i = 2; i < 8; i++)
3967 if (df_regs_ever_live_p (i)
3968 && ! sparc_hard_reg_printed [i])
3970 sparc_hard_reg_printed [i] = 1;
/* %g7 is used as the TLS base register; use #ignore
   for it instead of #scratch.  */
3973 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
3974 i == 7 ? "ignore" : "scratch");
3976 if (i == 3) i = 5;
3978 #endif
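/* For example (illustrative output), a 64-bit function in which %g2
   and %g3 are live, with %g7 reserved as the TLS base register, would
   cause the following directives to be printed:

     .register %g2, #scratch
     .register %g3, #scratch
     .register %g7, #ignore  */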
3981 /* Save/restore call-saved registers from LOW to HIGH at BASE+OFFSET
3982 as needed. LOW should be double-word aligned for 32-bit registers.
3983 Return the new OFFSET. */
3985 #define SORR_SAVE 0
3986 #define SORR_RESTORE 1
3988 static int
3989 save_or_restore_regs (int low, int high, rtx base, int offset, int action)
3991 rtx mem, insn;
3992 int i;
3994 if (TARGET_ARCH64 && high <= 32)
3996 for (i = low; i < high; i++)
3998 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4000 mem = gen_rtx_MEM (DImode, plus_constant (base, offset));
4001 set_mem_alias_set (mem, sparc_sr_alias_set);
4002 if (action == SORR_SAVE)
4004 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
4005 RTX_FRAME_RELATED_P (insn) = 1;
4007 else /* action == SORR_RESTORE */
4008 emit_move_insn (gen_rtx_REG (DImode, i), mem);
4009 offset += 8;
4013 else
4015 for (i = low; i < high; i += 2)
4017 bool reg0 = df_regs_ever_live_p (i) && ! call_used_regs[i];
4018 bool reg1 = df_regs_ever_live_p (i+1) && ! call_used_regs[i+1];
4019 enum machine_mode mode;
4020 int regno;
4022 if (reg0 && reg1)
4024 mode = i < 32 ? DImode : DFmode;
4025 regno = i;
4027 else if (reg0)
4029 mode = i < 32 ? SImode : SFmode;
4030 regno = i;
4032 else if (reg1)
4034 mode = i < 32 ? SImode : SFmode;
4035 regno = i + 1;
4036 offset += 4;
4038 else
4039 continue;
4041 mem = gen_rtx_MEM (mode, plus_constant (base, offset));
4042 set_mem_alias_set (mem, sparc_sr_alias_set);
4043 if (action == SORR_SAVE)
4045 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
4046 RTX_FRAME_RELATED_P (insn) = 1;
4048 else /* action == SORR_RESTORE */
4049 emit_move_insn (gen_rtx_REG (mode, regno), mem);
4051 /* Always preserve double-word alignment. */
4052 offset = (offset + 7) & -8;
4056 return offset;
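/* Example of the pairing above (illustrative): on a 32-bit target with
   %g2 and %g3 both live and not call-used, a single DImode save of the
   pair is emitted at OFFSET; if only %g3 qualifies, an SImode save is
   emitted into the second half of the slot (OFFSET is first advanced
   by 4), and OFFSET is re-aligned to 8 afterwards in both cases.  */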
/* Emit code to save or restore call-saved registers, as selected
   by ACTION (SORR_SAVE or SORR_RESTORE).  */
4061 static void
4062 emit_save_or_restore_regs (int action)
4064 HOST_WIDE_INT offset;
4065 rtx base;
4067 offset = frame_base_offset - apparent_fsize;
4069 if (offset < -4096 || offset + num_gfregs * 4 > 4095)
4071 /* ??? This might be optimized a little as %g1 might already have a
4072 value close enough that a single add insn will do. */
/* ??? All of this is probably only a temporary fix, though,
   because if %g1 can hold a function result, then
   sparc_expand_epilogue will lose (the result will be
   clobbered).  */
4077 base = gen_rtx_REG (Pmode, 1);
4078 emit_move_insn (base, GEN_INT (offset));
4079 emit_insn (gen_rtx_SET (VOIDmode,
4080 base,
4081 gen_rtx_PLUS (Pmode, frame_base_reg, base)));
4082 offset = 0;
4084 else
4085 base = frame_base_reg;
4087 offset = save_or_restore_regs (0, 8, base, offset, action);
4088 save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, action);
4091 /* Generate a save_register_window insn. */
4093 static rtx
4094 gen_save_register_window (rtx increment)
4096 if (TARGET_ARCH64)
4097 return gen_save_register_windowdi (increment);
4098 else
4099 return gen_save_register_windowsi (increment);
4102 /* Generate an increment for the stack pointer. */
4104 static rtx
4105 gen_stack_pointer_inc (rtx increment)
4107 return gen_rtx_SET (VOIDmode,
4108 stack_pointer_rtx,
4109 gen_rtx_PLUS (Pmode,
4110 stack_pointer_rtx,
4111 increment));
4114 /* Generate a decrement for the stack pointer. */
4116 static rtx
4117 gen_stack_pointer_dec (rtx decrement)
4119 return gen_rtx_SET (VOIDmode,
4120 stack_pointer_rtx,
4121 gen_rtx_MINUS (Pmode,
4122 stack_pointer_rtx,
4123 decrement));
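/* For instance (illustrative), gen_stack_pointer_inc (GEN_INT (-96))
   produces RTL of the form

     (set (reg %sp) (plus (reg %sp) (const_int -96)))

   which matches a single "add %sp, -96, %sp" instruction.  */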
4126 /* Expand the function prologue. The prologue is responsible for reserving
4127 storage for the frame, saving the call-saved registers and loading the
4128 PIC register if needed. */
4130 void
4131 sparc_expand_prologue (void)
4133 rtx insn;
4134 int i;
4136 /* Compute a snapshot of current_function_uses_only_leaf_regs. Relying
4137 on the final value of the flag means deferring the prologue/epilogue
4138 expansion until just before the second scheduling pass, which is too
4139 late to emit multiple epilogues or return insns.
4141 Of course we are making the assumption that the value of the flag
4142 will not change between now and its final value. Of the three parts
of the formula, only the last term can reasonably vary.  Let's take a
closer look, assuming that the first two are true (otherwise the
last term is effectively silenced).
4147 If only_leaf_regs_used returns false, the global predicate will also
4148 be false so the actual frame size calculated below will be positive.
4149 As a consequence, the save_register_window insn will be emitted in
4150 the instruction stream; now this insn explicitly references %fp
4151 which is not a leaf register so only_leaf_regs_used will always
4152 return false subsequently.
4154 If only_leaf_regs_used returns true, we hope that the subsequent
4155 optimization passes won't cause non-leaf registers to pop up. For
4156 example, the regrename pass has special provisions to not rename to
4157 non-leaf registers in a leaf function. */
4158 sparc_leaf_function_p
4159 = optimize > 0 && leaf_function_p () && only_leaf_regs_used ();
4161 /* Need to use actual_fsize, since we are also allocating
4162 space for our callee (and our own register save area). */
4163 actual_fsize
= sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);
4166 /* Advertise that the data calculated just above are now valid. */
4167 sparc_prologue_data_valid_p = true;
4169 if (sparc_leaf_function_p)
4171 frame_base_reg = stack_pointer_rtx;
4172 frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
4174 else
4176 frame_base_reg = hard_frame_pointer_rtx;
4177 frame_base_offset = SPARC_STACK_BIAS;
4180 if (actual_fsize == 0)
4181 /* do nothing. */ ;
4182 else if (sparc_leaf_function_p)
4184 if (actual_fsize <= 4096)
4185 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4186 else if (actual_fsize <= 8192)
4188 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
4189 /* %sp is still the CFA register. */
4190 RTX_FRAME_RELATED_P (insn) = 1;
4191 insn
4192 = emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4194 else
4196 rtx reg = gen_rtx_REG (Pmode, 1);
4197 emit_move_insn (reg, GEN_INT (-actual_fsize));
4198 insn = emit_insn (gen_stack_pointer_inc (reg));
4199 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4200 gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4203 RTX_FRAME_RELATED_P (insn) = 1;
4205 else
4207 if (actual_fsize <= 4096)
4208 insn = emit_insn (gen_save_register_window (GEN_INT (-actual_fsize)));
4209 else if (actual_fsize <= 8192)
4211 insn = emit_insn (gen_save_register_window (GEN_INT (-4096)));
4212 /* %sp is not the CFA register anymore. */
4213 emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4215 else
4217 rtx reg = gen_rtx_REG (Pmode, 1);
4218 emit_move_insn (reg, GEN_INT (-actual_fsize));
4219 insn = emit_insn (gen_save_register_window (reg));
4222 RTX_FRAME_RELATED_P (insn) = 1;
for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
4224 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, i)) = 1;
4227 if (num_gfregs)
4228 emit_save_or_restore_regs (SORR_SAVE);
4230 /* Load the PIC register if needed. */
4231 if (flag_pic && crtl->uses_pic_offset_table)
4232 load_pic_register (false);
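/* Illustrative summary of the stack adjustments emitted above for a
   non-leaf function:

     actual_fsize <= 4096:         save %sp, -fsize, %sp
     4096 < actual_fsize <= 8192:  save %sp, -4096, %sp
                                   add  %sp, 4096-fsize, %sp
     actual_fsize > 8192:          sethi/or -fsize into %g1
                                   save %sp, %g1, %sp

   Leaf functions use the same three regimes, with plain adds to %sp
   instead of save.  */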
4235 /* This function generates the assembly code for function entry, which boils
4236 down to emitting the necessary .register directives. */
4238 static void
4239 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4241 /* Check that the assumption we made in sparc_expand_prologue is valid. */
4242 gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs);
4244 sparc_output_scratch_registers (file);
4247 /* Expand the function epilogue, either normal or part of a sibcall.
4248 We emit all the instructions except the return or the call. */
4250 void
4251 sparc_expand_epilogue (void)
4253 if (num_gfregs)
4254 emit_save_or_restore_regs (SORR_RESTORE);
4256 if (actual_fsize == 0)
4257 /* do nothing. */ ;
4258 else if (sparc_leaf_function_p)
4260 if (actual_fsize <= 4096)
4261 emit_insn (gen_stack_pointer_dec (GEN_INT (- actual_fsize)));
4262 else if (actual_fsize <= 8192)
4264 emit_insn (gen_stack_pointer_dec (GEN_INT (-4096)));
4265 emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - actual_fsize)));
4267 else
4269 rtx reg = gen_rtx_REG (Pmode, 1);
4270 emit_move_insn (reg, GEN_INT (-actual_fsize));
4271 emit_insn (gen_stack_pointer_dec (reg));
4276 /* Return true if it is appropriate to emit `return' instructions in the
4277 body of a function. */
4279 bool
4280 sparc_can_use_return_insn_p (void)
4282 return sparc_prologue_data_valid_p
4283 && (actual_fsize == 0 || !sparc_leaf_function_p);
4286 /* This function generates the assembly code for function exit. */
4288 static void
4289 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* If code does not drop into the epilogue, we still have to output
   a dummy nop for the sake of sane backtraces.  Otherwise, if the
4293 last two instructions of a function were "call foo; dslot;" this
4294 can make the return PC of foo (i.e. address of call instruction
4295 plus 8) point to the first instruction in the next function. */
4297 rtx insn, last_real_insn;
4299 insn = get_last_insn ();
4301 last_real_insn = prev_real_insn (insn);
4302 if (last_real_insn
4303 && GET_CODE (last_real_insn) == INSN
4304 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
4305 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
4307 if (last_real_insn && GET_CODE (last_real_insn) == CALL_INSN)
fputs ("\tnop\n", file);
4310 sparc_output_deferred_case_vectors ();
4313 /* Output a 'restore' instruction. */
4315 static void
4316 output_restore (rtx pat)
4318 rtx operands[3];
4320 if (! pat)
4322 fputs ("\t restore\n", asm_out_file);
4323 return;
4326 gcc_assert (GET_CODE (pat) == SET);
4328 operands[0] = SET_DEST (pat);
4329 pat = SET_SRC (pat);
4331 switch (GET_CODE (pat))
4333 case PLUS:
4334 operands[1] = XEXP (pat, 0);
4335 operands[2] = XEXP (pat, 1);
4336 output_asm_insn (" restore %r1, %2, %Y0", operands);
4337 break;
4338 case LO_SUM:
4339 operands[1] = XEXP (pat, 0);
4340 operands[2] = XEXP (pat, 1);
4341 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
4342 break;
4343 case ASHIFT:
4344 operands[1] = XEXP (pat, 0);
4345 gcc_assert (XEXP (pat, 1) == const1_rtx);
4346 output_asm_insn (" restore %r1, %r1, %Y0", operands);
4347 break;
4348 default:
4349 operands[1] = pat;
4350 output_asm_insn (" restore %%g0, %1, %Y0", operands);
4351 break;
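/* Illustrative outputs for the cases above, for a delay-slot insn
   computing the caller's %o0 from the callee's %i0:

     PLUS:     restore %i0, 1, %o0
     LO_SUM:   restore %i0, %lo(sym), %o0
     ASHIFT:   restore %i0, %i0, %o0   (doubling done as an addition)
     default:  restore %g0, %i1, %o0   (a plain move)  */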
4355 /* Output a return. */
4357 const char *
4358 output_return (rtx insn)
4360 if (sparc_leaf_function_p)
4362 /* This is a leaf function so we don't have to bother restoring the
4363 register window, which frees us from dealing with the convoluted
4364 semantics of restore/return. We simply output the jump to the
4365 return address and the insn in the delay slot (if any). */
4367 gcc_assert (! crtl->calls_eh_return);
4369 return "jmp\t%%o7+%)%#";
4371 else
4373 /* This is a regular function so we have to restore the register window.
4374 We may have a pending insn for the delay slot, which will be either
4375 combined with the 'restore' instruction or put in the delay slot of
4376 the 'return' instruction. */
4378 if (crtl->calls_eh_return)
4380 /* If the function uses __builtin_eh_return, the eh_return
4381 machinery occupies the delay slot. */
4382 gcc_assert (! final_sequence);
4384 if (! flag_delayed_branch)
4385 fputs ("\tadd\t%fp, %g1, %fp\n", asm_out_file);
4387 if (TARGET_V9)
4388 fputs ("\treturn\t%i7+8\n", asm_out_file);
4389 else
4390 fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file);
4392 if (flag_delayed_branch)
4393 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
4394 else
4395 fputs ("\t nop\n", asm_out_file);
4397 else if (final_sequence)
4399 rtx delay, pat;
4401 delay = NEXT_INSN (insn);
4402 gcc_assert (delay);
4404 pat = PATTERN (delay);
4406 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
4408 epilogue_renumber (&pat, 0);
4409 return "return\t%%i7+%)%#";
4411 else
4413 output_asm_insn ("jmp\t%%i7+%)", NULL);
4414 output_restore (pat);
4415 PATTERN (delay) = gen_blockage ();
4416 INSN_CODE (delay) = -1;
4419 else
4421 /* The delay slot is empty. */
4422 if (TARGET_V9)
4423 return "return\t%%i7+%)\n\t nop";
4424 else if (flag_delayed_branch)
4425 return "jmp\t%%i7+%)\n\t restore";
4426 else
4427 return "restore\n\tjmp\t%%o7+%)\n\t nop";
4431 return "";
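/* Illustrative assembly for the main cases above:

     leaf function:                 jmp    %o7+8
                                     nop   (or the delay-slot insn)
     V9 non-leaf, empty slot:       return %i7+8
                                     nop
     V8 non-leaf, delayed branches: jmp    %i7+8
                                     restore  */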
4434 /* Output a sibling call. */
4436 const char *
4437 output_sibcall (rtx insn, rtx call_operand)
4439 rtx operands[1];
4441 gcc_assert (flag_delayed_branch);
4443 operands[0] = call_operand;
4445 if (sparc_leaf_function_p)
4447 /* This is a leaf function so we don't have to bother restoring the
4448 register window. We simply output the jump to the function and
4449 the insn in the delay slot (if any). */
4451 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
4453 if (final_sequence)
4454 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
4455 operands);
4456 else
/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
   it into a branch if possible.  */
4459 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
4460 operands);
4462 else
4464 /* This is a regular function so we have to restore the register window.
4465 We may have a pending insn for the delay slot, which will be combined
4466 with the 'restore' instruction. */
4468 output_asm_insn ("call\t%a0, 0", operands);
4470 if (final_sequence)
4472 rtx delay = NEXT_INSN (insn);
4473 gcc_assert (delay);
4475 output_restore (PATTERN (delay));
4477 PATTERN (delay) = gen_blockage ();
4478 INSN_CODE (delay) = -1;
4480 else
4481 output_restore (NULL_RTX);
4484 return "";
4487 /* Functions for handling argument passing.
4489 For 32-bit, the first 6 args are normally in registers and the rest are
4490 pushed. Any arg that starts within the first 6 words is at least
partially passed in a register unless its data type forbids it.
4493 For 64-bit, the argument registers are laid out as an array of 16 elements
4494 and arguments are added sequentially. The first 6 int args and up to the
4495 first 16 fp args (depending on size) are passed in regs.
4497 Slot Stack Integral Float Float in structure Double Long Double
4498 ---- ----- -------- ----- ------------------ ------ -----------
4499 15 [SP+248] %f31 %f30,%f31 %d30
4500 14 [SP+240] %f29 %f28,%f29 %d28 %q28
4501 13 [SP+232] %f27 %f26,%f27 %d26
4502 12 [SP+224] %f25 %f24,%f25 %d24 %q24
4503 11 [SP+216] %f23 %f22,%f23 %d22
4504 10 [SP+208] %f21 %f20,%f21 %d20 %q20
4505 9 [SP+200] %f19 %f18,%f19 %d18
4506 8 [SP+192] %f17 %f16,%f17 %d16 %q16
4507 7 [SP+184] %f15 %f14,%f15 %d14
4508 6 [SP+176] %f13 %f12,%f13 %d12 %q12
4509 5 [SP+168] %o5 %f11 %f10,%f11 %d10
4510 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
4511 3 [SP+152] %o3 %f7 %f6,%f7 %d6
4512 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
4513 1 [SP+136] %o1 %f3 %f2,%f3 %d2
4514 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
4516 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
4518 Integral arguments are always passed as 64-bit quantities appropriately
4519 extended.
4521 Passing of floating point values is handled as follows.
4522 If a prototype is in scope:
If the value is in a named argument (i.e. the function is not stdarg,
or the value is not part of the `...'), then the value is passed in
the appropriate fp reg.
4526 If the value is part of the `...' and is passed in one of the first 6
4527 slots then the value is passed in the appropriate int reg.
4528 If the value is part of the `...' and is not passed in one of the first 6
4529 slots then the value is passed in memory.
4530 If a prototype is not in scope:
4531 If the value is one of the first 6 arguments the value is passed in the
4532 appropriate integer reg and the appropriate fp reg.
4533 If the value is not one of the first 6 arguments the value is passed in
4534 the appropriate fp reg and in memory.
4537 Summary of the calling conventions implemented by GCC on SPARC:
4539 32-bit ABI:
4540 size argument return value
4542 small integer <4 int. reg. int. reg.
4543 word 4 int. reg. int. reg.
4544 double word 8 int. reg. int. reg.
4546 _Complex small integer <8 int. reg. int. reg.
4547 _Complex word 8 int. reg. int. reg.
4548 _Complex double word 16 memory int. reg.
4550 vector integer <=8 int. reg. FP reg.
4551 vector integer >8 memory memory
4553 float 4 int. reg. FP reg.
4554 double 8 int. reg. FP reg.
4555 long double 16 memory memory
4557 _Complex float 8 memory FP reg.
4558 _Complex double 16 memory FP reg.
4559 _Complex long double 32 memory FP reg.
4561 vector float any memory memory
4563 aggregate any memory memory
4567 64-bit ABI:
4568 size argument return value
4570 small integer <8 int. reg. int. reg.
4571 word 8 int. reg. int. reg.
4572 double word 16 int. reg. int. reg.
4574 _Complex small integer <16 int. reg. int. reg.
4575 _Complex word 16 int. reg. int. reg.
4576 _Complex double word 32 memory int. reg.
4578 vector integer <=16 FP reg. FP reg.
4579 vector integer 16<s<=32 memory FP reg.
4580 vector integer >32 memory memory
4582 float 4 FP reg. FP reg.
4583 double 8 FP reg. FP reg.
4584 long double 16 FP reg. FP reg.
4586 _Complex float 8 FP reg. FP reg.
4587 _Complex double 16 FP reg. FP reg.
4588 _Complex long double 32 memory FP reg.
4590 vector float <=16 FP reg. FP reg.
4591 vector float 16<s<=32 memory FP reg.
4592 vector float >32 memory memory
4594 aggregate <=16 reg. reg.
4595 aggregate 16<s<=32 memory reg.
4596 aggregate >32 memory memory
4600 Note #1: complex floating-point types follow the extended SPARC ABIs as
4601 implemented by the Sun compiler.
4603 Note #2: integral vector types follow the scalar floating-point types
4604 conventions to match what is implemented by the Sun VIS SDK.
4606 Note #3: floating-point vector types follow the aggregate types
4607 conventions. */
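/* Concrete example (illustrative): given

     struct s { int x; };
     void f (int a, double b, struct s c);

   the 64-bit ABI above passes `a' in %o0 (slot 0), `b' in %d2
   (slot 1) and the 4-byte struct `c' in %o2 (slot 2), whereas the
   32-bit ABI passes `a' in %o0, `b' in the %o1/%o2 pair and `c' by
   reference through a pointer in %o3.  */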
4610 /* Maximum number of int regs for args. */
4611 #define SPARC_INT_ARG_MAX 6
4612 /* Maximum number of fp regs for args. */
4613 #define SPARC_FP_ARG_MAX 16
4615 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
4617 /* Handle the INIT_CUMULATIVE_ARGS macro.
4618 Initialize a variable CUM of type CUMULATIVE_ARGS
4619 for a call to a function whose data type is FNTYPE.
4620 For a library call, FNTYPE is 0. */
4622 void
4623 init_cumulative_args (struct sparc_args *cum, tree fntype,
4624 rtx libname ATTRIBUTE_UNUSED,
4625 tree fndecl ATTRIBUTE_UNUSED)
4627 cum->words = 0;
4628 cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
4629 cum->libcall_p = fntype == 0;
4632 /* Handle the TARGET_PROMOTE_PROTOTYPES target hook.
4633 When a prototype says `char' or `short', really pass an `int'. */
4635 static bool
4636 sparc_promote_prototypes (const_tree fntype ATTRIBUTE_UNUSED)
4638 return TARGET_ARCH32 ? true : false;
4641 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
4643 static bool
4644 sparc_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
4646 return TARGET_ARCH64 ? true : false;
/* Scan the record type TYPE and set the following predicates:
   - INTREGS_P: the record contains at least one field or sub-field
     that is eligible for promotion in integer registers.
   - FPREGS_P: the record contains at least one field or sub-field
     that is eligible for promotion in floating-point registers.
   - PACKED_P: the record contains at least one field that is packed.

   Sub-fields are not taken into account for the PACKED_P predicate.  */
4658 static void
4659 scan_record_type (tree type, int *intregs_p, int *fpregs_p, int *packed_p)
4661 tree field;
4663 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4665 if (TREE_CODE (field) == FIELD_DECL)
4667 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4668 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
4669 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4670 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4671 && TARGET_FPU)
4672 *fpregs_p = 1;
4673 else
4674 *intregs_p = 1;
4676 if (packed_p && DECL_PACKED (field))
4677 *packed_p = 1;
4682 /* Compute the slot number to pass an argument in.
4683 Return the slot number or -1 if passing on the stack.
4685 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4686 the preceding args and about the function being called.
4687 MODE is the argument's machine mode.
4688 TYPE is the data type of the argument (as a tree).
4689 This is null for libcalls where that information may
4690 not be available.
4691 NAMED is nonzero if this argument is a named parameter
4692 (otherwise it is an extra parameter matching an ellipsis).
4693 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
4694 *PREGNO records the register number to use if scalar type.
4695 *PPADDING records the amount of padding needed in words. */
4697 static int
4698 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
4699 tree type, int named, int incoming_p,
4700 int *pregno, int *ppadding)
4702 int regbase = (incoming_p
4703 ? SPARC_INCOMING_INT_ARG_FIRST
4704 : SPARC_OUTGOING_INT_ARG_FIRST);
4705 int slotno = cum->words;
4706 enum mode_class mclass;
4707 int regno;
4709 *ppadding = 0;
4711 if (type && TREE_ADDRESSABLE (type))
4712 return -1;
4714 if (TARGET_ARCH32
4715 && mode == BLKmode
4716 && type
4717 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
4718 return -1;
4720 /* For SPARC64, objects requiring 16-byte alignment get it. */
4721 if (TARGET_ARCH64
4722 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
4723 && (slotno & 1) != 0)
4724 slotno++, *ppadding = 1;
4726 mclass = GET_MODE_CLASS (mode);
4727 if (type && TREE_CODE (type) == VECTOR_TYPE)
4729 /* Vector types deserve special treatment because they are
4730 polymorphic wrt their mode, depending upon whether VIS
4731 instructions are enabled. */
4732 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4734 /* The SPARC port defines no floating-point vector modes. */
4735 gcc_assert (mode == BLKmode);
4737 else
4739 /* Integral vector types should either have a vector
4740 mode or an integral mode, because we are guaranteed
4741 by pass_by_reference that their size is not greater
than 16 bytes and TImode is 16 bytes wide.  */
4743 gcc_assert (mode != BLKmode);
4745 /* Vector integers are handled like floats according to
4746 the Sun VIS SDK. */
4747 mclass = MODE_FLOAT;
4751 switch (mclass)
4753 case MODE_FLOAT:
4754 case MODE_COMPLEX_FLOAT:
4755 case MODE_VECTOR_INT:
4756 if (TARGET_ARCH64 && TARGET_FPU && named)
4758 if (slotno >= SPARC_FP_ARG_MAX)
4759 return -1;
4760 regno = SPARC_FP_ARG_FIRST + slotno * 2;
/* Arguments filling only a single FP register are
   right-justified in the enclosing double FP register.  */
4763 if (GET_MODE_SIZE (mode) <= 4)
4764 regno++;
4765 break;
4767 /* fallthrough */
4769 case MODE_INT:
4770 case MODE_COMPLEX_INT:
4771 if (slotno >= SPARC_INT_ARG_MAX)
4772 return -1;
4773 regno = regbase + slotno;
4774 break;
4776 case MODE_RANDOM:
4777 if (mode == VOIDmode)
4778 /* MODE is VOIDmode when generating the actual call. */
4779 return -1;
4781 gcc_assert (mode == BLKmode);
4783 if (TARGET_ARCH32
4784 || !type
4785 || (TREE_CODE (type) != VECTOR_TYPE
4786 && TREE_CODE (type) != RECORD_TYPE))
4788 if (slotno >= SPARC_INT_ARG_MAX)
4789 return -1;
4790 regno = regbase + slotno;
4792 else /* TARGET_ARCH64 && type */
4794 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
4796 /* First see what kinds of registers we would need. */
4797 if (TREE_CODE (type) == VECTOR_TYPE)
4798 fpregs_p = 1;
4799 else
4800 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
4802 /* The ABI obviously doesn't specify how packed structures
4803 are passed. These are defined to be passed in int regs
4804 if possible, otherwise memory. */
4805 if (packed_p || !named)
4806 fpregs_p = 0, intregs_p = 1;
4808 /* If all arg slots are filled, then must pass on stack. */
4809 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
4810 return -1;
4812 /* If there are only int args and all int arg slots are filled,
4813 then must pass on stack. */
4814 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
4815 return -1;
4817 /* Note that even if all int arg slots are filled, fp members may
4818 still be passed in regs if such regs are available.
*PREGNO isn't set because there may be more than one; it's up
to the caller to compute them.  */
4821 return slotno;
4823 break;
4825 default :
4826 gcc_unreachable ();
4829 *pregno = regno;
4830 return slotno;
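/* Example (illustrative): on TARGET_ARCH64 with a prototype in scope,
   the third argument of "void f (int, int, double)" is named and gets
   slotno 2; its class is MODE_FLOAT, so regno = SPARC_FP_ARG_FIRST
   + 2 * slotno, i.e. the %d4 slot of the table further above.  */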
4833 /* Handle recursive register counting for structure field layout. */
4835 struct function_arg_record_value_parms
4837 rtx ret; /* return expression being built. */
4838 int slotno; /* slot number of the argument. */
4839 int named; /* whether the argument is named. */
4840 int regbase; /* regno of the base register. */
4841 int stack; /* 1 if part of the argument is on the stack. */
4842 int intoffset; /* offset of the first pending integer field. */
4843 unsigned int nregs; /* number of words passed in registers. */
4846 static void function_arg_record_value_3
4847 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
4848 static void function_arg_record_value_2
4849 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
4850 static void function_arg_record_value_1
4851 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
4852 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
4853 static rtx function_arg_union_value (int, enum machine_mode, int, int);
4855 /* A subroutine of function_arg_record_value. Traverse the structure
4856 recursively and determine how many registers will be required. */
4858 static void
4859 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
4860 struct function_arg_record_value_parms *parms,
4861 bool packed_p)
4863 tree field;
/* We need to compute how many registers are needed so we can
   allocate the PARALLEL, but before we can do that we need to know
4867 whether there are any packed fields. The ABI obviously doesn't
4868 specify how structures are passed in this case, so they are
4869 defined to be passed in int regs if possible, otherwise memory,
4870 regardless of whether there are fp values present. */
4872 if (! packed_p)
4873 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4875 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
4877 packed_p = true;
4878 break;
4882 /* Compute how many registers we need. */
4883 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4885 if (TREE_CODE (field) == FIELD_DECL)
4887 HOST_WIDE_INT bitpos = startbitpos;
4889 if (DECL_SIZE (field) != 0)
4891 if (integer_zerop (DECL_SIZE (field)))
4892 continue;
4894 if (host_integerp (bit_position (field), 1))
4895 bitpos += int_bit_position (field);
4898 /* ??? FIXME: else assume zero offset. */
4900 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4901 function_arg_record_value_1 (TREE_TYPE (field),
4902 bitpos,
4903 parms,
4904 packed_p);
4905 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4906 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4907 && TARGET_FPU
4908 && parms->named
4909 && ! packed_p)
4911 if (parms->intoffset != -1)
4913 unsigned int startbit, endbit;
4914 int intslots, this_slotno;
4916 startbit = parms->intoffset & -BITS_PER_WORD;
4917 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
4919 intslots = (endbit - startbit) / BITS_PER_WORD;
4920 this_slotno = parms->slotno + parms->intoffset
4921 / BITS_PER_WORD;
4923 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
4925 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
4926 /* We need to pass this field on the stack. */
4927 parms->stack = 1;
4930 parms->nregs += intslots;
4931 parms->intoffset = -1;
/* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
   If it weren't true we wouldn't be here.  */
4936 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
4937 && DECL_MODE (field) == BLKmode)
4938 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
4939 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
4940 parms->nregs += 2;
4941 else
4942 parms->nregs += 1;
4944 else
4946 if (parms->intoffset == -1)
4947 parms->intoffset = bitpos;
4953 /* A subroutine of function_arg_record_value. Assign the bits of the
4954 structure between parms->intoffset and bitpos to integer registers. */
4956 static void
4957 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
4958 struct function_arg_record_value_parms *parms)
4960 enum machine_mode mode;
4961 unsigned int regno;
4962 unsigned int startbit, endbit;
4963 int this_slotno, intslots, intoffset;
4964 rtx reg;
4966 if (parms->intoffset == -1)
4967 return;
4969 intoffset = parms->intoffset;
4970 parms->intoffset = -1;
4972 startbit = intoffset & -BITS_PER_WORD;
4973 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
4974 intslots = (endbit - startbit) / BITS_PER_WORD;
4975 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
4977 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
4978 if (intslots <= 0)
4979 return;
4981 /* If this is the trailing part of a word, only load that much into
4982 the register. Otherwise load the whole register. Note that in
the latter case we may pick up unwanted bits.  It's not a problem
at the moment but we may wish to revisit this.  */
4986 if (intoffset % BITS_PER_WORD != 0)
4987 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
4988 MODE_INT);
4989 else
4990 mode = word_mode;
4992 intoffset /= BITS_PER_UNIT;
4995 regno = parms->regbase + this_slotno;
4996 reg = gen_rtx_REG (mode, regno);
4997 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
4998 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
5000 this_slotno += 1;
5001 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
5002 mode = word_mode;
5003 parms->nregs += 1;
5004 intslots -= 1;
5006 while (intslots > 0);
5009 /* A subroutine of function_arg_record_value. Traverse the structure
5010 recursively and assign bits to floating point registers. Track which
5011 bits in between need integer registers; invoke function_arg_record_value_3
5012 to make that happen. */
5014 static void
5015 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
5016 struct function_arg_record_value_parms *parms,
5017 bool packed_p)
5019 tree field;
5021 if (! packed_p)
5022 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5024 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
5026 packed_p = true;
5027 break;
5031 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5033 if (TREE_CODE (field) == FIELD_DECL)
5035 HOST_WIDE_INT bitpos = startbitpos;
5037 if (DECL_SIZE (field) != 0)
5039 if (integer_zerop (DECL_SIZE (field)))
5040 continue;
5042 if (host_integerp (bit_position (field), 1))
5043 bitpos += int_bit_position (field);
5046 /* ??? FIXME: else assume zero offset. */
5048 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5049 function_arg_record_value_2 (TREE_TYPE (field),
5050 bitpos,
5051 parms,
5052 packed_p);
5053 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5054 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5055 && TARGET_FPU
5056 && parms->named
5057 && ! packed_p)
5059 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
5060 int regno, nregs, pos;
5061 enum machine_mode mode = DECL_MODE (field);
5062 rtx reg;
5064 function_arg_record_value_3 (bitpos, parms);
5066 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
5067 && mode == BLKmode)
5069 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5070 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
5072 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
5074 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5075 nregs = 2;
5077 else
5078 nregs = 1;
5080 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
5081 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
5082 regno++;
5083 reg = gen_rtx_REG (mode, regno);
5084 pos = bitpos / BITS_PER_UNIT;
5085 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5086 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5087 parms->nregs += 1;
5088 while (--nregs > 0)
5090 regno += GET_MODE_SIZE (mode) / 4;
5091 reg = gen_rtx_REG (mode, regno);
5092 pos += GET_MODE_SIZE (mode);
5093 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5094 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5095 parms->nregs += 1;
5098 else
5100 if (parms->intoffset == -1)
5101 parms->intoffset = bitpos;
5107 /* Used by function_arg and function_value to implement the complex
5108 conventions of the 64-bit ABI for passing and returning structures.
5109 Return an expression valid as a return value for the two macros
5110 FUNCTION_ARG and FUNCTION_VALUE.
5112 TYPE is the data type of the argument (as a tree).
5113 This is null for libcalls where that information may
5114 not be available.
5115 MODE is the argument's machine mode.
5116 SLOTNO is the index number of the argument's slot in the parameter array.
5117 NAMED is nonzero if this argument is a named parameter
5118 (otherwise it is an extra parameter matching an ellipsis).
5119 REGBASE is the regno of the base register for the parameter array. */
5121 static rtx
5122 function_arg_record_value (const_tree type, enum machine_mode mode,
5123 int slotno, int named, int regbase)
5125 HOST_WIDE_INT typesize = int_size_in_bytes (type);
5126 struct function_arg_record_value_parms parms;
5127 unsigned int nregs;
5129 parms.ret = NULL_RTX;
5130 parms.slotno = slotno;
5131 parms.named = named;
5132 parms.regbase = regbase;
5133 parms.stack = 0;
5135 /* Compute how many registers we need. */
5136 parms.nregs = 0;
5137 parms.intoffset = 0;
5138 function_arg_record_value_1 (type, 0, &parms, false);
5140 /* Take into account pending integer fields. */
5141 if (parms.intoffset != -1)
5143 unsigned int startbit, endbit;
5144 int intslots, this_slotno;
5146 startbit = parms.intoffset & -BITS_PER_WORD;
5147 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5148 intslots = (endbit - startbit) / BITS_PER_WORD;
5149 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
5151 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
5153 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
5154 /* We need to pass this field on the stack. */
5155 parms.stack = 1;
5158 parms.nregs += intslots;
5160 nregs = parms.nregs;
5162 /* Allocate the vector and handle some annoying special cases. */
5163 if (nregs == 0)
5165 /* ??? Empty structure has no value? Duh? */
5166 if (typesize <= 0)
5168 /* Though there's nothing really to store, return a word register
5169 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
leads to breakage because there are zero bytes to load.  */
5172 return gen_rtx_REG (mode, regbase);
5174 else
5176 /* ??? C++ has structures with no fields, and yet a size. Give up
5177 for now and pass everything back in integer registers. */
5178 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5180 if (nregs + slotno > SPARC_INT_ARG_MAX)
5181 nregs = SPARC_INT_ARG_MAX - slotno;
5183 gcc_assert (nregs != 0);
5185 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
5187 /* If at least one field must be passed on the stack, generate
5188 (parallel [(expr_list (nil) ...) ...]) so that all fields will
5189 also be passed on the stack. We can't do much better because the
5190 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
5191 of structures for which the fields passed exclusively in registers
5192 are not at the beginning of the structure. */
5193 if (parms.stack)
5194 XVECEXP (parms.ret, 0, 0)
5195 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5197 /* Fill in the entries. */
5198 parms.nregs = 0;
5199 parms.intoffset = 0;
5200 function_arg_record_value_2 (type, 0, &parms, false);
5201 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
5203 gcc_assert (parms.nregs == nregs);
5205 return parms.ret;
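/* Example of the result (illustrative): for a named argument

     struct p { double x; double y; };

   starting at slot 0, the code above builds

     (parallel [(expr_list (reg:DF %f0) (const_int 0))
                (expr_list (reg:DF %f2) (const_int 8))])

   i.e. `x' in %d0 and `y' in %d2, with no integer registers and no
   stack involvement.  */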
5208 /* Used by function_arg and function_value to implement the conventions
5209 of the 64-bit ABI for passing and returning unions.
5210 Return an expression valid as a return value for the two macros
5211 FUNCTION_ARG and FUNCTION_VALUE.
SIZE is the size in bytes of the union.
MODE is the argument's machine mode.
SLOTNO is the index number of the argument's slot in the parameter array.
REGNO is the hard register the union will be passed in.  */
5217 static rtx
5218 function_arg_union_value (int size, enum machine_mode mode, int slotno,
5219 int regno)
5221 int nwords = ROUND_ADVANCE (size), i;
5222 rtx regs;
5224 /* See comment in previous function for empty structures. */
5225 if (nwords == 0)
5226 return gen_rtx_REG (mode, regno);
5228 if (slotno == SPARC_INT_ARG_MAX - 1)
5229 nwords = 1;
5231 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
5233 for (i = 0; i < nwords; i++)
5235 /* Unions are passed left-justified. */
5236 XVECEXP (regs, 0, i)
5237 = gen_rtx_EXPR_LIST (VOIDmode,
5238 gen_rtx_REG (word_mode, regno),
5239 GEN_INT (UNITS_PER_WORD * i));
5240 regno++;
5243 return regs;
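/* Example (illustrative): an 8-byte union passed in slot 1 of the
   64-bit ABI comes back as

     (parallel [(expr_list (reg:DI %o1) (const_int 0))])

   i.e. a single left-justified word register.  */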
5246 /* Used by function_arg and function_value to implement the conventions
5247 for passing and returning large (BLKmode) vectors.
5248 Return an expression valid as a return value for the two macros
5249 FUNCTION_ARG and FUNCTION_VALUE.
5251 SIZE is the size in bytes of the vector (at least 8 bytes).
5252 REGNO is the FP hard register the vector will be passed in. */
5254 static rtx
5255 function_arg_vector_value (int size, int regno)
5257 int i, nregs = size / 8;
5258 rtx regs;
5260 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
5262 for (i = 0; i < nregs; i++)
5264 XVECEXP (regs, 0, i)
5265 = gen_rtx_EXPR_LIST (VOIDmode,
5266 gen_rtx_REG (DImode, regno + 2*i),
5267 GEN_INT (i*8));
5270 return regs;
5273 /* Handle the FUNCTION_ARG macro.
5274 Determine where to put an argument to a function.
5275 Value is zero to push the argument on the stack,
5276 or a hard register in which to store the argument.
5278 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5279 the preceding args and about the function being called.
5280 MODE is the argument's machine mode.
5281 TYPE is the data type of the argument (as a tree).
5282 This is null for libcalls where that information may
5283 not be available.
5284 NAMED is nonzero if this argument is a named parameter
5285 (otherwise it is an extra parameter matching an ellipsis).
5286 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. */
5289 function_arg (const struct sparc_args *cum, enum machine_mode mode,
5290 tree type, int named, int incoming_p)
5292 int regbase = (incoming_p
5293 ? SPARC_INCOMING_INT_ARG_FIRST
5294 : SPARC_OUTGOING_INT_ARG_FIRST);
5295 int slotno, regno, padding;
5296 enum mode_class mclass = GET_MODE_CLASS (mode);
5298 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
5299 &regno, &padding);
5300 if (slotno == -1)
5301 return 0;
5303 /* Vector types deserve special treatment because they are polymorphic wrt
5304 their mode, depending upon whether VIS instructions are enabled. */
5305 if (type && TREE_CODE (type) == VECTOR_TYPE)
5307 HOST_WIDE_INT size = int_size_in_bytes (type);
5308 gcc_assert ((TARGET_ARCH32 && size <= 8)
5309 || (TARGET_ARCH64 && size <= 16));
5311 if (mode == BLKmode)
5312 return function_arg_vector_value (size,
5313 SPARC_FP_ARG_FIRST + 2*slotno);
5314 else
5315 mclass = MODE_FLOAT;
5318 if (TARGET_ARCH32)
5319 return gen_rtx_REG (mode, regno);
5321 /* Structures up to 16 bytes in size are passed in arg slots on the stack
5322 and are promoted to registers if possible. */
5323 if (type && TREE_CODE (type) == RECORD_TYPE)
5325 HOST_WIDE_INT size = int_size_in_bytes (type);
5326 gcc_assert (size <= 16);
5328 return function_arg_record_value (type, mode, slotno, named, regbase);
5331 /* Unions up to 16 bytes in size are passed in integer registers. */
5332 else if (type && TREE_CODE (type) == UNION_TYPE)
5334 HOST_WIDE_INT size = int_size_in_bytes (type);
5335 gcc_assert (size <= 16);
5337 return function_arg_union_value (size, mode, slotno, regno);
5340 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
5341 but also have the slot allocated for them.
5342 If no prototype is in scope fp values in register slots get passed
5343 in two places, either fp regs and int regs or fp regs and memory. */
5344 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
5345 && SPARC_FP_REG_P (regno))
5347 rtx reg = gen_rtx_REG (mode, regno);
5348 if (cum->prototype_p || cum->libcall_p)
5350 /* "* 2" because fp reg numbers are recorded in 4 byte
5351 quantities. */
5352 #if 0
5353 /* ??? This will cause the value to be passed in the fp reg and
5354 in the stack. When a prototype exists we want to pass the
5355 value in the reg but reserve space on the stack. That's an
5356 optimization, and is deferred [for a bit]. */
5357 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
5358 return gen_rtx_PARALLEL (mode,
5359 gen_rtvec (2,
5360 gen_rtx_EXPR_LIST (VOIDmode,
5361 NULL_RTX, const0_rtx),
5362 gen_rtx_EXPR_LIST (VOIDmode,
5363 reg, const0_rtx)));
5364 else
5365 #else
5366 /* ??? It seems that passing back a register even when past
5367 the area declared by REG_PARM_STACK_SPACE will allocate
5368 space appropriately, and will not copy the data onto the
5369 stack, exactly as we desire.
5371 This is due to locate_and_pad_parm being called in
5372 expand_call whenever reg_parm_stack_space > 0, which
5373 while beneficial to our example here, would seem to be
5374 in error from what had been intended. Ho hum... -- r~ */
5375 #endif
5376 return reg;
5378 else
5380 rtx v0, v1;
5382 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
5384 int intreg;
/* On incoming, we don't need to know that the value is passed
   in %f0 and %i0; knowing it confuses other parts, causing
   needless spillage even in the simplest cases.  */
5389 if (incoming_p)
5390 return reg;
5392 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
5393 + (regno - SPARC_FP_ARG_FIRST) / 2);
5395 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5396 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
5397 const0_rtx);
5398 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5400 else
5402 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5403 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5404 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5409 /* All other aggregate types are passed in an integer register in a mode
5410 corresponding to the size of the type. */
5411 else if (type && AGGREGATE_TYPE_P (type))
5413 HOST_WIDE_INT size = int_size_in_bytes (type);
5414 gcc_assert (size <= 16);
5416 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5419 return gen_rtx_REG (mode, regno);
5422 /* For an arg passed partly in registers and partly in memory,
5423 this is the number of bytes of registers used.
5424 For args passed entirely in registers or entirely in memory, zero.
5426 Any arg that starts in the first 6 regs but won't entirely fit in them
5427 needs partial registers on v8. On v9, structures with integer
5428 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
5429 values that begin in the last fp reg [where "last fp reg" varies with the
5430 mode] will be split between that reg and memory. */
5432 static int
5433 sparc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5434 tree type, bool named)
5436 int slotno, regno, padding;
/* We pass 0 for incoming_p here; it doesn't matter.  */
5439 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5441 if (slotno == -1)
5442 return 0;
5444 if (TARGET_ARCH32)
5446 if ((slotno + (mode == BLKmode
5447 ? ROUND_ADVANCE (int_size_in_bytes (type))
5448 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
5449 > SPARC_INT_ARG_MAX)
5450 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
5452 else
5454 /* We are guaranteed by pass_by_reference that the size of the
5455 argument is not greater than 16 bytes, so we only need to return
5456 one word if the argument is partially passed in registers. */
5458 if (type && AGGREGATE_TYPE_P (type))
5460 int size = int_size_in_bytes (type);
5462 if (size > UNITS_PER_WORD
5463 && slotno == SPARC_INT_ARG_MAX - 1)
5464 return UNITS_PER_WORD;
5466 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
5467 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5468 && ! (TARGET_FPU && named)))
5470 /* The complex types are passed as packed types. */
5471 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
5472 && slotno == SPARC_INT_ARG_MAX - 1)
5473 return UNITS_PER_WORD;
5475 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5477 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
5478 > SPARC_FP_ARG_MAX)
5479 return UNITS_PER_WORD;
5483 return 0;
5486 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
5487 Specify whether to pass the argument by reference. */
5489 static bool
5490 sparc_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5491 enum machine_mode mode, const_tree type,
5492 bool named ATTRIBUTE_UNUSED)
5494 if (TARGET_ARCH32)
5495 /* Original SPARC 32-bit ABI says that structures and unions,
5496 and quad-precision floats are passed by reference. For Pascal,
5497 also pass arrays by reference. All other base types are passed
5498 in registers.
5500 Extended ABI (as implemented by the Sun compiler) says that all
5501 complex floats are passed by reference. Pass complex integers
5502 in registers up to 8 bytes. More generally, enforce the 2-word
5503 cap for passing arguments in registers.
5505 Vector ABI (as implemented by the Sun VIS SDK) says that vector
5506 integers are passed like floats of the same size, that is in
5507 registers up to 8 bytes. Pass all vector floats by reference
5508 like structure and unions. */
5509 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
5510 || mode == SCmode
5511 /* Catch CDImode, TFmode, DCmode and TCmode. */
5512 || GET_MODE_SIZE (mode) > 8
5513 || (type
5514 && TREE_CODE (type) == VECTOR_TYPE
5515 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5516 else
5517 /* Original SPARC 64-bit ABI says that structures and unions
5518 smaller than 16 bytes are passed in registers, as well as
5519 all other base types.
5521 Extended ABI (as implemented by the Sun compiler) says that
5522 complex floats are passed in registers up to 16 bytes. Pass
5523 all complex integers in registers up to 16 bytes. More generally,
5524 enforce the 2-word cap for passing arguments in registers.
5526 Vector ABI (as implemented by the Sun VIS SDK) says that vector
5527 integers are passed like floats of the same size, that is in
5528 registers (up to 16 bytes). Pass all vector floats like structure
5529 and unions. */
5530 return ((type
5531 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
5532 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
5533 /* Catch CTImode and TCmode. */
5534 || GET_MODE_SIZE (mode) > 16);
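/* Examples (illustrative): under the 32-bit rules above, "long double"
   (TFmode), "_Complex float" (SCmode) and all aggregates are passed by
   reference, while "long long" and "_Complex int" (8 bytes each) still
   go by value; under the 64-bit rules only aggregates and vectors
   larger than 16 bytes, and modes wider than 16 bytes such as TCmode,
   are passed by reference.  */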
5537 /* Handle the FUNCTION_ARG_ADVANCE macro.
5538 Update the data in CUM to advance over an argument
5539 of mode MODE and data type TYPE.
5540 TYPE is null for libcalls where that information may not be available. */
5542 void
5543 function_arg_advance (struct sparc_args *cum, enum machine_mode mode,
5544 tree type, int named)
5546 int slotno, regno, padding;
/* We pass 0 for incoming_p here; it doesn't matter.  */
5549 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5551 /* If register required leading padding, add it. */
5552 if (slotno != -1)
5553 cum->words += padding;
5555 if (TARGET_ARCH32)
5557 cum->words += (mode != BLKmode
5558 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5559 : ROUND_ADVANCE (int_size_in_bytes (type)));
5561 else
5563 if (type && AGGREGATE_TYPE_P (type))
5565 int size = int_size_in_bytes (type);
5567 if (size <= 8)
5568 ++cum->words;
5569 else if (size <= 16)
5570 cum->words += 2;
5571 else /* passed by reference */
5572 ++cum->words;
5574 else
5576 cum->words += (mode != BLKmode
5577 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5578 : ROUND_ADVANCE (int_size_in_bytes (type)));
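/* Example (illustrative): advancing over "void f (int, double, int)"
   on the 32-bit ABI moves cum->words 0 -> 1 -> 3 -> 4, so the three
   arguments occupy %o0, the %o1/%o2 pair and %o3; on the 64-bit ABI
   each argument advances cum->words by a single slot.  */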
5583 /* Handle the FUNCTION_ARG_PADDING macro.
For the 64-bit ABI, structs are always stored left-shifted in their
argument slot.  */
5587 enum direction
5588 function_arg_padding (enum machine_mode mode, const_tree type)
5590 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
5591 return upward;
5593 /* Fall back to the default. */
5594 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
5597 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
5598 Specify whether to return the return value in memory. */
5600 static bool
5601 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5603 if (TARGET_ARCH32)
5604 /* Original SPARC 32-bit ABI says that structures and unions,
5605 and quad-precision floats are returned in memory. All other
5606 base types are returned in registers.
5608 Extended ABI (as implemented by the Sun compiler) says that
5609 all complex floats are returned in registers (8 FP registers
5610 at most for '_Complex long double'). Return all complex integers
5611 in registers (4 at most for '_Complex long long').
5613 Vector ABI (as implemented by the Sun VIS SDK) says that vector
5614 integers are returned like floats of the same size, that is in
5615 registers up to 8 bytes and in memory otherwise. Return all
5616 vector floats in memory like structure and unions; note that
5617 they always have BLKmode like the latter. */
5618 return (TYPE_MODE (type) == BLKmode
5619 || TYPE_MODE (type) == TFmode
5620 || (TREE_CODE (type) == VECTOR_TYPE
5621 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5622 else
5623 /* Original SPARC 64-bit ABI says that structures and unions
5624 smaller than 32 bytes are returned in registers, as well as
5625 all other base types.
5627 Extended ABI (as implemented by the Sun compiler) says that all
5628 complex floats are returned in registers (8 FP registers at most
5629 for '_Complex long double'). Return all complex integers in
5630 registers (4 at most for '_Complex TItype').
5632 Vector ABI (as implemented by the Sun VIS SDK) says that vector
5633 integers are returned like floats of the same size, that is in
5634 registers. Return all vector floats like structure and unions;
5635 note that they always have BLKmode like the latter. */
5636 return ((TYPE_MODE (type) == BLKmode
5637 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32));
5640 /* Handle the TARGET_STRUCT_VALUE target hook.
5641 Return where to find the structure return value address. */
5643 static rtx
5644 sparc_struct_value_rtx (tree fndecl, int incoming)
5646 if (TARGET_ARCH64)
5647 return 0;
5648 else
5650 rtx mem;
5652 if (incoming)
5653 mem = gen_rtx_MEM (Pmode, plus_constant (frame_pointer_rtx,
5654 STRUCT_VALUE_OFFSET));
5655 else
5656 mem = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx,
5657 STRUCT_VALUE_OFFSET));
/* Only follow the SPARC ABI for fixed-size structure returns.
   Variable-size structure returns are handled per the normal
   procedures in GCC.  This is enabled by -mstd-struct-return.  */
5662 if (incoming == 2
5663 && sparc_std_struct_return
5664 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
5665 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
/* We must check and adjust the return address, since the caller
   may or may not have actually provided the return object.  */
rtx ret_rtx = gen_rtx_REG (Pmode, 31);
rtx scratch = gen_reg_rtx (SImode);
rtx endlab = gen_label_rtx ();
/* Calculate the return object size.  */
tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
/* Construct a temporary return value.  */
rtx temp_val = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
/* Implement the SPARC 32-bit psABI callee return struct checking
   requirement: fetch the instruction to which we will return and
   see if it's an unimp instruction (the most significant 10 bits
   will be zero).  */
emit_move_insn (scratch, gen_rtx_MEM (SImode,
plus_constant (ret_rtx, 8)));
/* Assume the size is valid and pre-adjust.  */
emit_insn (gen_add3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, 0, endlab);
emit_insn (gen_sub3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
/* Assign the stack temporary: write the address of the memory
   pointed to by temp_val into the memory pointed to by mem.  */
emit_move_insn (mem, XEXP (temp_val, 0));
emit_label (endlab);
5699 set_mem_alias_set (mem, struct_value_alias_set);
5700 return mem;
5704 /* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
5705 For v9, function return values are subject to the same rules as arguments,
5706 except that up to 32 bytes may be returned in registers. */
5709 function_value (const_tree type, enum machine_mode mode, int incoming_p)
5711 /* Beware that the two values are swapped here wrt function_arg. */
5712 int regbase = (incoming_p
5713 ? SPARC_OUTGOING_INT_ARG_FIRST
5714 : SPARC_INCOMING_INT_ARG_FIRST);
5715 enum mode_class mclass = GET_MODE_CLASS (mode);
5716 int regno;
5718 /* Vector types deserve special treatment because they are polymorphic wrt
5719 their mode, depending upon whether VIS instructions are enabled. */
5720 if (type && TREE_CODE (type) == VECTOR_TYPE)
5722 HOST_WIDE_INT size = int_size_in_bytes (type);
5723 gcc_assert ((TARGET_ARCH32 && size <= 8)
5724 || (TARGET_ARCH64 && size <= 32));
5726 if (mode == BLKmode)
5727 return function_arg_vector_value (size,
5728 SPARC_FP_ARG_FIRST);
5729 else
5730 mclass = MODE_FLOAT;
5733 if (TARGET_ARCH64 && type)
5735 /* Structures up to 32 bytes in size are returned in registers. */
5736 if (TREE_CODE (type) == RECORD_TYPE)
5738 HOST_WIDE_INT size = int_size_in_bytes (type);
5739 gcc_assert (size <= 32);
5741 return function_arg_record_value (type, mode, 0, 1, regbase);
5744 /* Unions up to 32 bytes in size are returned in integer registers. */
5745 else if (TREE_CODE (type) == UNION_TYPE)
5747 HOST_WIDE_INT size = int_size_in_bytes (type);
5748 gcc_assert (size <= 32);
5750 return function_arg_union_value (size, mode, 0, regbase);
5753 /* Objects that require it are returned in FP registers. */
else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
  ;
5757 /* All other aggregate types are returned in an integer register in a
5758 mode corresponding to the size of the type. */
5759 else if (AGGREGATE_TYPE_P (type))
5763 HOST_WIDE_INT size = int_size_in_bytes (type);
5764 gcc_assert (size <= 32);
5766 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5768 /* ??? We probably should have made the same ABI change in
5769 3.4.0 as the one we made for unions. The latter was
5770 required by the SCD though, while the former is not
5771 specified, so we favored compatibility and efficiency.
5773 Now we're stuck for aggregates larger than 16 bytes,
5774 because OImode vanished in the meantime. Let's not
5775 try to be unduly clever, and simply follow the ABI
5776 for unions in that case. */
5777 if (mode == BLKmode)
5778 return function_arg_union_value (size, mode, 0, regbase);
5779 else
5780 mclass = MODE_INT;
5783 /* This must match PROMOTE_FUNCTION_MODE. */
5784 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5785 mode = word_mode;
5788 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
5789 regno = SPARC_FP_ARG_FIRST;
5790 else
5791 regno = regbase;
5793 return gen_rtx_REG (mode, regno);
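/* Illustrative outcomes of the rules above (examples, not an
   exhaustive list): a plain `int' is promoted to word_mode and comes
   back in the first integer register; a `float' with the FPU enabled
   comes back in %f0 (SPARC_FP_ARG_FIRST); and on 64-bit a 24-byte
   struct is scattered across registers by
   function_arg_record_value.  */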
5796 /* Do what is necessary for `va_start'. We look at the current function
5797 to determine if stdarg or varargs is used and return the address of
5798 the first unnamed parameter. */
5800 static rtx
5801 sparc_builtin_saveregs (void)
5803 int first_reg = crtl->args.info.words;
5804 rtx address;
5805 int regno;
5807 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
5808 emit_move_insn (gen_rtx_MEM (word_mode,
5809 gen_rtx_PLUS (Pmode,
5810 frame_pointer_rtx,
5811 GEN_INT (FIRST_PARM_OFFSET (0)
5812 + (UNITS_PER_WORD
5813 * regno)))),
5814 gen_rtx_REG (word_mode,
5815 SPARC_INCOMING_INT_ARG_FIRST + regno));
5817 address = gen_rtx_PLUS (Pmode,
5818 frame_pointer_rtx,
5819 GEN_INT (FIRST_PARM_OFFSET (0)
5820 + UNITS_PER_WORD * first_reg));
5822 return address;
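/* Worked example (illustrative; the offset 68 is the usual 32-bit
   FIRST_PARM_OFFSET and is an assumption of the example, not
   something hard-coded above): for f (int a, ...) we have
   first_reg == 1, so the loop dumps %i1-%i5 into the parameter
   slots at [%fp+72] ... [%fp+88], and the address returned for
   va_start is %fp+72, the slot of the first anonymous argument.  */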
5825 /* Implement `va_start' for stdarg. */
5827 static void
5828 sparc_va_start (tree valist, rtx nextarg)
5830 nextarg = expand_builtin_saveregs ();
5831 std_expand_builtin_va_start (valist, nextarg);
5834 /* Implement `va_arg' for stdarg. */
5836 static tree
5837 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5838 gimple_seq *post_p)
5840 HOST_WIDE_INT size, rsize, align;
5841 tree addr, incr;
5842 bool indirect;
5843 tree ptrtype = build_pointer_type (type);
5845 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5847 indirect = true;
5848 size = rsize = UNITS_PER_WORD;
5849 align = 0;
5851 else
5853 indirect = false;
5854 size = int_size_in_bytes (type);
5855 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5856 align = 0;
5858 if (TARGET_ARCH64)
5860 /* For SPARC64, objects requiring 16-byte alignment get it. */
5861 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
5862 align = 2 * UNITS_PER_WORD;
5864 /* The SPARC-V9 ABI states that structures up to 16 bytes in size
5865 are left-justified in their slots. */
5866 if (AGGREGATE_TYPE_P (type))
5868 if (size == 0)
5869 size = rsize = UNITS_PER_WORD;
5870 else
5871 size = rsize;
5876 incr = valist;
5877 if (align)
5879 incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
5880 size_int (align - 1));
5881 incr = fold_convert (sizetype, incr);
5882 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
5883 size_int (-align));
5884 incr = fold_convert (ptr_type_node, incr);
5887 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
5888 addr = incr;
5890 if (BYTES_BIG_ENDIAN && size < rsize)
5891 addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
5892 size_int (rsize - size));
5894 if (indirect)
5896 addr = fold_convert (build_pointer_type (ptrtype), addr);
5897 addr = build_va_arg_indirect_ref (addr);
5900 /* If the address isn't aligned properly for the type, we need a temporary.
5901 FIXME: This is inefficient, usually we can do this in registers. */
5902 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
5904 tree tmp = create_tmp_var (type, "va_arg_tmp");
5905 tree dest_addr = build_fold_addr_expr (tmp);
5906 tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
5907 3, dest_addr, addr, size_int (rsize));
5908 TREE_ADDRESSABLE (tmp) = 1;
5909 gimplify_and_add (copy, pre_p);
5910 addr = dest_addr;
5913 else
5914 addr = fold_convert (ptrtype, addr);
5916 incr
5917 = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, size_int (rsize));
5918 gimplify_assign (valist, incr, post_p);
5920 return build_va_arg_indirect_ref (addr);
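/* Example of the big-endian adjustment above (illustrative): on
   64-bit SPARC a 4-byte `int' occupies an 8-byte slot, and being
   big-endian its value lives in the last 4 bytes.  The
   POINTER_PLUS_EXPR by rsize - size == 4 therefore points addr at
   the bytes that actually hold the value.  */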
5923 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
5924 Specify whether the vector mode is supported by the hardware. */
5926 static bool
5927 sparc_vector_mode_supported_p (enum machine_mode mode)
5929 return TARGET_VIS && VECTOR_MODE_P (mode);
5932 /* Return the string to output an unconditional branch to LABEL, which is
5933 the operand number of the label.
5935 DEST is the destination insn (i.e. the label), INSN is the source. */
5937 const char *
5938 output_ubranch (rtx dest, int label, rtx insn)
5940 static char string[64];
5941 bool v9_form = false;
5942 char *p;
5944 if (TARGET_V9 && INSN_ADDRESSES_SET_P ())
5946 int delta = (INSN_ADDRESSES (INSN_UID (dest))
5947 - INSN_ADDRESSES (INSN_UID (insn)));
5948 /* Leave some instructions for "slop". */
5949 if (delta >= -260000 && delta < 260000)
5950 v9_form = true;
5953 if (v9_form)
5954 strcpy (string, "ba%*,pt\t%%xcc, ");
5955 else
5956 strcpy (string, "b%*\t");
5958 p = strchr (string, '\0');
5959 *p++ = '%';
5960 *p++ = 'l';
5961 *p++ = '0' + label;
5962 *p++ = '%';
5963 *p++ = '(';
5964 *p = '\0';
5966 return string;
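/* Composed example (illustrative): with label operand 0 and a nearby
   target, the buffer ends up holding "ba%*,pt\t%%xcc, %l0%(", which
   final prints as, say, "ba,pt  %xcc, .LL42", the %* and %( sequences
   supplying the annul flag or delay-slot nop as described in
   print_operand.  The label .LL42 is invented for the example.  */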
5969 /* Return the string to output a conditional branch to LABEL, which is
5970 the operand number of the label. OP is the conditional expression.
5971 XEXP (OP, 0) is assumed to be a condition code register (integer or
5972 floating point) and its mode specifies what kind of comparison we made.
5974 DEST is the destination insn (i.e. the label), INSN is the source.
5976 REVERSED is nonzero if we should reverse the sense of the comparison.
5978 ANNUL is nonzero if we should generate an annulling branch. */
5980 const char *
5981 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
5982 rtx insn)
5984 static char string[64];
5985 enum rtx_code code = GET_CODE (op);
5986 rtx cc_reg = XEXP (op, 0);
5987 enum machine_mode mode = GET_MODE (cc_reg);
5988 const char *labelno, *branch;
5989 int spaces = 8, far;
5990 char *p;
5992 /* v9 branches are limited to +-1MB. If it is too far away,
5993 change
5995 bne,pt %xcc, .LC30
5997 to
5999 be,pn %xcc, .+12
6000 nop
6001 ba .LC30
6003 and
6005 fbne,a,pn %fcc2, .LC29
6007 to
6009 fbe,pt %fcc2, .+16
6010 nop
6011 ba .LC29 */
6013 far = TARGET_V9 && (get_attr_length (insn) >= 3);
6014 if (reversed ^ far)
6016 /* Reversal of FP compares needs care: an ordered compare
6017 becomes an unordered compare and vice versa. */
6018 if (mode == CCFPmode || mode == CCFPEmode)
6019 code = reverse_condition_maybe_unordered (code);
6020 else
6021 code = reverse_condition (code);
6024 /* Start by writing the branch condition. */
6025 if (mode == CCFPmode || mode == CCFPEmode)
6027 switch (code)
6029 case NE:
6030 branch = "fbne";
6031 break;
6032 case EQ:
6033 branch = "fbe";
6034 break;
6035 case GE:
6036 branch = "fbge";
6037 break;
6038 case GT:
6039 branch = "fbg";
6040 break;
6041 case LE:
6042 branch = "fble";
6043 break;
6044 case LT:
6045 branch = "fbl";
6046 break;
6047 case UNORDERED:
6048 branch = "fbu";
6049 break;
6050 case ORDERED:
6051 branch = "fbo";
6052 break;
6053 case UNGT:
6054 branch = "fbug";
6055 break;
6056 case UNLT:
6057 branch = "fbul";
6058 break;
6059 case UNEQ:
6060 branch = "fbue";
6061 break;
6062 case UNGE:
6063 branch = "fbuge";
6064 break;
6065 case UNLE:
6066 branch = "fbule";
6067 break;
6068 case LTGT:
6069 branch = "fblg";
6070 break;
6072 default:
6073 gcc_unreachable ();
6076 /* ??? !v9: FP branches cannot be preceded by another floating point
6077 insn. Because there is currently no concept of pre-delay slots,
6078 we can fix this only by always emitting a nop before a floating
6079 point branch. */
6081 string[0] = '\0';
6082 if (! TARGET_V9)
6083 strcpy (string, "nop\n\t");
6084 strcat (string, branch);
6086 else
6088 switch (code)
6090 case NE:
6091 branch = "bne";
6092 break;
6093 case EQ:
6094 branch = "be";
6095 break;
6096 case GE:
6097 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6098 branch = "bpos";
6099 else
6100 branch = "bge";
6101 break;
6102 case GT:
6103 branch = "bg";
6104 break;
6105 case LE:
6106 branch = "ble";
6107 break;
6108 case LT:
6109 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6110 branch = "bneg";
6111 else
6112 branch = "bl";
6113 break;
6114 case GEU:
6115 branch = "bgeu";
6116 break;
6117 case GTU:
6118 branch = "bgu";
6119 break;
6120 case LEU:
6121 branch = "bleu";
6122 break;
6123 case LTU:
6124 branch = "blu";
6125 break;
6127 default:
6128 gcc_unreachable ();
6130 strcpy (string, branch);
6132 spaces -= strlen (branch);
6133 p = strchr (string, '\0');
6135 /* Now add the annulling, the label, and a possible nop. */
6136 if (annul && ! far)
6138 strcpy (p, ",a");
6139 p += 2;
6140 spaces -= 2;
6143 if (TARGET_V9)
6145 rtx note;
6146 int v8 = 0;
6148 if (! far && insn && INSN_ADDRESSES_SET_P ())
6150 int delta = (INSN_ADDRESSES (INSN_UID (dest))
6151 - INSN_ADDRESSES (INSN_UID (insn)));
6152 /* Leave some instructions for "slop". */
6153 if (delta < -260000 || delta >= 260000)
6154 v8 = 1;
6157 if (mode == CCFPmode || mode == CCFPEmode)
6159 static char v9_fcc_labelno[] = "%%fccX, ";
6160 /* Set the char indicating the number of the fcc reg to use. */
6161 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
6162 labelno = v9_fcc_labelno;
6163 if (v8)
6165 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
6166 labelno = "";
6169 else if (mode == CCXmode || mode == CCX_NOOVmode)
6171 labelno = "%%xcc, ";
6172 gcc_assert (! v8);
6174 else
6176 labelno = "%%icc, ";
6177 if (v8)
6178 labelno = "";
6181 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6183 strcpy (p,
6184 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6185 ? ",pt" : ",pn");
6186 p += 3;
6187 spaces -= 3;
6190 else
6191 labelno = "";
6193 if (spaces > 0)
6194 *p++ = '\t';
6195 else
6196 *p++ = ' ';
6197 strcpy (p, labelno);
6198 p = strchr (p, '\0');
6199 if (far)
6201 strcpy (p, ".+12\n\t nop\n\tb\t");
6202 /* Skip the next insn if requested or
6203 if we know that it will be a nop. */
6204 if (annul || ! final_sequence)
6205 p[3] = '6';
6206 p += 14;
6208 *p++ = '%';
6209 *p++ = 'l';
6210 *p++ = label + '0';
6211 *p++ = '%';
6212 *p++ = '#';
6213 *p = '\0';
6215 return string;
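/* Composed example (illustrative): an annulled, predicted-taken
   integer branch on %icc with label operand 3 yields the template
   "bne,a,pt\t%%icc, %l3%#"; in the far case the label is first a
   short hop (".+12" or ".+16") around an unconditional branch to
   the real target, as sketched in the comment at the top of this
   function.  */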
6218 /* Emit a library call comparison between floating point X and Y.
6219 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
6220 Return the new operator to be used in the comparison sequence.
6222 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
6223 values as arguments instead of the TFmode registers themselves,
6224 that's why we cannot call emit_float_lib_cmp. */
6226 rtx
6227 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
6229 const char *qpfunc;
6230 rtx slot0, slot1, result, tem, tem2;
6231 enum machine_mode mode;
6232 enum rtx_code new_comparison;
6234 switch (comparison)
6236 case EQ:
6237 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
6238 break;
6240 case NE:
6241 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
6242 break;
6244 case GT:
6245 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
6246 break;
6248 case GE:
6249 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
6250 break;
6252 case LT:
6253 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
6254 break;
6256 case LE:
6257 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
6258 break;
6260 case ORDERED:
6261 case UNORDERED:
6262 case UNGT:
6263 case UNLT:
6264 case UNEQ:
6265 case UNGE:
6266 case UNLE:
6267 case LTGT:
6268 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
6269 break;
6271 default:
6272 gcc_unreachable ();
6275 if (TARGET_ARCH64)
6277 if (MEM_P (x))
6278 slot0 = x;
6279 else
6281 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6282 emit_move_insn (slot0, x);
6285 if (MEM_P (y))
6286 slot1 = y;
6287 else
6289 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6290 emit_move_insn (slot1, y);
6293 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
6294 DImode, 2,
6295 XEXP (slot0, 0), Pmode,
6296 XEXP (slot1, 0), Pmode);
6297 mode = DImode;
6299 else
6301 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
6302 SImode, 2,
6303 x, TFmode, y, TFmode);
6304 mode = SImode;
6308 /* Immediately move the result of the libcall into a pseudo
6309 register so reload doesn't clobber the value if it needs
6310 the return register for a spill reg. */
6311 result = gen_reg_rtx (mode);
6312 emit_move_insn (result, hard_libcall_value (mode));
6314 switch (comparison)
6316 default:
6317 return gen_rtx_NE (VOIDmode, result, const0_rtx);
6318 case ORDERED:
6319 case UNORDERED:
6320 new_comparison = (comparison == UNORDERED ? EQ : NE);
6321 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
6322 case UNGT:
6323 case UNGE:
6324 new_comparison = (comparison == UNGT ? GT : NE);
6325 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
6326 case UNLE:
6327 return gen_rtx_NE (VOIDmode, result, const2_rtx);
6328 case UNLT:
6329 tem = gen_reg_rtx (mode);
6330 if (TARGET_ARCH32)
6331 emit_insn (gen_andsi3 (tem, result, const1_rtx));
6332 else
6333 emit_insn (gen_anddi3 (tem, result, const1_rtx));
6334 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
6335 case UNEQ:
6336 case LTGT:
6337 tem = gen_reg_rtx (mode);
6338 if (TARGET_ARCH32)
6339 emit_insn (gen_addsi3 (tem, result, const1_rtx));
6340 else
6341 emit_insn (gen_adddi3 (tem, result, const1_rtx));
6342 tem2 = gen_reg_rtx (mode);
6343 if (TARGET_ARCH32)
6344 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
6345 else
6346 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
6347 new_comparison = (comparison == UNEQ ? EQ : NE);
6348 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
6351 gcc_unreachable ();
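/* The decoding above assumes the usual SPARC soft-float convention
   for _Q_cmp/_Qp_cmp: 0 = equal, 1 = less, 2 = greater,
   3 = unordered.  Under that encoding:
     UNORDERED  result == 3          ORDERED  result != 3
     UNGT       result > 1           UNGE     result != 1
     UNLE       result != 2          UNLT     (result & 1) != 0
     UNEQ       ((result + 1) & 2) == 0   (equal or unordered)
     LTGT       ((result + 1) & 2) != 0   (less or greater)  */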
6354 /* Generate an unsigned DImode to FP conversion. This is the same code
6355 optabs would emit if we didn't have TFmode patterns. */
6357 void
6358 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
6360 rtx neglab, donelab, i0, i1, f0, in, out;
6362 out = operands[0];
6363 in = force_reg (DImode, operands[1]);
6364 neglab = gen_label_rtx ();
6365 donelab = gen_label_rtx ();
6366 i0 = gen_reg_rtx (DImode);
6367 i1 = gen_reg_rtx (DImode);
6368 f0 = gen_reg_rtx (mode);
6370 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
6372 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
6373 emit_jump_insn (gen_jump (donelab));
6374 emit_barrier ();
6376 emit_label (neglab);
6378 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
6379 emit_insn (gen_anddi3 (i1, in, const1_rtx));
6380 emit_insn (gen_iordi3 (i0, i0, i1));
6381 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
6382 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
6384 emit_label (donelab);
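/* Sketch of the negative path above: for in >= 2^63 a signed FLOAT
   would misread the sign bit, so we convert (in >> 1) | (in & 1)
   and double the result.  ORing the shifted-out bit back in keeps
   round-to-nearest correct, since it preserves the sticky
   information that would otherwise be lost to the shift.  */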
6387 /* Generate an FP to unsigned DImode conversion. This is the same code
6388 optabs would emit if we didn't have TFmode patterns. */
6390 void
6391 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
6393 rtx neglab, donelab, i0, i1, f0, in, out, limit;
6395 out = operands[0];
6396 in = force_reg (mode, operands[1]);
6397 neglab = gen_label_rtx ();
6398 donelab = gen_label_rtx ();
6399 i0 = gen_reg_rtx (DImode);
6400 i1 = gen_reg_rtx (DImode);
6401 limit = gen_reg_rtx (mode);
6402 f0 = gen_reg_rtx (mode);
6404 emit_move_insn (limit,
6405 CONST_DOUBLE_FROM_REAL_VALUE (
6406 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
6407 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
6409 emit_insn (gen_rtx_SET (VOIDmode,
6410 out,
6411 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
6412 emit_jump_insn (gen_jump (donelab));
6413 emit_barrier ();
6415 emit_label (neglab);
6417 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
6418 emit_insn (gen_rtx_SET (VOIDmode,
6419 i0,
6420 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
6421 emit_insn (gen_movdi (i1, const1_rtx));
6422 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
6423 emit_insn (gen_xordi3 (out, i0, i1));
6425 emit_label (donelab);
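/* Sketch of the negative path above: values >= 2^63 do not fit a
   signed fix, so we subtract 2^63 (the `limit' constant), convert,
   and XOR bit 63 back into the result.  Illustratively, fixing
   2^63 + 5.0 converts 5.0 and then restores the top bit, giving
   0x8000000000000005.  */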
6428 /* Return the string to output a conditional branch to LABEL, testing
6429 register REG. LABEL is the operand number of the label; REG is the
6430 operand number of the reg. OP is the conditional expression. The mode
6431 of REG says what kind of comparison we made.
6433 DEST is the destination insn (i.e. the label), INSN is the source.
6435 REVERSED is nonzero if we should reverse the sense of the comparison.
6437 ANNUL is nonzero if we should generate an annulling branch. */
6439 const char *
6440 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
6441 int annul, rtx insn)
6443 static char string[64];
6444 enum rtx_code code = GET_CODE (op);
6445 enum machine_mode mode = GET_MODE (XEXP (op, 0));
6446 rtx note;
6447 int far;
6448 char *p;
6450 /* Branches on a register are limited to +-128KB. If the target
6451 is too far away, change
6453 brnz,pt %g1, .LC30
6455 to
6457 brz,pn %g1, .+12
6458 nop
6459 ba,pt %xcc, .LC30
6461 and
6463 brgez,a,pn %o1, .LC29
6465 to
6467 brlz,pt %o1, .+16
6468 nop
6469 ba,pt %xcc, .LC29 */
6471 far = get_attr_length (insn) >= 3;
6473 /* If not floating-point or if EQ or NE, we can just reverse the code. */
6474 if (reversed ^ far)
6475 code = reverse_condition (code);
6477 /* Only 64 bit versions of these instructions exist. */
6478 gcc_assert (mode == DImode);
6480 /* Start by writing the branch condition. */
6482 switch (code)
6484 case NE:
6485 strcpy (string, "brnz");
6486 break;
6488 case EQ:
6489 strcpy (string, "brz");
6490 break;
6492 case GE:
6493 strcpy (string, "brgez");
6494 break;
6496 case LT:
6497 strcpy (string, "brlz");
6498 break;
6500 case LE:
6501 strcpy (string, "brlez");
6502 break;
6504 case GT:
6505 strcpy (string, "brgz");
6506 break;
6508 default:
6509 gcc_unreachable ();
6512 p = strchr (string, '\0');
6514 /* Now add the annulling, reg, label, and nop. */
6515 if (annul && ! far)
6517 strcpy (p, ",a");
6518 p += 2;
6521 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6523 strcpy (p,
6524 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6525 ? ",pt" : ",pn");
6526 p += 3;
6529 *p = p < string + 8 ? '\t' : ' ';
6530 p++;
6531 *p++ = '%';
6532 *p++ = '0' + reg;
6533 *p++ = ',';
6534 *p++ = ' ';
6535 if (far)
6537 int veryfar = 1, delta;
6539 if (INSN_ADDRESSES_SET_P ())
6541 delta = (INSN_ADDRESSES (INSN_UID (dest))
6542 - INSN_ADDRESSES (INSN_UID (insn)));
6543 /* Leave some instructions for "slop". */
6544 if (delta >= -260000 && delta < 260000)
6545 veryfar = 0;
6548 strcpy (p, ".+12\n\t nop\n\t");
6549 /* Skip the next insn if requested or
6550 if we know that it will be a nop. */
6551 if (annul || ! final_sequence)
6552 p[3] = '6';
6553 p += 12;
6554 if (veryfar)
6556 strcpy (p, "b\t");
6557 p += 2;
6559 else
6561 strcpy (p, "ba,pt\t%%xcc, ");
6562 p += 13;
6565 *p++ = '%';
6566 *p++ = 'l';
6567 *p++ = '0' + label;
6568 *p++ = '%';
6569 *p++ = '#';
6570 *p = '\0';
6572 return string;
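/* Composed example (illustrative): a predicted-not-taken branch on
   zero with register operand 1 and label operand 0 comes out as
   "brz,pn\t%1, %l0%#".  */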
6575 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
6576 Such instructions cannot be used in the delay slot of a return insn on v9.
6577 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
6578 */
6580 static int
6581 epilogue_renumber (register rtx *where, int test)
6583 register const char *fmt;
6584 register int i;
6585 register enum rtx_code code;
6587 if (*where == 0)
6588 return 0;
6590 code = GET_CODE (*where);
6592 switch (code)
6594 case REG:
6595 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
6596 return 1;
6597 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
6598 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
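/* Fall through: the codes below can contain no registers. */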
6599 case SCRATCH:
6600 case CC0:
6601 case PC:
6602 case CONST_INT:
6603 case CONST_DOUBLE:
6604 return 0;
6606 /* Do not replace the frame pointer with the stack pointer because
6607 it can cause the delayed instruction to load below the stack.
6608 This occurs when instructions like:
6610 (set (reg/i:SI 24 %i0)
6611 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
6612 (const_int -20 [0xffffffec])) 0))
6614 are in the return delayed slot. */
6615 case PLUS:
6616 if (GET_CODE (XEXP (*where, 0)) == REG
6617 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
6618 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
6619 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
6620 return 1;
6621 break;
6623 case MEM:
6624 if (SPARC_STACK_BIAS
6625 && GET_CODE (XEXP (*where, 0)) == REG
6626 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
6627 return 1;
6628 break;
6630 default:
6631 break;
6634 fmt = GET_RTX_FORMAT (code);
6636 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6638 if (fmt[i] == 'E')
6640 register int j;
6641 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
6642 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
6643 return 1;
6645 else if (fmt[i] == 'e'
6646 && epilogue_renumber (&(XEXP (*where, i)), test))
6647 return 1;
6649 return 0;
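/* Renaming example (illustrative): the delay slot of a v9 `return'
   executes after the register window has moved back to the caller,
   so what the function body called %i0 (regno 24) must be written
   %o0 (regno 8); OUTGOING_REGNO performs exactly that mapping for
   each of %i0-%i7.  */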
6652 /* Leaf functions and non-leaf functions have different needs. */
6654 static const int
6655 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
6657 static const int
6658 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
6660 static const int *const reg_alloc_orders[] = {
6661 reg_leaf_alloc_order,
6662 reg_nonleaf_alloc_order};
6664 void
6665 order_regs_for_local_alloc (void)
6667 static int last_order_nonleaf = 1;
6669 if (df_regs_ever_live_p (15) != last_order_nonleaf)
6671 last_order_nonleaf = !last_order_nonleaf;
6672 memcpy ((char *) reg_alloc_order,
6673 (const char *) reg_alloc_orders[last_order_nonleaf],
6674 FIRST_PSEUDO_REGISTER * sizeof (int));
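/* Register 15 is %o7, which the call instruction loads with the
   return address, so df_regs_ever_live_p (15) is a cheap proxy for
   "this function makes calls"; the allocation order is recopied
   only when that property differs from the previous function's.  */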
6678 /* Return 1 if REG and MEM are legitimate enough to allow the various
6679 mem<-->reg splits to be run. */
6681 int
6682 sparc_splitdi_legitimate (rtx reg, rtx mem)
6684 /* Punt if we are here by mistake. */
6685 gcc_assert (reload_completed);
6687 /* We must have an offsettable memory reference. */
6688 if (! offsettable_memref_p (mem))
6689 return 0;
6691 /* If we have legitimate args for ldd/std, we do not want
6692 the split to happen. */
6693 if ((REGNO (reg) % 2) == 0
6694 && mem_min_alignment (mem, 8))
6695 return 0;
6697 /* Success. */
6698 return 1;
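/* Example (illustrative): a DImode value in %o0/%o1 against an
   8-byte-aligned memory slot keeps its ldd/std form (we return 0);
   an odd starting register or a merely word-aligned slot makes the
   split into two word moves legitimate.  */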
6701 /* Return 1 if x and y are some kind of REG and they refer to
6702 different hard registers. This test is guaranteed to be
6703 run after reload. */
6705 int
6706 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
6708 if (GET_CODE (x) != REG)
6709 return 0;
6710 if (GET_CODE (y) != REG)
6711 return 0;
6712 if (REGNO (x) == REGNO (y))
6713 return 0;
6714 return 1;
6717 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
6718 This makes them candidates for using ldd and std insns.
6720 Note reg1 and reg2 *must* be hard registers. */
6722 int
6723 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
6725 /* We might have been passed a SUBREG. */
6726 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
6727 return 0;
6729 if (REGNO (reg1) % 2 != 0)
6730 return 0;
6732 /* Integer ldd is deprecated in SPARC V9. */
6733 if (TARGET_V9 && REGNO (reg1) < 32)
6734 return 0;
6736 return (REGNO (reg1) == REGNO (reg2) - 1);
6739 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
6740 an ldd or std insn.
6742 This can only happen when addr1 and addr2, the addresses in mem1
6743 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
6744 addr1 must also be aligned on a 64-bit boundary.
6746 Also, if dependent_reg_rtx is not null, it should not be used to
6747 compute the address for mem1, i.e. we cannot optimize a sequence
6748 like:
6749 ld [%o0], %o0
6750 ld [%o0 + 4], %o1
6752 ldd [%o0], %o0
6753 nor:
6754 ld [%g3 + 4], %g3
6755 ld [%g3], %g2
6757 ldd [%g3], %g2
6759 But, note that the transformation from:
6760 ld [%g2 + 4], %g3
6761 ld [%g2], %g2
6763 ldd [%g2], %g2
6764 is perfectly fine. Thus, the peephole2 patterns always pass us
6765 the destination register of the first load, never the second one.
6767 For stores we don't have a similar problem, so dependent_reg_rtx is
6768 NULL_RTX. */
6770 int
6771 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
6773 rtx addr1, addr2;
6774 unsigned int reg1;
6775 HOST_WIDE_INT offset1;
6777 /* The mems cannot be volatile. */
6778 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
6779 return 0;
6781 /* MEM1 should be aligned on a 64-bit boundary. */
6782 if (MEM_ALIGN (mem1) < 64)
6783 return 0;
6785 addr1 = XEXP (mem1, 0);
6786 addr2 = XEXP (mem2, 0);
6788 /* Extract a register number and offset (if used) from the first addr. */
6789 if (GET_CODE (addr1) == PLUS)
6791 /* If not a REG, return zero. */
6792 if (GET_CODE (XEXP (addr1, 0)) != REG)
6793 return 0;
6794 else
6796 reg1 = REGNO (XEXP (addr1, 0));
6797 /* The offset must be constant! */
6798 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
6799 return 0;
6800 offset1 = INTVAL (XEXP (addr1, 1));
6803 else if (GET_CODE (addr1) != REG)
6804 return 0;
6805 else
6807 reg1 = REGNO (addr1);
6808 /* This was a simple (mem (reg)) expression. Offset is 0. */
6809 offset1 = 0;
6812 /* Make sure the second address is of the form (plus (reg) (const_int)). */
6813 if (GET_CODE (addr2) != PLUS)
6814 return 0;
6816 if (GET_CODE (XEXP (addr2, 0)) != REG
6817 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
6818 return 0;
6820 if (reg1 != REGNO (XEXP (addr2, 0)))
6821 return 0;
6823 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
6824 return 0;
6826 /* The first offset must be evenly divisible by 8 to ensure the
6827 address is 64 bit aligned. */
6828 if (offset1 % 8 != 0)
6829 return 0;
6831 /* The offset for the second addr must be 4 more than the first addr. */
6832 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
6833 return 0;
6835 /* All the tests passed. addr1 and addr2 are valid for ldd and std
6836 instructions. */
6837 return 1;
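/* Worked example (illustrative): ld [%o0+8] followed by ld [%o0+12]
   passes every test above (same base, offset1 % 8 == 0, second
   offset == first + 4) and may become ldd [%o0+8]; the pair
   ld [%o0+4] / ld [%o0+8] is rejected because its first offset is
   not a multiple of 8.  */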
6840 /* Return 1 if reg is a pseudo, or is the first register in
6841 a hard register pair. This makes it suitable for use in
6842 ldd and std insns. */
6844 int
6845 register_ok_for_ldd (rtx reg)
6847 /* We might have been passed a SUBREG. */
6848 if (!REG_P (reg))
6849 return 0;
6851 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
6852 return (REGNO (reg) % 2 == 0);
6854 return 1;
6857 /* Return 1 if OP is a memory whose address is known to be
6858 aligned to an 8-byte boundary, or a pseudo during reload.
6859 This makes it suitable for use in ldd and std insns. */
6861 int
6862 memory_ok_for_ldd (rtx op)
6864 if (MEM_P (op))
6866 /* In 64-bit mode, we assume that the address is word-aligned. */
6867 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
6868 return 0;
6870 if ((reload_in_progress || reload_completed)
6871 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
6872 return 0;
6874 else if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
6876 if (!(reload_in_progress && reg_renumber [REGNO (op)] < 0))
6877 return 0;
6879 else
6880 return 0;
6882 return 1;
6885 /* Print operand X (an rtx) in assembler syntax to file FILE.
6886 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
6887 For `%' followed by punctuation, CODE is the punctuation and X is null. */
6889 void
6890 print_operand (FILE *file, rtx x, int code)
6892 switch (code)
6894 case '#':
6895 /* Output an insn in a delay slot. */
6896 if (final_sequence)
6897 sparc_indent_opcode = 1;
6898 else
6899 fputs ("\n\t nop", file);
6900 return;
6901 case '*':
6902 /* Output an annul flag if there's nothing for the delay slot and we
6903 are optimizing. This is always used with '(' below.
6904 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
6905 this is a dbx bug. So, we only do this when optimizing.
6906 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
6907 Always emit a nop in case the next instruction is a branch. */
6908 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
6909 fputs (",a", file);
6910 return;
6911 case '(':
6912 /* Output a 'nop' if there's nothing for the delay slot and we are
6913 not optimizing. This is always used with '*' above. */
6914 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
6915 fputs ("\n\t nop", file);
6916 else if (final_sequence)
6917 sparc_indent_opcode = 1;
6918 return;
6919 case ')':
6920 /* Output the right displacement from the saved PC on function return.
6921 The caller may have placed an "unimp" insn immediately after the call
6922 so we have to account for it. This insn is used in the 32-bit ABI
6923 when calling a function that returns a non zero-sized structure. The
6924 64-bit ABI doesn't have it. Be careful to have this test be the same
6925 as that used on the call. The exception here is that when
6926 sparc_std_struct_return is enabled, the psABI is followed exactly
6927 and the adjustment is made by the code in sparc_struct_value_rtx.
6928 The call emitted is the same when sparc_std_struct_return is
6929 present. */
6930 if (! TARGET_ARCH64
6931 && cfun->returns_struct
6932 && ! sparc_std_struct_return
6933 && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
6934 == INTEGER_CST)
6935 && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
6936 fputs ("12", file);
6937 else
6938 fputc ('8', file);
6939 return;
6940 case '_':
6941 /* Output the Embedded Medium/Anywhere code model base register. */
6942 fputs (EMBMEDANY_BASE_REG, file);
6943 return;
6944 case '&':
6945 /* Print some local dynamic TLS name. */
6946 assemble_name (file, get_some_local_dynamic_name ());
6947 return;
6949 case 'Y':
6950 /* Adjust the operand to take into account a RESTORE operation. */
6951 if (GET_CODE (x) == CONST_INT)
6952 break;
6953 else if (GET_CODE (x) != REG)
6954 output_operand_lossage ("invalid %%Y operand");
6955 else if (REGNO (x) < 8)
6956 fputs (reg_names[REGNO (x)], file);
6957 else if (REGNO (x) >= 24 && REGNO (x) < 32)
6958 fputs (reg_names[REGNO (x)-16], file);
6959 else
6960 output_operand_lossage ("invalid %%Y operand");
6961 return;
6962 case 'L':
6963 /* Print out the low order register name of a register pair. */
6964 if (WORDS_BIG_ENDIAN)
6965 fputs (reg_names[REGNO (x)+1], file);
6966 else
6967 fputs (reg_names[REGNO (x)], file);
6968 return;
6969 case 'H':
6970 /* Print out the high order register name of a register pair. */
6971 if (WORDS_BIG_ENDIAN)
6972 fputs (reg_names[REGNO (x)], file);
6973 else
6974 fputs (reg_names[REGNO (x)+1], file);
6975 return;
6976 case 'R':
6977 /* Print out the second register name of a register pair or quad.
6978 I.e., R (%o0) => %o1. */
6979 fputs (reg_names[REGNO (x)+1], file);
6980 return;
6981 case 'S':
6982 /* Print out the third register name of a register quad.
6983 I.e., S (%o0) => %o2. */
6984 fputs (reg_names[REGNO (x)+2], file);
6985 return;
6986 case 'T':
6987 /* Print out the fourth register name of a register quad.
6988 I.e., T (%o0) => %o3. */
6989 fputs (reg_names[REGNO (x)+3], file);
6990 return;
6991 case 'x':
6992 /* Print a condition code register. */
6993 if (REGNO (x) == SPARC_ICC_REG)
6995 /* We don't handle CC[X]_NOOVmode because they're not supposed
6996 to occur here. */
6997 if (GET_MODE (x) == CCmode)
6998 fputs ("%icc", file);
6999 else if (GET_MODE (x) == CCXmode)
7000 fputs ("%xcc", file);
7001 else
7002 gcc_unreachable ();
7004 else
7005 /* %fccN register. */
7006 fputs (reg_names[REGNO (x)], file);
7007 return;
7008 case 'm':
7009 /* Print the operand's address only. */
7010 output_address (XEXP (x, 0));
7011 return;
7012 case 'r':
7013 /* In this case we need a register. Use %g0 if the
7014 operand is const0_rtx. */
7015 if (x == const0_rtx
7016 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
7018 fputs ("%g0", file);
7019 return;
7021 else
7022 break;
7024 case 'A':
7025 switch (GET_CODE (x))
7027 case IOR: fputs ("or", file); break;
7028 case AND: fputs ("and", file); break;
7029 case XOR: fputs ("xor", file); break;
7030 default: output_operand_lossage ("invalid %%A operand");
7032 return;
7034 case 'B':
7035 switch (GET_CODE (x))
7037 case IOR: fputs ("orn", file); break;
7038 case AND: fputs ("andn", file); break;
7039 case XOR: fputs ("xnor", file); break;
7040 default: output_operand_lossage ("invalid %%B operand");
7042 return;
7044 /* These are used by the conditional move instructions. */
7045 case 'c':
7046 case 'C':
7048 enum rtx_code rc = GET_CODE (x);
7050 if (code == 'c')
7052 enum machine_mode mode = GET_MODE (XEXP (x, 0));
7053 if (mode == CCFPmode || mode == CCFPEmode)
7054 rc = reverse_condition_maybe_unordered (GET_CODE (x));
7055 else
7056 rc = reverse_condition (GET_CODE (x));
7058 switch (rc)
7060 case NE: fputs ("ne", file); break;
7061 case EQ: fputs ("e", file); break;
7062 case GE: fputs ("ge", file); break;
7063 case GT: fputs ("g", file); break;
7064 case LE: fputs ("le", file); break;
7065 case LT: fputs ("l", file); break;
7066 case GEU: fputs ("geu", file); break;
7067 case GTU: fputs ("gu", file); break;
7068 case LEU: fputs ("leu", file); break;
7069 case LTU: fputs ("lu", file); break;
7070 case LTGT: fputs ("lg", file); break;
7071 case UNORDERED: fputs ("u", file); break;
7072 case ORDERED: fputs ("o", file); break;
7073 case UNLT: fputs ("ul", file); break;
7074 case UNLE: fputs ("ule", file); break;
7075 case UNGT: fputs ("ug", file); break;
7076 case UNGE: fputs ("uge", file); break;
7077 case UNEQ: fputs ("ue", file); break;
7078 default: output_operand_lossage (code == 'c'
7079 ? "invalid %%c operand"
7080 : "invalid %%C operand");
7082 return;
7085 /* These are used by the movr instruction pattern. */
7086 case 'd':
7087 case 'D':
7089 enum rtx_code rc = (code == 'd'
7090 ? reverse_condition (GET_CODE (x))
7091 : GET_CODE (x));
7092 switch (rc)
7094 case NE: fputs ("ne", file); break;
7095 case EQ: fputs ("e", file); break;
7096 case GE: fputs ("gez", file); break;
7097 case LT: fputs ("lz", file); break;
7098 case LE: fputs ("lez", file); break;
7099 case GT: fputs ("gz", file); break;
7100 default: output_operand_lossage (code == 'd'
7101 ? "invalid %%d operand"
7102 : "invalid %%D operand");
7104 return;
7107 case 'b':
7109 /* Print a sign-extended character. */
7110 int i = trunc_int_for_mode (INTVAL (x), QImode);
7111 fprintf (file, "%d", i);
7112 return;
7115 case 'f':
7116 /* Operand must be a MEM; write its address. */
7117 if (GET_CODE (x) != MEM)
7118 output_operand_lossage ("invalid %%f operand");
7119 output_address (XEXP (x, 0));
7120 return;
7122 case 's':
7124 /* Print a sign-extended 32-bit value. */
7125 HOST_WIDE_INT i;
7126 if (GET_CODE(x) == CONST_INT)
7127 i = INTVAL (x);
7128 else if (GET_CODE(x) == CONST_DOUBLE)
7129 i = CONST_DOUBLE_LOW (x);
7130 else
7132 output_operand_lossage ("invalid %%s operand");
7133 return;
7135 i = trunc_int_for_mode (i, SImode);
7136 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
7137 return;
7140 case 0:
7141 /* Do nothing special. */
7142 break;
7144 default:
7145 /* Undocumented flag. */
7146 output_operand_lossage ("invalid operand output code");
7149 if (GET_CODE (x) == REG)
7150 fputs (reg_names[REGNO (x)], file);
7151 else if (GET_CODE (x) == MEM)
7153 fputc ('[', file);
7154 /* Poor Sun assembler doesn't understand absolute addressing. */
7155 if (CONSTANT_P (XEXP (x, 0)))
7156 fputs ("%g0+", file);
7157 output_address (XEXP (x, 0));
7158 fputc (']', file);
7160 else if (GET_CODE (x) == HIGH)
7162 fputs ("%hi(", file);
7163 output_addr_const (file, XEXP (x, 0));
7164 fputc (')', file);
7166 else if (GET_CODE (x) == LO_SUM)
7168 print_operand (file, XEXP (x, 0), 0);
7169 if (TARGET_CM_MEDMID)
7170 fputs ("+%l44(", file);
7171 else
7172 fputs ("+%lo(", file);
7173 output_addr_const (file, XEXP (x, 1));
7174 fputc (')', file);
7176 else if (GET_CODE (x) == CONST_DOUBLE
7177 && (GET_MODE (x) == VOIDmode
7178 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
7180 if (CONST_DOUBLE_HIGH (x) == 0)
7181 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
7182 else if (CONST_DOUBLE_HIGH (x) == -1
7183 && CONST_DOUBLE_LOW (x) < 0)
7184 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
7185 else
7186 output_operand_lossage ("long long constant not a valid immediate operand");
7188 else if (GET_CODE (x) == CONST_DOUBLE)
7189 output_operand_lossage ("floating point constant not a valid immediate operand");
7190 else
7191 output_addr_const (file, x);
7193 /* Target hook for assembling integer objects. The sparc version has
7194 special handling for aligned DI-mode objects. */
7196 static bool
7197 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
7199 /* ??? We only output .xword's for symbols and only then in environments
7200 where the assembler can handle them. */
7201 if (aligned_p && size == 8
7202 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
7204 if (TARGET_V9)
7206 assemble_integer_with_op ("\t.xword\t", x);
7207 return true;
7209 else
7211 assemble_aligned_integer (4, const0_rtx);
7212 assemble_aligned_integer (4, x);
7213 return true;
7216 return default_assemble_integer (x, size, aligned_p);
7219 /* Return the value of a code used in the .proc pseudo-op that says
7220 what kind of result this function returns. For non-C types, we pick
7221 the closest C type. */
7223 #ifndef SHORT_TYPE_SIZE
7224 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
7225 #endif
7227 #ifndef INT_TYPE_SIZE
7228 #define INT_TYPE_SIZE BITS_PER_WORD
7229 #endif
7231 #ifndef LONG_TYPE_SIZE
7232 #define LONG_TYPE_SIZE BITS_PER_WORD
7233 #endif
7235 #ifndef LONG_LONG_TYPE_SIZE
7236 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
7237 #endif
7239 #ifndef FLOAT_TYPE_SIZE
7240 #define FLOAT_TYPE_SIZE BITS_PER_WORD
7241 #endif
7243 #ifndef DOUBLE_TYPE_SIZE
7244 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7245 #endif
7247 #ifndef LONG_DOUBLE_TYPE_SIZE
7248 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7249 #endif
7251 unsigned long
7252 sparc_type_code (register tree type)
7254 register unsigned long qualifiers = 0;
7255 register unsigned shift;
7257 /* Only the first 30 bits of the qualifier are valid. We must refrain from
7258 setting more, since some assemblers will give an error for this. Also,
7259 we must be careful to avoid shifts of 32 bits or more to avoid getting
7260 unpredictable results. */
7262 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
7264 switch (TREE_CODE (type))
7266 case ERROR_MARK:
7267 return qualifiers;
7269 case ARRAY_TYPE:
7270 qualifiers |= (3 << shift);
7271 break;
7273 case FUNCTION_TYPE:
7274 case METHOD_TYPE:
7275 qualifiers |= (2 << shift);
7276 break;
7278 case POINTER_TYPE:
7279 case REFERENCE_TYPE:
7280 case OFFSET_TYPE:
7281 qualifiers |= (1 << shift);
7282 break;
7284 case RECORD_TYPE:
7285 return (qualifiers | 8);
7287 case UNION_TYPE:
7288 case QUAL_UNION_TYPE:
7289 return (qualifiers | 9);
7291 case ENUMERAL_TYPE:
7292 return (qualifiers | 10);
7294 case VOID_TYPE:
7295 return (qualifiers | 16);
7297 case INTEGER_TYPE:
7298 /* If this is a range type, consider it to be the underlying
7299 type. */
7300 if (TREE_TYPE (type) != 0)
7301 break;
7303 /* Carefully distinguish all the standard types of C,
7304 without messing up if the language is not C. We do this by
7305 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
7306 look at both the names and the above fields, but that's redundant.
7307 Any type whose size is between two C types will be considered
7308 to be the wider of the two types. Also, we do not have a
7309 special code to use for "long long", so anything wider than
7310 long is treated the same. Note that we can't distinguish
7311 between "int" and "long" in this code if they are the same
7312 size, but that's fine, since neither can the assembler. */
7314 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
7315 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
7317 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
7318 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
7320 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
7321 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
7323 else
7324 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
7326 case REAL_TYPE:
7327 /* If this is a range type, consider it to be the underlying
7328 type. */
7329 if (TREE_TYPE (type) != 0)
7330 break;
7332 /* Carefully distinguish all the standard types of C,
7333 without messing up if the language is not C. */
7335 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
7336 return (qualifiers | 6);
7338 else
7339 return (qualifiers | 7);
7341 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
7342 /* ??? We need to distinguish between double and float complex types,
7343 but I don't know how yet because I can't reach this code from
7344 existing front-ends. */
7345 return (qualifiers | 7); /* Who knows? */
7347 case VECTOR_TYPE:
7348 case BOOLEAN_TYPE: /* Boolean truth value type. */
7349 case LANG_TYPE: /* ? */
7350 return qualifiers;
7352 default:
7353 gcc_unreachable (); /* Not a type! */
7357 return qualifiers;
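/* Worked example (illustrative): for `unsigned int *' the first
   pass sees POINTER_TYPE and sets qualifiers |= 1 << 6; the second
   pass reaches the INTEGER_TYPE and returns qualifiers | 14, i.e.
   0x40 | 0xe == 0x4e.  */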
7360 /* Nested function support. */
7362 /* Emit RTL insns to initialize the variable parts of a trampoline.
7363 FNADDR is an RTX for the address of the function's pure code.
7364 CXT is an RTX for the static chain value for the function.
7366 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
7367 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
7368 (to store insns). This is a bit excessive. Perhaps a different
7369 mechanism would be better here.
7371 Emit enough FLUSH insns to synchronize the data and instruction caches. */
7373 void
7374 sparc_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
7376 /* SPARC 32-bit trampoline:
7378 sethi %hi(fn), %g1
7379 sethi %hi(static), %g2
7380 jmp %g1+%lo(fn)
7381 or %g2, %lo(static), %g2
7383 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
7384 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
7385 */
7387 emit_move_insn
7388 (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
7389 expand_binop (SImode, ior_optab,
7390 expand_shift (RSHIFT_EXPR, SImode, fnaddr,
7391 size_int (10), 0, 1),
7392 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
7393 NULL_RTX, 1, OPTAB_DIRECT));
7395 emit_move_insn
7396 (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7397 expand_binop (SImode, ior_optab,
7398 expand_shift (RSHIFT_EXPR, SImode, cxt,
7399 size_int (10), 0, 1),
7400 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
7401 NULL_RTX, 1, OPTAB_DIRECT));
7403 emit_move_insn
7404 (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7405 expand_binop (SImode, ior_optab,
7406 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
7407 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
7408 NULL_RTX, 1, OPTAB_DIRECT));
7410 emit_move_insn
7411 (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7412 expand_binop (SImode, ior_optab,
7413 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
7414 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
7415 NULL_RTX, 1, OPTAB_DIRECT));
7417 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
7418 aligned on a 16 byte boundary so one flush clears it all. */
7419 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
7420 if (sparc_cpu != PROCESSOR_ULTRASPARC
7421 && sparc_cpu != PROCESSOR_ULTRASPARC3
7422 && sparc_cpu != PROCESSOR_NIAGARA
7423 && sparc_cpu != PROCESSOR_NIAGARA2)
7424 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
7425 plus_constant (tramp, 8)))));
7427 /* Call __enable_execute_stack after writing onto the stack to make sure
7428 the stack address is accessible. */
7429 #ifdef ENABLE_EXECUTE_STACK
7430 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7431 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
7432 #endif
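/* Worked encoding example (illustrative, with fn == 0x00012340):
   the first word becomes 0x03000000 | (fn >> 10) == 0x03000048,
   i.e. "sethi %hi(0x12340), %g1", and the third word becomes
   0x81c06000 | (fn & 0x3ff) == 0x81c06340, i.e. "jmp %g1+0x340".  */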
7436 /* The 64-bit version is simpler because it makes more sense to load the
7437 values as "immediate" data out of the trampoline. It's also easier since
7438 we can read the PC without clobbering a register. */
7440 void
7441 sparc64_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
7443 /* SPARC 64-bit trampoline:
7445 rd %pc, %g1
7446 ldx [%g1+24], %g5
7447 jmp %g5
7448 ldx [%g1+16], %g5
7449 +16 bytes data
7450 */
7452 emit_move_insn (gen_rtx_MEM (SImode, tramp),
7453 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
7454 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7455 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
7456 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7457 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
7458 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7459 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
7460 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
7461 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
7462 emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp))));
7464 if (sparc_cpu != PROCESSOR_ULTRASPARC
7465 && sparc_cpu != PROCESSOR_ULTRASPARC3
7466 && sparc_cpu != PROCESSOR_NIAGARA
7467 && sparc_cpu != PROCESSOR_NIAGARA2)
7468 emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
7470 /* Call __enable_execute_stack after writing onto the stack to make sure
7471 the stack address is accessible. */
7472 #ifdef ENABLE_EXECUTE_STACK
7473 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7474 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
7475 #endif
7478 /* Adjust the cost of a scheduling dependency. Return the new cost of
7479 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
7481 static int
7482 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7484 enum attr_type insn_type;
7486 if (! recog_memoized (insn))
7487 return 0;
7489 insn_type = get_attr_type (insn);
7491 if (REG_NOTE_KIND (link) == 0)
7493 /* Data dependency; DEP_INSN writes a register that INSN reads some
7494 cycles later. */
7496 /* If a load, then the dependence must be on the memory address;
7497 add an extra "cycle". Note that the cost could be two cycles
7498 if the reg was written late in an instruction group; we cannot
7499 tell here. */
7500 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
7501 return cost + 3;
7503 /* Get the delay only if the address of the store is the dependence. */
7504 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
7506 rtx pat = PATTERN(insn);
7507 rtx dep_pat = PATTERN (dep_insn);
7509 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7510 return cost; /* This should not happen! */
7512 /* The dependency between the two instructions was on the data that
7513 is being stored. Assume that this implies that the address of the
7514 store is not dependent. */
7515 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7516 return cost;
7518 return cost + 3; /* An approximation. */
7521 /* A shift instruction cannot receive its data from an instruction
7522 in the same cycle; add a one cycle penalty. */
7523 if (insn_type == TYPE_SHIFT)
7524 return cost + 3; /* Split before cascade into shift. */
7526 else
7528 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
7529 INSN writes some cycles later. */
7531 /* These are only significant for the fpu unit; writing a fp reg before
7532 the fpu has finished with it stalls the processor. */
7534 /* Reusing an integer register causes no problems. */
7535 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7536 return 0;
7539 return cost;
7542 static int
7543 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7545 enum attr_type insn_type, dep_type;
7546 rtx pat = PATTERN(insn);
7547 rtx dep_pat = PATTERN (dep_insn);
7549 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
7550 return cost;
7552 insn_type = get_attr_type (insn);
7553 dep_type = get_attr_type (dep_insn);
7555 switch (REG_NOTE_KIND (link))
7557 case 0:
7558 /* Data dependency; DEP_INSN writes a register that INSN reads some
7559 cycles later. */
7561 switch (insn_type)
7563 case TYPE_STORE:
7564 case TYPE_FPSTORE:
7565 /* Get the delay iff the address of the store is the dependence. */
7566 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7567 return cost;
7569 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7570 return cost;
7571 return cost + 3;
7573 case TYPE_LOAD:
7574 case TYPE_SLOAD:
7575 case TYPE_FPLOAD:
7576 /* If a load, then the dependence must be on the memory address. If
7577 the addresses aren't equal, then it might be a false dependency. */
7578 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
7580 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
7581 || GET_CODE (SET_DEST (dep_pat)) != MEM
7582 || GET_CODE (SET_SRC (pat)) != MEM
7583 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
7584 XEXP (SET_SRC (pat), 0)))
7585 return cost + 2;
7587 return cost + 8;
7589 break;
7591 case TYPE_BRANCH:
7592 /* Compare to branch latency is 0. There is no benefit from
7593 separating compare and branch. */
7594 if (dep_type == TYPE_COMPARE)
7595 return 0;
7596 /* Floating point compare to branch latency is less than
7597 compare to conditional move. */
7598 if (dep_type == TYPE_FPCMP)
7599 return cost - 1;
7600 break;
7601 default:
7602 break;
7604 break;
7606 case REG_DEP_ANTI:
7607 /* Anti-dependencies only penalize the fpu unit. */
7608 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7609 return 0;
7610 break;
7612 default:
7613 break;
7616 return cost;
7619 static int
7620 sparc_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7622 switch (sparc_cpu)
7624 case PROCESSOR_SUPERSPARC:
7625 cost = supersparc_adjust_cost (insn, link, dep, cost);
7626 break;
7627 case PROCESSOR_HYPERSPARC:
7628 case PROCESSOR_SPARCLITE86X:
7629 cost = hypersparc_adjust_cost (insn, link, dep, cost);
7630 break;
7631 default:
7632 break;
7634 return cost;
7637 static void
7638 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7639 int sched_verbose ATTRIBUTE_UNUSED,
7640 int max_ready ATTRIBUTE_UNUSED)
7644 static int
7645 sparc_use_sched_lookahead (void)
7647 if (sparc_cpu == PROCESSOR_NIAGARA
7648 || sparc_cpu == PROCESSOR_NIAGARA2)
7649 return 0;
7650 if (sparc_cpu == PROCESSOR_ULTRASPARC
7651 || sparc_cpu == PROCESSOR_ULTRASPARC3)
7652 return 4;
7653 if ((1 << sparc_cpu) &
7654 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
7655 (1 << PROCESSOR_SPARCLITE86X)))
7656 return 3;
7657 return 0;
7660 static int
7661 sparc_issue_rate (void)
7663 switch (sparc_cpu)
7665 case PROCESSOR_NIAGARA:
7666 case PROCESSOR_NIAGARA2:
7667 default:
7668 return 1;
7669 case PROCESSOR_V9:
7670 /* Assume V9 processors are capable of at least dual-issue. */
7671 return 2;
7672 case PROCESSOR_SUPERSPARC:
7673 return 3;
7674 case PROCESSOR_HYPERSPARC:
7675 case PROCESSOR_SPARCLITE86X:
7676 return 2;
7677 case PROCESSOR_ULTRASPARC:
7678 case PROCESSOR_ULTRASPARC3:
7679 return 4;
7683 static int
7684 set_extends (rtx insn)
7686 register rtx pat = PATTERN (insn);
7688 switch (GET_CODE (SET_SRC (pat)))
7690 /* Load and some shift instructions zero extend. */
7691 case MEM:
7692 case ZERO_EXTEND:
7693 /* sethi clears the high bits. */
7694 case HIGH:
7695 /* LO_SUM is used with sethi. sethi cleared the high
7696 bits and the values used with lo_sum are positive. */
7697 case LO_SUM:
7698 /* Store flag stores 0 or 1. */
7699 case LT: case LTU:
7700 case GT: case GTU:
7701 case LE: case LEU:
7702 case GE: case GEU:
7703 case EQ:
7704 case NE:
7705 return 1;
7706 case AND:
7708 rtx op0 = XEXP (SET_SRC (pat), 0);
7709 rtx op1 = XEXP (SET_SRC (pat), 1);
7710 if (GET_CODE (op1) == CONST_INT)
7711 return INTVAL (op1) >= 0;
7712 if (GET_CODE (op0) != REG)
7713 return 0;
7714 if (sparc_check_64 (op0, insn) == 1)
7715 return 1;
7716 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
7718 case IOR:
7719 case XOR:
7721 rtx op0 = XEXP (SET_SRC (pat), 0);
7722 rtx op1 = XEXP (SET_SRC (pat), 1);
7723 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
7724 return 0;
7725 if (GET_CODE (op1) == CONST_INT)
7726 return INTVAL (op1) >= 0;
7727 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
7729 case LSHIFTRT:
7730 return GET_MODE (SET_SRC (pat)) == SImode;
7731 /* Positive integers leave the high bits zero. */
7732 case CONST_DOUBLE:
7733 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
7734 case CONST_INT:
7735 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
7736 case ASHIFTRT:
7737 case SIGN_EXTEND:
7738 return - (GET_MODE (SET_SRC (pat)) == SImode);
7739 case REG:
7740 return sparc_check_64 (SET_SRC (pat), insn);
7741 default:
7742 return 0;
7746 /* We _ought_ to have only one kind per function, but... */
7747 static GTY(()) rtx sparc_addr_diff_list;
7748 static GTY(()) rtx sparc_addr_list;
7750 void
7751 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
7753 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7754 if (diff)
7755 sparc_addr_diff_list
7756 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7757 else
7758 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7761 static void
7762 sparc_output_addr_vec (rtx vec)
7764 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7765 int idx, vlen = XVECLEN (body, 0);
7767 #ifdef ASM_OUTPUT_ADDR_VEC_START
7768 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7769 #endif
7771 #ifdef ASM_OUTPUT_CASE_LABEL
7772 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7773 NEXT_INSN (lab));
7774 #else
7775 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7776 #endif
7778 for (idx = 0; idx < vlen; idx++)
7780 ASM_OUTPUT_ADDR_VEC_ELT
7781 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
7784 #ifdef ASM_OUTPUT_ADDR_VEC_END
7785 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7786 #endif
7789 static void
7790 sparc_output_addr_diff_vec (rtx vec)
7792 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7793 rtx base = XEXP (XEXP (body, 0), 0);
7794 int idx, vlen = XVECLEN (body, 1);
7796 #ifdef ASM_OUTPUT_ADDR_VEC_START
7797 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7798 #endif
7800 #ifdef ASM_OUTPUT_CASE_LABEL
7801 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7802 NEXT_INSN (lab));
7803 #else
7804 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7805 #endif
7807 for (idx = 0; idx < vlen; idx++)
7809 ASM_OUTPUT_ADDR_DIFF_ELT
7810 (asm_out_file,
7811 body,
7812 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
7813 CODE_LABEL_NUMBER (base));
7816 #ifdef ASM_OUTPUT_ADDR_VEC_END
7817 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7818 #endif
7821 static void
7822 sparc_output_deferred_case_vectors (void)
7824 rtx t;
7825 int align;
7827 if (sparc_addr_list == NULL_RTX
7828 && sparc_addr_diff_list == NULL_RTX)
7829 return;
7831 /* Align to cache line in the function's code section. */
7832 switch_to_section (current_function_section ());
7834 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7835 if (align > 0)
7836 ASM_OUTPUT_ALIGN (asm_out_file, align);
7838 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7839 sparc_output_addr_vec (XEXP (t, 0));
7840 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7841 sparc_output_addr_diff_vec (XEXP (t, 0));
7843 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7846 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
7847 unknown. Return 1 if the high bits are zero, -1 if the register is
7848 sign extended. */
7849 int
7850 sparc_check_64 (rtx x, rtx insn)
7852 /* If a register is set only once it is safe to ignore insns this
7853 code does not know how to handle. The loop will either recognize
7854 the single set and return the correct value or fail to recognize
7855 it and return 0. */
7856 int set_once = 0;
7857 rtx y = x;
7859 gcc_assert (GET_CODE (x) == REG);
7861 if (GET_MODE (x) == DImode)
7862 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
7864 if (flag_expensive_optimizations
7865 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
7866 set_once = 1;
7868 if (insn == 0)
7870 if (set_once)
7871 insn = get_last_insn_anywhere ();
7872 else
7873 return 0;
7876 while ((insn = PREV_INSN (insn)))
7878 switch (GET_CODE (insn))
7880 case JUMP_INSN:
7881 case NOTE:
7882 break;
7883 case CODE_LABEL:
7884 case CALL_INSN:
7885 default:
7886 if (! set_once)
7887 return 0;
7888 break;
7889 case INSN:
7891 rtx pat = PATTERN (insn);
7892 if (GET_CODE (pat) != SET)
7893 return 0;
7894 if (rtx_equal_p (x, SET_DEST (pat)))
7895 return set_extends (insn);
7896 if (y && rtx_equal_p (y, SET_DEST (pat)))
7897 return set_extends (insn);
7898 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
7899 return 0;
7903 return 0;
7906 /* Returns assembly code to perform a DImode shift using
7907 a 64-bit global or out register on SPARC-V8+. */
7908 const char *
7909 output_v8plus_shift (rtx *operands, rtx insn, const char *opcode)
7911 static char asm_code[60];
7913 /* The scratch register is only required when the destination
7914 register is not a 64-bit global or out register. */
7915 if (which_alternative != 2)
7916 operands[3] = operands[0];
7918 /* We can only shift by constants <= 63. */
7919 if (GET_CODE (operands[2]) == CONST_INT)
7920 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
7922 if (GET_CODE (operands[1]) == CONST_INT)
7924 output_asm_insn ("mov\t%1, %3", operands);
7926 else
7928 output_asm_insn ("sllx\t%H1, 32, %3", operands);
7929 if (sparc_check_64 (operands[1], insn) <= 0)
7930 output_asm_insn ("srl\t%L1, 0, %L1", operands);
7931 output_asm_insn ("or\t%L1, %3, %3", operands);
7934 strcpy (asm_code, opcode);
7936 if (which_alternative != 2)
7937 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
7938 else
7939 return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
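/* Worked example: with OPCODE "sllx", a non-constant source, and an
   alternative other than 2 (so that operands[3] aliases operands[0]),
   the routine above emits for a DImode value held in a register pair
   (%H1/%L1):

       sllx  %H1, 32, %3      ! move the high word into bits 63..32
       srl   %L1, 0, %L1      ! zero-extend the low word
       or    %L1, %3, %3      ! assemble the full 64-bit value
       sllx  %0, %2, %L0      ! perform the actual shift
       srlx  %L0, 32, %H0     ! split the result back into the pair

   The middle "srl" is skipped when sparc_check_64 proves that the high
   bits of the low word are already zero.  */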
7942 /* Output rtl to increment the profiler label LABELNO
7943 for profiling a function entry. */
7945 void
7946 sparc_profile_hook (int labelno)
7948 char buf[32];
7949 rtx lab, fun;
7951 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
7952 if (NO_PROFILE_COUNTERS)
7954 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
7956 else
7958 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
7959 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
7960 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
7964 #ifdef OBJECT_FORMAT_ELF
7965 static void
7966 sparc_elf_asm_named_section (const char *name, unsigned int flags,
7967 tree decl)
7969 if (flags & SECTION_MERGE)
7971 /* The entry size (entsize) cannot be expressed in this section
7972 attribute encoding style. */
7973 default_elf_asm_named_section (name, flags, decl);
7974 return;
7977 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
7979 if (!(flags & SECTION_DEBUG))
7980 fputs (",#alloc", asm_out_file);
7981 if (flags & SECTION_WRITE)
7982 fputs (",#write", asm_out_file);
7983 if (flags & SECTION_TLS)
7984 fputs (",#tls", asm_out_file);
7985 if (flags & SECTION_CODE)
7986 fputs (",#execinstr", asm_out_file);
7988 /* ??? Handle SECTION_BSS. */
7990 fputc ('\n', asm_out_file);
7992 #endif /* OBJECT_FORMAT_ELF */
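/* Example: for a writable TLS section, the function above prints the Sun
   assembler flag syntax, e.g. (section name illustrative)

       .section ".tdata",#alloc,#write,#tls

   instead of the GNU-style flag-letter encoding (e.g. "awT") produced by
   default_elf_asm_named_section.  */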
7994 /* We do not allow indirect calls to be optimized into sibling calls.
7996 We cannot use sibling calls when delayed branches are disabled
7997 because they will likely require the call delay slot to be filled.
7999 Also, on SPARC 32-bit we cannot emit a sibling call when the
8000 current function returns a structure. This is because the "unimp
8001 after call" convention would cause the callee to return to the
8002 wrong place. The generic code already disallows cases where the
8003 function being called returns a structure.
8005 It may seem strange that this last case could occur. Usually there
8006 is code after the call which jumps to epilogue code which dumps the
8007 return value into the struct return area. That ought to invalidate
8008 the sibling call right? Well, in the C++ case we can end up passing
8009 the pointer to the struct return area to a constructor (which returns
8010 void) and then nothing else happens. Such a sibling call would look
8011 valid without the added check here.
8013 VxWorks PIC PLT entries require the global pointer to be initialized
8014 on entry. We therefore can't emit sibling calls to them. */
8015 static bool
8016 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8018 return (decl
8019 && flag_delayed_branch
8020 && (TARGET_ARCH64 || ! cfun->returns_struct)
8021 && !(TARGET_VXWORKS_RTP
8022 && flag_pic
8023 && !targetm.binds_local_p (decl)));
8026 /* libfunc renaming. */
8027 #include "config/gofast.h"
8029 static void
8030 sparc_init_libfuncs (void)
8032 if (TARGET_ARCH32)
8034 /* Use the subroutines that Sun's library provides for integer
8035 multiply and divide. The `*' prevents an underscore from
8036 being prepended by the compiler. .umul is a little faster
8037 than .mul. */
8038 set_optab_libfunc (smul_optab, SImode, "*.umul");
8039 set_optab_libfunc (sdiv_optab, SImode, "*.div");
8040 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
8041 set_optab_libfunc (smod_optab, SImode, "*.rem");
8042 set_optab_libfunc (umod_optab, SImode, "*.urem");
8044 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
8045 set_optab_libfunc (add_optab, TFmode, "_Q_add");
8046 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
8047 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
8048 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
8049 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
8051 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
8052 is because with soft-float, the SFmode and DFmode sqrt
8053 instructions will be absent, and the compiler will notice and
8054 try to use the TFmode sqrt instruction for calls to the
8055 builtin function sqrt, but this fails. */
8056 if (TARGET_FPU)
8057 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
8059 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
8060 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
8061 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
8062 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
8063 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
8064 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
8066 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
8067 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
8068 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
8069 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
8071 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
8072 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
8073 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
8074 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
8076 if (DITF_CONVERSION_LIBFUNCS)
8078 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
8079 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
8080 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
8081 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
8084 if (SUN_CONVERSION_LIBFUNCS)
8086 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
8087 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
8088 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
8089 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
8092 if (TARGET_ARCH64)
8094 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
8095 do not exist in the library. Make sure the compiler does not
8096 emit calls to them by accident. (It should always use the
8097 hardware instructions.) */
8098 set_optab_libfunc (smul_optab, SImode, 0);
8099 set_optab_libfunc (sdiv_optab, SImode, 0);
8100 set_optab_libfunc (udiv_optab, SImode, 0);
8101 set_optab_libfunc (smod_optab, SImode, 0);
8102 set_optab_libfunc (umod_optab, SImode, 0);
8104 if (SUN_INTEGER_MULTIPLY_64)
8106 set_optab_libfunc (smul_optab, DImode, "__mul64");
8107 set_optab_libfunc (sdiv_optab, DImode, "__div64");
8108 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
8109 set_optab_libfunc (smod_optab, DImode, "__rem64");
8110 set_optab_libfunc (umod_optab, DImode, "__urem64");
8113 if (SUN_CONVERSION_LIBFUNCS)
8115 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
8116 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
8117 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
8118 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
8122 gofast_maybe_init_libfuncs ();
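/* Illustrative effect: assuming a 32-bit target with 128-bit long double,

       long double f (long double x, long double y) { return x / y; }

   now calls _Q_div instead of the generic libgcc routine __divtf3,
   matching the TFmode support functions mandated by the SPARC 32-bit
   ABI.  */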
8125 #define def_builtin(NAME, CODE, TYPE) \
8126 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \
8127 NULL_TREE)
8129 /* Implement the TARGET_INIT_BUILTINS target hook.
8130 Create builtin functions for special SPARC instructions. */
8132 static void
8133 sparc_init_builtins (void)
8135 if (TARGET_VIS)
8136 sparc_vis_init_builtins ();
8139 /* Create builtin functions for VIS 1.0 instructions. */
8141 static void
8142 sparc_vis_init_builtins (void)
8144 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
8145 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
8146 tree v4hi = build_vector_type (intHI_type_node, 4);
8147 tree v2hi = build_vector_type (intHI_type_node, 2);
8148 tree v2si = build_vector_type (intSI_type_node, 2);
8150 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
8151 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
8152 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
8153 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
8154 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
8155 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
8156 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
8157 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
8158 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
8159 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
8160 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
8161 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
8162 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
8163 v8qi, v8qi,
8164 intDI_type_node, 0);
8165 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
8166 intDI_type_node,
8167 intDI_type_node, 0);
8168 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
8169 ptr_type_node,
8170 intSI_type_node, 0);
8171 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
8172 ptr_type_node,
8173 intDI_type_node, 0);
8175 /* Packing and expanding vectors. */
8176 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi);
8177 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
8178 v8qi_ftype_v2si_v8qi);
8179 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
8180 v2hi_ftype_v2si);
8181 def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi);
8182 def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
8183 v8qi_ftype_v4qi_v4qi);
8185 /* Multiplications. */
8186 def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
8187 v4hi_ftype_v4qi_v4hi);
8188 def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
8189 v4hi_ftype_v4qi_v2hi);
8190 def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
8191 v4hi_ftype_v4qi_v2hi);
8192 def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
8193 v4hi_ftype_v8qi_v4hi);
8194 def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
8195 v4hi_ftype_v8qi_v4hi);
8196 def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
8197 v2si_ftype_v4qi_v2hi);
8198 def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
8199 v2si_ftype_v4qi_v2hi);
8201 /* Data aligning. */
8202 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
8203 v4hi_ftype_v4hi_v4hi);
8204 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
8205 v8qi_ftype_v8qi_v8qi);
8206 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
8207 v2si_ftype_v2si_v2si);
8208 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
8209 di_ftype_di_di);
8210 if (TARGET_ARCH64)
8211 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
8212 ptr_ftype_ptr_di);
8213 else
8214 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
8215 ptr_ftype_ptr_si);
8217 /* Pixel distance. */
8218 def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
8219 di_ftype_v8qi_v8qi_di);
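/* Usage sketch (the vector typedefs are illustrative):

       typedef short vec16 __attribute__ ((vector_size (8)));
       typedef unsigned char vec8 __attribute__ ((vector_size (4)));

       vec8 pack (vec16 v) { return __builtin_vis_fpack16 (v); }

   With VIS enabled (-mvis), the call expands directly to the fpack16
   instruction through the CODE_FOR_fpack16_vis pattern registered
   above.  */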
8222 /* Handle TARGET_EXPAND_BUILTIN target hook.
8223 Expand builtin functions for sparc intrinsics. */
8225 static rtx
8226 sparc_expand_builtin (tree exp, rtx target,
8227 rtx subtarget ATTRIBUTE_UNUSED,
8228 enum machine_mode tmode ATTRIBUTE_UNUSED,
8229 int ignore ATTRIBUTE_UNUSED)
8231 tree arg;
8232 call_expr_arg_iterator iter;
8233 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
8234 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
8235 rtx pat, op[4];
8236 enum machine_mode mode[4];
8237 int arg_count = 0;
8239 mode[0] = insn_data[icode].operand[0].mode;
8240 if (!target
8241 || GET_MODE (target) != mode[0]
8242 || ! (*insn_data[icode].operand[0].predicate) (target, mode[0]))
8243 op[0] = gen_reg_rtx (mode[0]);
8244 else
8245 op[0] = target;
8247 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
8249 arg_count++;
8250 mode[arg_count] = insn_data[icode].operand[arg_count].mode;
8251 op[arg_count] = expand_normal (arg);
8253 if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
8254 mode[arg_count]))
8255 op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
8258 switch (arg_count)
8260 case 1:
8261 pat = GEN_FCN (icode) (op[0], op[1]);
8262 break;
8263 case 2:
8264 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
8265 break;
8266 case 3:
8267 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
8268 break;
8269 default:
8270 gcc_unreachable ();
8273 if (!pat)
8274 return NULL_RTX;
8276 emit_insn (pat);
8278 return op[0];
8281 static int
8282 sparc_vis_mul8x16 (int e8, int e16)
8284 return (e8 * e16 + 128) / 256;
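/* Worked example: sparc_vis_mul8x16 (128, 100) = (12800 + 128) / 256
   = 50.  The 8-bit operand thus acts as a fixed-point fraction e8/256
   with rounding, mirroring the VIS fmul8x16 semantics folded below.  */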
8287 /* Multiply the vector elements in ELTS0 by the elements in ELTS1 as specified
8288 by FNCODE. All of the elements in the ELTS0 and ELTS1 lists must be integer
8289 constants. A tree list with the results of the multiplications is returned,
8290 and each element in the list is of INNER_TYPE. */
8292 static tree
8293 sparc_handle_vis_mul8x16 (int fncode, tree inner_type, tree elts0, tree elts1)
8295 tree n_elts = NULL_TREE;
8296 int scale;
8298 switch (fncode)
8300 case CODE_FOR_fmul8x16_vis:
8301 for (; elts0 && elts1;
8302 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8304 int val
8305 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8306 TREE_INT_CST_LOW (TREE_VALUE (elts1)));
8307 n_elts = tree_cons (NULL_TREE,
8308 build_int_cst (inner_type, val),
8309 n_elts);
8311 break;
8313 case CODE_FOR_fmul8x16au_vis:
8314 scale = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8316 for (; elts0; elts0 = TREE_CHAIN (elts0))
8318 int val
8319 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8320 scale);
8321 n_elts = tree_cons (NULL_TREE,
8322 build_int_cst (inner_type, val),
8323 n_elts);
8325 break;
8327 case CODE_FOR_fmul8x16al_vis:
8328 scale = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (elts1)));
8330 for (; elts0; elts0 = TREE_CHAIN (elts0))
8332 int val
8333 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8334 scale);
8335 n_elts = tree_cons (NULL_TREE,
8336 build_int_cst (inner_type, val),
8337 n_elts);
8339 break;
8341 default:
8342 gcc_unreachable ();
8345 return nreverse (n_elts);
8348 /* Handle TARGET_FOLD_BUILTIN target hook.
8349 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
8350 result of the function call is ignored. NULL_TREE is returned if the
8351 function could not be folded. */
8353 static tree
8354 sparc_fold_builtin (tree fndecl, tree arglist, bool ignore)
8356 tree arg0, arg1, arg2;
8357 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
8358 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
8360 if (ignore
8361 && icode != CODE_FOR_alignaddrsi_vis
8362 && icode != CODE_FOR_alignaddrdi_vis)
8363 return fold_convert (rtype, integer_zero_node);
8365 switch (icode)
8367 case CODE_FOR_fexpand_vis:
8368 arg0 = TREE_VALUE (arglist);
8369 STRIP_NOPS (arg0);
8371 if (TREE_CODE (arg0) == VECTOR_CST)
8373 tree inner_type = TREE_TYPE (rtype);
8374 tree elts = TREE_VECTOR_CST_ELTS (arg0);
8375 tree n_elts = NULL_TREE;
8377 for (; elts; elts = TREE_CHAIN (elts))
8379 unsigned int val = TREE_INT_CST_LOW (TREE_VALUE (elts)) << 4;
8380 n_elts = tree_cons (NULL_TREE,
8381 build_int_cst (inner_type, val),
8382 n_elts);
8384 return build_vector (rtype, nreverse (n_elts));
8386 break;
8388 case CODE_FOR_fmul8x16_vis:
8389 case CODE_FOR_fmul8x16au_vis:
8390 case CODE_FOR_fmul8x16al_vis:
8391 arg0 = TREE_VALUE (arglist);
8392 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8393 STRIP_NOPS (arg0);
8394 STRIP_NOPS (arg1);
8396 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8398 tree inner_type = TREE_TYPE (rtype);
8399 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8400 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8401 tree n_elts = sparc_handle_vis_mul8x16 (icode, inner_type, elts0,
8402 elts1);
8404 return build_vector (rtype, n_elts);
8406 break;
8408 case CODE_FOR_fpmerge_vis:
8409 arg0 = TREE_VALUE (arglist);
8410 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8411 STRIP_NOPS (arg0);
8412 STRIP_NOPS (arg1);
8414 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8416 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8417 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8418 tree n_elts = NULL_TREE;
8420 for (; elts0 && elts1;
8421 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8423 n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts0), n_elts);
8424 n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts1), n_elts);
8427 return build_vector (rtype, nreverse (n_elts));
8429 break;
8431 case CODE_FOR_pdist_vis:
8432 arg0 = TREE_VALUE (arglist);
8433 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8434 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8435 STRIP_NOPS (arg0);
8436 STRIP_NOPS (arg1);
8437 STRIP_NOPS (arg2);
8439 if (TREE_CODE (arg0) == VECTOR_CST
8440 && TREE_CODE (arg1) == VECTOR_CST
8441 && TREE_CODE (arg2) == INTEGER_CST)
8443 int overflow = 0;
8444 unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2);
8445 HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2);
8446 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8447 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8449 for (; elts0 && elts1;
8450 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8452 unsigned HOST_WIDE_INT
8453 low0 = TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8454 low1 = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8455 HOST_WIDE_INT high0 = TREE_INT_CST_HIGH (TREE_VALUE (elts0));
8456 HOST_WIDE_INT high1 = TREE_INT_CST_HIGH (TREE_VALUE (elts1));
8458 unsigned HOST_WIDE_INT l;
8459 HOST_WIDE_INT h;
8461 overflow |= neg_double (low1, high1, &l, &h);
8462 overflow |= add_double (low0, high0, l, h, &l, &h);
8463 if (h < 0)
8464 overflow |= neg_double (l, h, &l, &h);
8466 overflow |= add_double (low, high, l, h, &low, &high);
8469 gcc_assert (overflow == 0);
8471 return build_int_cst_wide (rtype, low, high);
8474 default:
8475 break;
8478 return NULL_TREE;
8481 /* ??? This duplicates information provided to the compiler by the
8482 ??? scheduler description. Some day, teach genautomata to output
8483 ??? the latencies and then CSE will just use that. */
8485 static bool
8486 sparc_rtx_costs (rtx x, int code, int outer_code, int *total,
8487 bool speed ATTRIBUTE_UNUSED)
8489 enum machine_mode mode = GET_MODE (x);
8490 bool float_mode_p = FLOAT_MODE_P (mode);
8492 switch (code)
8494 case CONST_INT:
8495 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
8497 *total = 0;
8498 return true;
8500 /* FALLTHRU */
8502 case HIGH:
8503 *total = 2;
8504 return true;
8506 case CONST:
8507 case LABEL_REF:
8508 case SYMBOL_REF:
8509 *total = 4;
8510 return true;
8512 case CONST_DOUBLE:
8513 if (GET_MODE (x) == VOIDmode
8514 && ((CONST_DOUBLE_HIGH (x) == 0
8515 && CONST_DOUBLE_LOW (x) < 0x1000)
8516 || (CONST_DOUBLE_HIGH (x) == -1
8517 && CONST_DOUBLE_LOW (x) < 0
8518 && CONST_DOUBLE_LOW (x) >= -0x1000)))
8519 *total = 0;
8520 else
8521 *total = 8;
8522 return true;
8524 case MEM:
8525 /* If outer-code was a sign or zero extension, a cost
8526 of COSTS_N_INSNS (1) was already added in. This is
8527 why we are subtracting it back out. */
8528 if (outer_code == ZERO_EXTEND)
8530 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
8532 else if (outer_code == SIGN_EXTEND)
8534 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
8536 else if (float_mode_p)
8538 *total = sparc_costs->float_load;
8540 else
8542 *total = sparc_costs->int_load;
8545 return true;
8547 case PLUS:
8548 case MINUS:
8549 if (float_mode_p)
8550 *total = sparc_costs->float_plusminus;
8551 else
8552 *total = COSTS_N_INSNS (1);
8553 return false;
8555 case MULT:
8556 if (float_mode_p)
8557 *total = sparc_costs->float_mul;
8558 else if (! TARGET_HARD_MUL)
8559 *total = COSTS_N_INSNS (25);
8560 else
8562 int bit_cost;
8564 bit_cost = 0;
8565 if (sparc_costs->int_mul_bit_factor)
8567 int nbits;
8569 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8571 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
8572 for (nbits = 0; value != 0; value &= value - 1)
8573 nbits++;
8575 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
8576 && GET_MODE (XEXP (x, 1)) == VOIDmode)
8578 rtx x1 = XEXP (x, 1);
8579 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
8580 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
8582 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
8583 nbits++;
8584 for (; value2 != 0; value2 &= value2 - 1)
8585 nbits++;
8587 else
8588 nbits = 7;
8590 if (nbits < 3)
8591 nbits = 3;
8592 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
8593 bit_cost = COSTS_N_INSNS (bit_cost);
8596 if (mode == DImode)
8597 *total = sparc_costs->int_mulX + bit_cost;
8598 else
8599 *total = sparc_costs->int_mul + bit_cost;
8601 return false;
8603 case ASHIFT:
8604 case ASHIFTRT:
8605 case LSHIFTRT:
8606 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
8607 return false;
8609 case DIV:
8610 case UDIV:
8611 case MOD:
8612 case UMOD:
8613 if (float_mode_p)
8615 if (mode == DFmode)
8616 *total = sparc_costs->float_div_df;
8617 else
8618 *total = sparc_costs->float_div_sf;
8620 else
8622 if (mode == DImode)
8623 *total = sparc_costs->int_divX;
8624 else
8625 *total = sparc_costs->int_div;
8627 return false;
8629 case NEG:
8630 if (! float_mode_p)
8632 *total = COSTS_N_INSNS (1);
8633 return false;
8635 /* FALLTHRU */
8637 case ABS:
8638 case FLOAT:
8639 case UNSIGNED_FLOAT:
8640 case FIX:
8641 case UNSIGNED_FIX:
8642 case FLOAT_EXTEND:
8643 case FLOAT_TRUNCATE:
8644 *total = sparc_costs->float_move;
8645 return false;
8647 case SQRT:
8648 if (mode == DFmode)
8649 *total = sparc_costs->float_sqrt_df;
8650 else
8651 *total = sparc_costs->float_sqrt_sf;
8652 return false;
8654 case COMPARE:
8655 if (float_mode_p)
8656 *total = sparc_costs->float_cmp;
8657 else
8658 *total = COSTS_N_INSNS (1);
8659 return false;
8661 case IF_THEN_ELSE:
8662 if (float_mode_p)
8663 *total = sparc_costs->float_cmove;
8664 else
8665 *total = sparc_costs->int_cmove;
8666 return false;
8668 case IOR:
8669 /* Handle the NAND vector patterns. */
8670 if (sparc_vector_mode_supported_p (GET_MODE (x))
8671 && GET_CODE (XEXP (x, 0)) == NOT
8672 && GET_CODE (XEXP (x, 1)) == NOT)
8674 *total = COSTS_N_INSNS (1);
8675 return true;
8677 else
8678 return false;
8680 default:
8681 return false;
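/* Note on the CONST_INT case above: the range check accepts exactly
   [-0x1000, 0xfff], i.e. the values that fit in the 13-bit signed
   immediate field (simm13) of most SPARC instructions, so such
   constants are costed as free.  */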
8685 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
8686 This is achieved by means of a manual dynamic stack space allocation in
8687 the current frame. We make the assumption that SEQ doesn't contain any
8688 function calls, with the possible exception of calls to the PIC helper. */
8690 static void
8691 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
8693 /* We must preserve the lowest 16 words for the register save area. */
8694 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
8695 /* We really need only 2 words of fresh stack space. */
8696 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
8698 rtx slot
8699 = gen_rtx_MEM (word_mode, plus_constant (stack_pointer_rtx,
8700 SPARC_STACK_BIAS + offset));
8702 emit_insn (gen_stack_pointer_dec (GEN_INT (size)));
8703 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
8704 if (reg2)
8705 emit_insn (gen_rtx_SET (VOIDmode,
8706 adjust_address (slot, word_mode, UNITS_PER_WORD),
8707 reg2));
8708 emit_insn (seq);
8709 if (reg2)
8710 emit_insn (gen_rtx_SET (VOIDmode,
8711 reg2,
8712 adjust_address (slot, word_mode, UNITS_PER_WORD)));
8713 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
8714 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
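/* Resulting stack layout (offsets from %sp + SPARC_STACK_BIAS after the
   stack pointer decrement above):

       +0 .. 16*UNITS_PER_WORD-1    register save area (ABI-reserved)
       +16*UNITS_PER_WORD           REG spilled here
       +17*UNITS_PER_WORD           REG2, when non-null

   hence OFFSET skips the 16-word window save area required by the SPARC
   calling conventions.  */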
8717 /* Output the assembler code for a thunk function. THUNK_DECL is the
8718 declaration for the thunk function itself, FUNCTION is the decl for
8719 the target function. DELTA is an immediate constant offset to be
8720 added to THIS. If VCALL_OFFSET is nonzero, the word at address
8721 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
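/* Context sketch: such thunks typically implement C++ virtual calls
   through a secondary base.  For instance (illustrative):

       struct A { virtual void f (); };
       struct B { virtual void g (); };
       struct C : A, B { void g (); };

   the B-in-C vtable points to a thunk for C::g whose DELTA is minus the
   offset of B within C; a nonzero VCALL_OFFSET arises with virtual
   bases, where the adjustment must be fetched from the vtable at run
   time.  */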
8723 static void
8724 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8725 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8726 tree function)
8728 rtx this_rtx, insn, funexp;
8729 unsigned int int_arg_first;
8731 reload_completed = 1;
8732 epilogue_completed = 1;
8734 emit_note (NOTE_INSN_PROLOGUE_END);
8736 if (flag_delayed_branch)
8738 /* We will emit a regular sibcall below, so we need to instruct
8739 output_sibcall that we are in a leaf function. */
8740 sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1;
8742 /* This will cause final.c to invoke leaf_renumber_regs so we
8743 must behave as if we were in a not-yet-leafified function. */
8744 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
8746 else
8748 /* We will emit the sibcall manually below, so we will need to
8749 manually spill non-leaf registers. */
8750 sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0;
8752 /* We really are in a leaf function. */
8753 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
8756 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
8757 returns a structure, the structure return pointer is there instead. */
8758 if (TARGET_ARCH64 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8759 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
8760 else
8761 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
8763 /* Add DELTA. When possible use a plain add, otherwise load it into
8764 a register first. */
8765 if (delta)
8767 rtx delta_rtx = GEN_INT (delta);
8769 if (! SPARC_SIMM13_P (delta))
8771 rtx scratch = gen_rtx_REG (Pmode, 1);
8772 emit_move_insn (scratch, delta_rtx);
8773 delta_rtx = scratch;
8776 /* THIS_RTX += DELTA. */
8777 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
8780 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
8781 if (vcall_offset)
8783 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8784 rtx scratch = gen_rtx_REG (Pmode, 1);
8786 gcc_assert (vcall_offset < 0);
8788 /* SCRATCH = *THIS_RTX. */
8789 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
8791 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
8792 may not have any available scratch register at this point. */
8793 if (SPARC_SIMM13_P (vcall_offset))
8795 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
8796 else if (! fixed_regs[5]
8797 /* The below sequence is made up of at least 2 insns,
8798 while the default method may need only one. */
8799 && vcall_offset < -8192)
8801 rtx scratch2 = gen_rtx_REG (Pmode, 5);
8802 emit_move_insn (scratch2, vcall_offset_rtx);
8803 vcall_offset_rtx = scratch2;
8805 else
8807 rtx increment = GEN_INT (-4096);
8809 /* VCALL_OFFSET is a negative number whose typical range can be
8810 estimated as -32768..0 in 32-bit mode. In almost all cases
8811 it is therefore cheaper to emit multiple add insns than
8812 spilling and loading the constant into a register (at least
8813 6 insns). */
8814 while (! SPARC_SIMM13_P (vcall_offset))
8816 emit_insn (gen_add2_insn (scratch, increment));
8817 vcall_offset += 4096;
8819 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
8822 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
8823 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
8824 gen_rtx_PLUS (Pmode,
8825 scratch,
8826 vcall_offset_rtx)));
8828 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
8829 emit_insn (gen_add2_insn (this_rtx, scratch));
8832 /* Generate a tail call to the target function. */
8833 if (! TREE_USED (function))
8835 assemble_external (function);
8836 TREE_USED (function) = 1;
8838 funexp = XEXP (DECL_RTL (function), 0);
8840 if (flag_delayed_branch)
8842 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8843 insn = emit_call_insn (gen_sibcall (funexp));
8844 SIBLING_CALL_P (insn) = 1;
8846 else
8848 /* The hoops we have to jump through in order to generate a sibcall
8849 without using delay slots... */
8850 rtx spill_reg, spill_reg2, seq, scratch = gen_rtx_REG (Pmode, 1);
8852 if (flag_pic)
8854 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
8855 spill_reg2 = gen_rtx_REG (word_mode, PIC_OFFSET_TABLE_REGNUM);
8856 start_sequence ();
8857 /* Delay emitting the PIC helper function because it needs to
8858 change the section and we are emitting assembly code. */
8859 load_pic_register (true); /* clobbers %o7 */
8860 scratch = legitimize_pic_address (funexp, Pmode, scratch);
8861 seq = get_insns ();
8862 end_sequence ();
8863 emit_and_preserve (seq, spill_reg, spill_reg2);
8865 else if (TARGET_ARCH32)
8867 emit_insn (gen_rtx_SET (VOIDmode,
8868 scratch,
8869 gen_rtx_HIGH (SImode, funexp)));
8870 emit_insn (gen_rtx_SET (VOIDmode,
8871 scratch,
8872 gen_rtx_LO_SUM (SImode, scratch, funexp)));
8874 else /* TARGET_ARCH64 */
8876 switch (sparc_cmodel)
8878 case CM_MEDLOW:
8879 case CM_MEDMID:
8880 /* The destination can serve as a temporary. */
8881 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
8882 break;
8884 case CM_MEDANY:
8885 case CM_EMBMEDANY:
8886 /* The destination cannot serve as a temporary. */
8887 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
8888 start_sequence ();
8889 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
8890 seq = get_insns ();
8891 end_sequence ();
8892 emit_and_preserve (seq, spill_reg, 0);
8893 break;
8895 default:
8896 gcc_unreachable ();
8900 emit_jump_insn (gen_indirect_jump (scratch));
8903 emit_barrier ();
8905 /* Run just enough of rest_of_compilation to get the insns emitted.
8906 There's not really enough bulk here to make other passes such as
8907 instruction scheduling worthwhile. Note that use_thunk calls
8908 assemble_start_function and assemble_end_function. */
8909 insn = get_insns ();
8910 insn_locators_alloc ();
8911 shorten_branches (insn);
8912 final_start_function (insn, file, 1);
8913 final (insn, file, 1);
8914 final_end_function ();
8915 free_after_compilation (cfun);
8917 reload_completed = 0;
8918 epilogue_completed = 0;
8921 /* Return true if sparc_output_mi_thunk would be able to output the
8922 assembler code for the thunk function specified by the arguments
8923 it is passed, and false otherwise. */
8924 static bool
8925 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
8926 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
8927 HOST_WIDE_INT vcall_offset,
8928 const_tree function ATTRIBUTE_UNUSED)
8930 /* Bound the loop used in the default method above. */
8931 return (vcall_offset >= -32768 || ! fixed_regs[5]);
8934 /* How to allocate a 'struct machine_function'. */
8936 static struct machine_function *
8937 sparc_init_machine_status (void)
8939 return GGC_CNEW (struct machine_function);
8942 /* Locate some local-dynamic symbol still in use by this function
8943 so that we can print its name in local-dynamic base patterns. */
8945 static const char *
8946 get_some_local_dynamic_name (void)
8948 rtx insn;
8950 if (cfun->machine->some_ld_name)
8951 return cfun->machine->some_ld_name;
8953 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8954 if (INSN_P (insn)
8955 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8956 return cfun->machine->some_ld_name;
8958 gcc_unreachable ();
8961 static int
8962 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8964 rtx x = *px;
8966 if (x
8967 && GET_CODE (x) == SYMBOL_REF
8968 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8970 cfun->machine->some_ld_name = XSTR (x, 0);
8971 return 1;
8974 return 0;
8977 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8978 This is called from dwarf2out.c to emit call frame instructions
8979 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8980 static void
8981 sparc_dwarf_handle_frame_unspec (const char *label,
8982 rtx pattern ATTRIBUTE_UNUSED,
8983 int index ATTRIBUTE_UNUSED)
8985 gcc_assert (index == UNSPECV_SAVEW);
8986 dwarf2out_window_save (label);
8989 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8990 We need to emit DTP-relative relocations. */
8992 static void
8993 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
8995 switch (size)
8997 case 4:
8998 fputs ("\t.word\t%r_tls_dtpoff32(", file);
8999 break;
9000 case 8:
9001 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
9002 break;
9003 default:
9004 gcc_unreachable ();
9006 output_addr_const (file, x);
9007 fputs (")", file);
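/* Example output: for a TLS symbol "foo" (name illustrative) and SIZE 4,
   the function above emits

       .word %r_tls_dtpoff32(foo)

   which relocates to foo's offset within its module's dynamic TLS
   block.  */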
9010 /* Do whatever processing is required at the end of a file. */
9012 static void
9013 sparc_file_end (void)
9015 /* If we haven't emitted the special PIC helper function, do so now. */
9016 if (pic_helper_symbol_name[0] && !pic_helper_emitted_p)
9017 emit_pic_helper ();
9019 if (NEED_INDICATE_EXEC_STACK)
9020 file_end_indicate_exec_stack ();
9023 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
9024 /* Implement TARGET_MANGLE_TYPE. */
9026 static const char *
9027 sparc_mangle_type (const_tree type)
9029 if (!TARGET_64BIT
9030 && TYPE_MAIN_VARIANT (type) == long_double_type_node
9031 && TARGET_LONG_DOUBLE_128)
9032 return "g";
9034 /* For all other types, use normal C++ mangling. */
9035 return NULL;
9037 #endif
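/* Example: with a 128-bit long double in 32-bit mode, "void f (long
   double);" mangles as _Z1fg rather than the default _Z1fe, so C++
   link-time type checking distinguishes the 128-bit format.  */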
9039 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a
9040 32-bit compare and swap on the word containing the byte or half-word. */
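/* Expansion sketch in C-like pseudocode (names illustrative):

       word = addr & ~3;                         // containing word
       off  = bit position of the narrow operand within the word
       mask = (0xff or 0xffff) << off;
       rest = *word & ~mask;                     // the other bytes
       for (;;)
         {
           res = cas32 (word, (oldval << off & mask) | rest,
                              (newval << off & mask) | rest);
           if (res == ((oldval << off & mask) | rest))
             break;                              // swap succeeded
           if ((res & ~mask) == rest)
             break;                              // narrow field differed
           rest = res & ~mask;                   // neighbours changed: retry
         }
       result = (res & mask) >> off;

   Only a change in the bytes outside the narrow operand forces a
   retry.  */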
9042 void
9043 sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
9045 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
9046 rtx addr = gen_reg_rtx (Pmode);
9047 rtx off = gen_reg_rtx (SImode);
9048 rtx oldv = gen_reg_rtx (SImode);
9049 rtx newv = gen_reg_rtx (SImode);
9050 rtx oldvalue = gen_reg_rtx (SImode);
9051 rtx newvalue = gen_reg_rtx (SImode);
9052 rtx res = gen_reg_rtx (SImode);
9053 rtx resv = gen_reg_rtx (SImode);
9054 rtx memsi, val, mask, end_label, loop_label, cc;
9056 emit_insn (gen_rtx_SET (VOIDmode, addr,
9057 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
9059 if (Pmode != SImode)
9060 addr1 = gen_lowpart (SImode, addr1);
9061 emit_insn (gen_rtx_SET (VOIDmode, off,
9062 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
9064 memsi = gen_rtx_MEM (SImode, addr);
9065 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
9066 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
9068 val = force_reg (SImode, memsi);
9070 emit_insn (gen_rtx_SET (VOIDmode, off,
9071 gen_rtx_XOR (SImode, off,
9072 GEN_INT (GET_MODE (mem) == QImode
9073 ? 3 : 2))));
9075 emit_insn (gen_rtx_SET (VOIDmode, off,
9076 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
9078 if (GET_MODE (mem) == QImode)
9079 mask = force_reg (SImode, GEN_INT (0xff));
9080 else
9081 mask = force_reg (SImode, GEN_INT (0xffff));
9083 emit_insn (gen_rtx_SET (VOIDmode, mask,
9084 gen_rtx_ASHIFT (SImode, mask, off)));
9086 emit_insn (gen_rtx_SET (VOIDmode, val,
9087 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9088 val)));
9090 oldval = gen_lowpart (SImode, oldval);
9091 emit_insn (gen_rtx_SET (VOIDmode, oldv,
9092 gen_rtx_ASHIFT (SImode, oldval, off)));
9094 newval = gen_lowpart_common (SImode, newval);
9095 emit_insn (gen_rtx_SET (VOIDmode, newv,
9096 gen_rtx_ASHIFT (SImode, newval, off)));
9098 emit_insn (gen_rtx_SET (VOIDmode, oldv,
9099 gen_rtx_AND (SImode, oldv, mask)));
9101 emit_insn (gen_rtx_SET (VOIDmode, newv,
9102 gen_rtx_AND (SImode, newv, mask)));
9104 end_label = gen_label_rtx ();
9105 loop_label = gen_label_rtx ();
9106 emit_label (loop_label);
9108 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
9109 gen_rtx_IOR (SImode, oldv, val)));
9111 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
9112 gen_rtx_IOR (SImode, newv, val)));
9114 emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue));
9116 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
9118 emit_insn (gen_rtx_SET (VOIDmode, resv,
9119 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9120 res)));
9122 cc = gen_compare_reg_1 (NE, resv, val);
9123 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
9125 /* Use cbranchcc4 to separate the compare and branch! */
9126 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
9127 cc, const0_rtx, loop_label));
9129 emit_label (end_label);
9131 emit_insn (gen_rtx_SET (VOIDmode, res,
9132 gen_rtx_AND (SImode, res, mask)));
9134 emit_insn (gen_rtx_SET (VOIDmode, res,
9135 gen_rtx_LSHIFTRT (SImode, res, off)));
9137 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
9140 #include "gt-sparc.h"