/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "cfglayout.h"
#include "gimple.h"
#include "langhooks.h"
#include "params.h"
#include "df.h"
/* Processor costs */
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};
static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};
static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};
static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (31), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
const struct processor_costs *sparc_costs = &cypress_costs;
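
/* Illustrative note (a sketch, not part of the original file):
   COSTS_N_INSNS (N) expands to N * 4, so the entries above are
   expressed in quarter-insn units relative to one fast instruction.
   sparc_rtx_costs hands these table entries back directly, e.g. for
   a single-precision divide roughly:

     if (float_mode_p && mode == SFmode)
       *total = sparc_costs->float_div_sf;

   The field name float_div_sf is assumed here from the
   processor_costs declaration elsewhere in the port.  */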
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
/* Global variables for machine-dependent things.  */

/* Size of frame.  Need to know this to emit return insns from leaf procedures.
   ACTUAL_FSIZE is set by sparc_compute_frame_size() which is called during the
   reload pass.  This is important as the value is later used for scheduling
   (to see what can go in a delay slot).
   APPARENT_FSIZE is the size of the stack less the register save area and less
   the outgoing argument area.  It is used when saving call preserved regs.  */
static HOST_WIDE_INT apparent_fsize;
static HOST_WIDE_INT actual_fsize;

/* Number of live general or floating point registers needed to be
   saved (as 4-byte quantities).  */
static int num_gfregs;
/* The alias set for prologue/epilogue register save/restore.  */
static GTY(()) alias_set_type sparc_sr_alias_set;

/* The alias set for the structure return value.  */
static GTY(()) alias_set_type struct_value_alias_set;
/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100};
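
/* Worked example (a sketch, not in the original file): in a leaf
   function the register window is never shifted, so an incoming
   argument the body calls %i0 (hard reg 24) actually lives in %o0
   (hard reg 8); hence leaf_reg_remap[24] == 8 above, %sp (14) maps
   to itself, and entries of -1 mark registers with no valid
   remapping.  */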
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1};
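
/* Reading the table above (an illustrative note, not in the original
   file): a function that touches any %l register (hard regs 16-23)
   or an %o register other than %sp can never be a leaf-optimization
   candidate, while one using only the %g registers, %i0-%i5, %i7 and
   the floating-point registers can.  */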
struct GTY(()) machine_function
{
  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of current_function_uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_leaf_function_p  cfun->machine->leaf_function_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
/* Register we pretend to think the frame pointer is allocated to.
   Normally, this is %fp, but if we are in a leaf procedure, this
   is %sp+"something".  We record "something" separately as it may
   be too big for reg+constant addressing.  */
static rtx frame_base_reg;
static HOST_WIDE_INT frame_base_offset;

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;
static bool sparc_handle_option (size_t, const char *, int);
static void sparc_init_modes (void);
static void scan_record_type (tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
                                tree, int, int, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void emit_pic_helper (void);
static void load_pic_register (bool);
static int save_or_restore_regs (int, int, rtx, int, int);
static void emit_save_or_restore_regs (int);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef OBJECT_FORMAT_ELF
static void sparc_elf_asm_named_section (const char *, unsigned int, tree);
#endif

static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_vis_init_builtins (void);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, tree, bool);
static int sparc_vis_mul8x16 (int, int);
static tree sparc_handle_vis_mul8x16 (int, tree, tree, tree);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
                                       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static bool sparc_rtx_costs (rtx, int, int, int *, bool);
static bool sparc_promote_prototypes (const_tree);
static rtx sparc_struct_value_rtx (tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (CUMULATIVE_ARGS *);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static bool sparc_pass_by_reference (CUMULATIVE_ARGS *,
                                     enum machine_mode, const_tree, bool);
static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *,
                                    enum machine_mode, tree, bool);
static void sparc_dwarf_handle_frame_unspec (const char *, rtx, int);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL }
};
#endif
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

struct sparc_cpu_select sparc_select[] =
{
  /* switch     name,       tune    arch */
  { (char *)0,  "default",  1,      1 },
  { (char *)0,  "-mcpu=",   1,      1 },
  { (char *)0,  "-mtune=",  1,      0 },
  { 0, 0, 0, 0 }
};

/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
enum processor_type sparc_cpu;

/* Whether an FPU option was specified.  */
static bool fpu_option_set = false;
/* Initialize the GCC target structure.  */

/* The sparc default is to use .half rather than .short for aligned
   HI objects.  Use .word instead of .long on non-ELF systems.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#ifndef OBJECT_FORMAT_ELF
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

/* This is only needed for TARGET_ARCH64, but since PROMOTE_FUNCTION_MODE is a
   no-op for TARGET_ARCH32 this is ok.  Otherwise we'd need to add a runtime
   test for this value.  */
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true

/* This is only needed for TARGET_ARCH64, but since PROMOTE_FUNCTION_MODE is a
   no-op for TARGET_ARCH32 this is ok.  Otherwise we'd need to add a runtime
   test for this value.  */
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sparc_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC sparc_dwarf_handle_frame_unspec

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sparc_handle_option

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION.  */

static bool
sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mfpu:
    case OPT_mhard_float:
    case OPT_msoft_float:
      fpu_option_set = true;
      break;

    case OPT_mcpu_:
      sparc_select[1].string = arg;
      break;

    case OPT_mtune_:
      sparc_select[2].string = arg;
      break;
    }

  return true;
}
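
/* Example (a sketch, not in the original file): "-mcpu=ultrasparc"
   reaches sparc_handle_option as OPT_mcpu_ with arg == "ultrasparc",
   so sparc_select[1].string is set here, and sparc_override_options
   below later matches it against cpu_table to choose both the tuning
   model and the ISA mask adjustments.  */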
/* Validate and override various options, and do some machine dependent
   initialization.  */

void
sparc_override_options (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const char *const name;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, "cypress" },
    { TARGET_CPU_sparclet, "tsc701" },
    { TARGET_CPU_sparclite, "f930" },
    { TARGET_CPU_v8, "v8" },
    { TARGET_CPU_hypersparc, "hypersparc" },
    { TARGET_CPU_sparclite86x, "sparclite86x" },
    { TARGET_CPU_supersparc, "supersparc" },
    { TARGET_CPU_v9, "v9" },
    { TARGET_CPU_ultrasparc, "ultrasparc" },
    { TARGET_CPU_ultrasparc3, "ultrasparc3" },
    { TARGET_CPU_niagara, "niagara" },
    { TARGET_CPU_niagara2, "niagara2" },
    { 0, 0 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  */
  static struct cpu_table {
    const char *const name;
    const enum processor_type processor;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", PROCESSOR_V7, MASK_ISA, 0 },
    { "cypress", PROCESSOR_CYPRESS, MASK_ISA, 0 },
    { "v8", PROCESSOR_V8, MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
    { "sparclite", PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
       The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
    { "f930", PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "f934", PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
    { "sparclite86x", PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
      MASK_SPARCLITE },
    { "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
    { "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 },
    /* TI ultrasparc I, II, IIi */
    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
    /* Although insns using %y are deprecated, it is a clear win on current
       ultrasparcs.  */
      |MASK_DEPRECATED_V8_INSNS},
    /* TI ultrasparc III */
    /* ??? Check if %y issue still holds true in ultra3.  */
    { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
    /* UltraSPARC T1 */
    { "niagara", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
    /* UltraSPARC T2 */
    { "niagara2", PROCESSOR_NIAGARA2, MASK_ISA, MASK_V9},
    { 0, (enum processor_type) 0, 0, 0 }
  };
  const struct cpu_table *cpu;
  const struct sparc_cpu_select *sel;
  int fpu;
#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
           DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }
  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
        {
          for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
            if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
              break;
          if (cmodel->name == NULL)
            error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
          else
            sparc_cmodel = cmodel->value;
        }
      else
        error ("-mcmodel= is not supported on 32 bit systems");
    }
  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  for (def = &cpu_default[0]; def->name; ++def)
    if (def->cpu == TARGET_CPU_DEFAULT)
      break;
  gcc_assert (def->name);
  sparc_select[0].string = def->name;

  for (sel = &sparc_select[0]; sel->name; ++sel)
    {
      if (sel->string)
        {
          for (cpu = &cpu_table[0]; cpu->name; ++cpu)
            if (! strcmp (sel->string, cpu->name))
              {
                if (sel->set_tune_p)
                  sparc_cpu = cpu->processor;

                if (sel->set_arch_p)
                  {
                    target_flags &= ~cpu->disable;
                    target_flags |= cpu->enable;
                  }
                break;
              }

          if (! cpu->name)
            error ("bad value (%s) for %s switch", sel->string, sel->name);
        }
    }
  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (fpu_option_set)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* Don't allow -mvis if FPU is disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~MASK_VIS;

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
          || sparc_cpu == PROCESSOR_ULTRASPARC3
          || sparc_cpu == PROCESSOR_NIAGARA
          || sparc_cpu == PROCESSOR_NIAGARA2))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Acquire unique alias sets for our private stuff.  */
  sparc_sr_alias_set = new_alias_set ();
  struct_value_alias_set = new_alias_set ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;
  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
    set_param_value ("simultaneous-prefetches",
                     ((sparc_cpu == PROCESSOR_ULTRASPARC
                       || sparc_cpu == PROCESSOR_NIAGARA
                       || sparc_cpu == PROCESSOR_NIAGARA2)
                      ? 2
                      : (sparc_cpu == PROCESSOR_ULTRASPARC3
                         ? 8 : 3)));
  if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
    set_param_value ("l1-cache-line-size",
                     ((sparc_cpu == PROCESSOR_ULTRASPARC
                       || sparc_cpu == PROCESSOR_ULTRASPARC3
                       || sparc_cpu == PROCESSOR_NIAGARA
                       || sparc_cpu == PROCESSOR_NIAGARA2)
                      ? 64 : 32));
}
/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
          || code == LE || code == GT);
}
/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}
/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}
/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
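
/* Worked example (a sketch, not in the original file): the SFmode
   constant 1.0f has bit pattern 0x3f800000.  Its low 10 bits are
   zero and it lies far outside the simm13 range, so fp_sethi_p
   returns nonzero and a single "sethi %hi(0x3f800000), %reg"
   materializes it.  A pattern that fits in 13 signed bits (a tiny
   denormal) would satisfy fp_mov_p instead, and anything needing
   both halves is left to fp_high_losum_p.  */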
/* Expand a move instruction.  Return true if all work is done.  */

bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
        return false;

      if (!reload_in_progress)
        {
          operands[0] = validize_mem (operands[0]);
          operands[1] = force_reg (mode, operands[1]);
        }
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && GET_CODE (operands[1]) != HIGH
      && sparc_tls_referenced_p (operands [1]))
    {
      rtx sym = operands[1];
      rtx addend = NULL;

      if (GET_CODE (sym) == CONST && GET_CODE (XEXP (sym, 0)) == PLUS)
        {
          addend = XEXP (XEXP (sym, 0), 1);
          sym = XEXP (XEXP (sym, 0), 0);
        }

      gcc_assert (SPARC_SYMBOL_REF_TLS_P (sym));

      sym = legitimize_tls_address (sym);
      if (addend)
        {
          sym = gen_rtx_PLUS (mode, sym, addend);
          sym = force_operand (sym, operands[0]);
        }
      operands[1] = sym;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
        operands[1] = legitimize_pic_address (operands[1], mode, 0);

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
         are absolutely sure that X is in the same segment as the GOT.
         Unfortunately, the flexibility of linker scripts means that we
         can't be sure of that in general, so assume that _G_O_T_-relative
         accesses are never valid on VxWorks.  */
      if (GET_CODE (operands[1]) == LABEL_REF && !TARGET_VXWORKS_RTP)
        {
          if (mode == SImode)
            {
              emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
              return true;
            }

          if (mode == DImode)
            {
              gcc_assert (TARGET_ARCH64);
              emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
              return true;
            }
        }

      if (symbolic_operand (operands[1], mode))
        {
          operands[1] = legitimize_pic_address (operands[1],
                                                mode,
                                                (reload_in_progress ?
                                                 operands[0] :
                                                 NULL_RTX));
          return false;
        }
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
          || SCALAR_FLOAT_MODE_P (mode)
          || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
         not storing directly into memory.  So fix this up to avoid
         crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
        operands[1] = CONST0_RTX (mode);

      /* We can clear FP registers if TARGET_VIS, and always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
          && const_zero_operand (operands[1], mode))
        return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
          /* We are able to build any SF constant in integer registers
             with at most 2 instructions.  */
          && (mode == SFmode
              /* And any DF constant in integer registers.  */
              || (mode == DFmode
                  && (reload_completed || reload_in_progress))))
        return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
        operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Load OP1, a 32-bit constant, into OP0, a register.
   We know it can't be done in one insn when we get
   here, the move expander guarantees this.  */

void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp;

  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      gcc_assert (!small_int_operand (op1, mode)
                  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
         this way CSE can see everything and reuse intermediate
         values if it wants.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
                              GEN_INT (INTVAL (op1)
                                       & ~(HOST_WIDE_INT)0x3ff)));

      emit_insn (gen_rtx_SET (VOIDmode,
                              op0,
                              gen_rtx_IOR (mode, temp,
                                           GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
                              gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
                              op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}
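
/* Worked example (a sketch, not in the original file): for
   op1 == 0x12345678 the two SETs above amount to

     sethi %hi(0x12345400), %temp  ! 0x12345678 & ~0x3ff
     or    %temp, 0x278, %op0      ! 0x12345678 &  0x3ff

   i.e. the constant is split at the 10-bit boundary shared by the
   sethi and or immediate fields.  */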
/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
   If TEMP is nonzero, we are forbidden to use any other scratch
   registers.  Otherwise, we are allowed to generate them as needed.

   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */

void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code-model support.  */
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable must be in the low 4TB of the virtual address
         space.

         sethi  %hi(symbol), %temp1
         or     %temp1, %lo(symbol), %reg  */
      if (temp)
        temp1 = temp;  /* op0 is allowed.  */
      else
        temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable must be in the low 16TB of the virtual address
         space.

         sethi  %h44(symbol), %temp1
         or     %temp1, %m44(symbol), %temp2
         sllx   %temp2, 12, %temp3
         or     %temp3, %l44(symbol), %reg  */
      if (temp)
        {
          temp1 = op0;
          temp2 = op0;
          temp3 = temp;  /* op0 is allowed.  */
        }
      else
        {
          temp1 = gen_reg_rtx (DImode);
          temp2 = gen_reg_rtx (DImode);
          temp3 = gen_reg_rtx (DImode);
        }

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp3,
                              gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable can be placed anywhere in the virtual address
         space.

         sethi  %hh(symbol), %temp1
         sethi  %lm(symbol), %temp2
         or     %temp1, %hm(symbol), %temp3
         sllx   %temp3, 32, %temp4
         or     %temp4, %temp2, %temp5
         or     %temp5, %lo(symbol), %reg  */
      if (temp)
        {
          /* It is possible that one of the registers we got for operands[2]
             might coincide with that of operands[0] (which is why we made
             it TImode).  Pick the other one to use as our scratch.  */
          if (rtx_equal_p (temp, op0))
            {
              gcc_assert (ti_temp);
              temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
            }
          temp1 = op0;
          temp2 = temp;  /* op0 is _not_ allowed, see above.  */
          temp3 = op0;
          temp4 = op0;
          temp5 = op0;
        }
      else
        {
          temp1 = gen_reg_rtx (DImode);
          temp2 = gen_reg_rtx (DImode);
          temp3 = gen_reg_rtx (DImode);
          temp4 = gen_reg_rtx (DImode);
          temp5 = gen_reg_rtx (DImode);
        }

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
                              gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, temp5,
                              gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
         Essentially it is MEDLOW with a fixed 64-bit
         virtual base added to all data segment addresses.
         Text-segment stuff is computed like MEDANY, we can't
         reuse the code above because the relocation knobs
         look different.

         Data segment:  sethi  %hi(symbol), %temp1
                        add    %temp1, EMBMEDANY_BASE_REG, %temp2
                        or     %temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
        {
          if (temp)
            {
              temp1 = temp;  /* op0 is allowed.  */
              temp2 = op0;
            }
          else
            {
              temp1 = gen_reg_rtx (DImode);
              temp2 = gen_reg_rtx (DImode);
            }

          emit_insn (gen_embmedany_sethi (temp1, op1));
          emit_insn (gen_embmedany_brsum (temp2, temp1));
          emit_insn (gen_embmedany_losum (op0, temp2, op1));
        }

      /* Text segment:  sethi  %uhi(symbol), %temp1
                        sethi  %hi(symbol), %temp2
                        or     %temp1, %ulo(symbol), %temp3
                        sllx   %temp3, 32, %temp4
                        or     %temp4, %temp2, %temp5
                        or     %temp5, %lo(symbol), %reg  */
      else
        {
          if (temp)
            {
              /* It is possible that one of the registers we got for operands[2]
                 might coincide with that of operands[0] (which is why we made
                 it TImode).  Pick the other one to use as our scratch.  */
              if (rtx_equal_p (temp, op0))
                {
                  gcc_assert (ti_temp);
                  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
                }
              temp1 = op0;
              temp2 = temp;  /* op0 is _not_ allowed, see above.  */
              temp3 = op0;
              temp4 = op0;
              temp5 = op0;
            }
          else
            {
              temp1 = gen_reg_rtx (DImode);
              temp2 = gen_reg_rtx (DImode);
              temp3 = gen_reg_rtx (DImode);
              temp4 = gen_reg_rtx (DImode);
              temp5 = gen_reg_rtx (DImode);
            }

          emit_insn (gen_embmedany_textuhi (temp1, op1));
          emit_insn (gen_embmedany_texthi  (temp2, op1));
          emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
          emit_insn (gen_rtx_SET (VOIDmode, temp4,
                                  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
          emit_insn (gen_rtx_SET (VOIDmode, temp5,
                                  gen_rtx_PLUS (DImode, temp4, temp2)));
          emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
        }
      break;

    default:
      gcc_unreachable ();
    }
}
#if HOST_BITS_PER_WIDE_INT == 32
void
sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
#else
/* These avoid problems when cross compiling.  If we do not
   go through all this hair then the optimizer will see
   invalid REG_EQUAL notes or in some cases none at all.  */
static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);

/* The optimizer is not to assume anything about exactly
   which bits are set for a HIGH, they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits and match
   a plain movdi, to alleviate this problem.  */
static rtx
gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
}

static rtx
gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
}

static rtx
gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_IOR (DImode, src, GEN_INT (val));
}

static rtx
gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_XOR (DImode, src, GEN_INT (val));
}
/* Worker routines for 64-bit constant formation on arch64.
   One of the key things to be doing in these emissions is
   to create as many temp REGs as possible.  This makes it
   possible for half-built constants to be used later when
   such values are similar to something required later on.
   Without doing this, the optimizer cannot see such
   opportunities.  */

static void sparc_emit_set_const64_quick1 (rtx, rtx,
                                           unsigned HOST_WIDE_INT, int);

static void
sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
                               unsigned HOST_WIDE_INT low_bits, int is_neg)
{
  unsigned HOST_WIDE_INT high_bits;

  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  emit_insn (gen_safe_HIGH64 (temp, high_bits));
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
                              gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
         instead.  This way the combiner will notice logical operations
         such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_NOT (DImode, temp)));
        }
      else
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_safe_XOR64 (temp,
                                                  (-(HOST_WIDE_INT)0x400
                                                   | (low_bits & 0x3ff)))));
        }
    }
}
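
/* Worked example (a sketch, not in the original file): for the
   constant 0xfffffffffffff000 the caller reaches quick1 with
   low_bits == 0xfffff000 and is_neg == 1, so high_bits ==
   ~low_bits == 0xfff and the emitted sequence is

     sethi %hi(0xfff), %temp    ! temp = 0xc00
     xor   %temp, -0x400, %op0  ! op0 = 0xfffffffffffff000
*/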
static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
                                           unsigned HOST_WIDE_INT, int);

static void
sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
                               unsigned HOST_WIDE_INT high_bits,
                               unsigned HOST_WIDE_INT low_immediate,
                               int shift_count)
{
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
        emit_insn (gen_rtx_SET (VOIDmode, op0,
                                gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
        temp2 = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
                          gen_rtx_ASHIFT (DImode, temp2,
                                          GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
                            gen_safe_OR64 (op0, low_immediate)));
}
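
/* Worked example (a sketch, not in the original file): quick2 with
   high_bits == 0x12345678, low_immediate == 0x123 and
   shift_count == 32 emits

     sethi %hi(0x12345400), %temp
     or    %temp, 0x278, %op0
     sllx  %op0, 32, %op0
     or    %op0, 0x123, %op0
*/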
static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
                                            unsigned HOST_WIDE_INT);

/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.  */
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
                                unsigned HOST_WIDE_INT high_bits,
                                unsigned HOST_WIDE_INT low_bits)
{
  rtx sub_temp;

  if (reload_in_progress || reload_completed)
    sub_temp = op0;
  else
    sub_temp = gen_reg_rtx (DImode);

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
        emit_insn (gen_rtx_SET (VOIDmode,
                                sub_temp,
                                gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
        sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (!reload_in_progress && !reload_completed)
    {
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp4,
                              gen_rtx_ASHIFT (DImode, sub_temp,
                                              GEN_INT (32))));

      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
        {
          emit_insn (gen_rtx_SET (VOIDmode, temp3,
                                  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_PLUS (DImode, temp4, temp3)));
        }
      else
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_PLUS (DImode, temp4, temp2)));
        }
    }
  else
    {
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
         painful.  However we do still make an attempt to
         avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_ASHIFT (DImode, sub_temp,
                                                  GEN_INT (to_shift))));
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_IOR (DImode, op0, low1)));
          sub_temp = op0;
          to_shift = 12;
        }
      else
        {
          to_shift += 12;
        }
      if (low2 != const0_rtx)
        {
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_ASHIFT (DImode, sub_temp,
                                                  GEN_INT (to_shift))));
          emit_insn (gen_rtx_SET (VOIDmode, op0,
                                  gen_rtx_IOR (DImode, op0, low2)));
          sub_temp = op0;
          to_shift = 8;
        }
      else
        {
          to_shift += 8;
        }
      emit_insn (gen_rtx_SET (VOIDmode, op0,
                              gen_rtx_ASHIFT (DImode, sub_temp,
                                              GEN_INT (to_shift))));
      if (low3 != const0_rtx)
        emit_insn (gen_rtx_SET (VOIDmode, op0,
                                gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
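
/* Illustrative note (a sketch, not in the original file): outside of
   reload a worst-case constant such as 0x123456789abcdef0 costs six
   insns here: sethi/or building the high word, a sllx by 32, sethi/or
   building the low word in fresh temporaries, and a final add
   combining the halves.  */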
/* Analyze a 64-bit constant for certain properties.  */
static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    int *, int *, int *);

static void
analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
                        unsigned HOST_WIDE_INT low_bits,
                        int *hbsp, int *lbsp, int *abbasp)
{
  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
  int i;

  lowest_bit_set = highest_bit_set = -1;
  i = 0;
  do
    {
      if ((lowest_bit_set == -1)
          && ((low_bits >> i) & 1))
        lowest_bit_set = i;
      if ((highest_bit_set == -1)
          && ((high_bits >> (32 - i - 1)) & 1))
        highest_bit_set = (64 - i - 1);
    }
  while (++i < 32
         && ((highest_bit_set == -1)
             || (lowest_bit_set == -1)));
  if (i == 32)
    {
      i = 0;
      do
        {
          if ((lowest_bit_set == -1)
              && ((high_bits >> i) & 1))
            lowest_bit_set = i + 32;
          if ((highest_bit_set == -1)
              && ((low_bits >> (32 - i - 1)) & 1))
            highest_bit_set = 32 - i - 1;
        }
      while (++i < 32
             && ((highest_bit_set == -1)
                 || (lowest_bit_set == -1)));
    }
  /* If there are no bits set this should have gone out
     as one instruction!  */
  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
  all_bits_between_are_set = 1;
  for (i = lowest_bit_set; i <= highest_bit_set; i++)
    {
      if (i < 32)
        {
          if ((low_bits & (1 << i)) != 0)
            continue;
        }
      else
        {
          if ((high_bits & (1 << (i - 32))) != 0)
            continue;
        }
      all_bits_between_are_set = 0;
      break;
    }
  *hbsp = highest_bit_set;
  *lbsp = lowest_bit_set;
  *abbasp = all_bits_between_are_set;
}
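
/* Worked example (a sketch, not in the original file): for
   0xfffffffffffffc00 this routine reports lowest_bit_set == 10,
   highest_bit_set == 63 and all_bits_between_are_set == 1, which
   lets the caller materialize the constant as
   "mov -1, %reg; sllx %reg, 10, %reg".  */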
static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);

static int
const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
                   unsigned HOST_WIDE_INT low_bits)
{
  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;

  if (high_bits == 0
      || high_bits == 0xffffffff)
    return 1;

  analyze_64bit_constant (high_bits, low_bits,
                          &highest_bit_set, &lowest_bit_set,
                          &all_bits_between_are_set);

  if ((highest_bit_set == 63
       || lowest_bit_set == 0)
      && all_bits_between_are_set != 0)
    return 1;

  if ((highest_bit_set - lowest_bit_set) < 21)
    return 1;

  return 0;
}
static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
                                                        unsigned HOST_WIDE_INT,
                                                        int, int);

static unsigned HOST_WIDE_INT
create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
                          unsigned HOST_WIDE_INT low_bits,
                          int lowest_bit_set, int shift)
{
  HOST_WIDE_INT hi, lo;

  if (lowest_bit_set < 32)
    {
      lo = (low_bits >> lowest_bit_set) << shift;
      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
    }
  else
    {
      lo = 0;
      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
    }
  gcc_assert (! (hi & lo));
  return (hi | lo);
}
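
/* Worked example (a sketch, not in the original file): with
   high_bits == 0x1, low_bits == 0x80000000, lowest_bit_set == 31
   and shift == 0 the result is 0x3 -- the two set bits slid down so
   the lowest lands at bit 0, ready to be shifted back into place by
   the caller.  */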
1719 /* Here we are sure to be arch64 and this is an integer constant
1720 being loaded into a register. Emit the most efficient
1721 insn sequence possible. Detection of all the 1-insn cases
1722 has been done already. */
1723 void
1724 sparc_emit_set_const64 (rtx op0, rtx op1)
1726 unsigned HOST_WIDE_INT high_bits, low_bits;
1727 int lowest_bit_set, highest_bit_set;
1728 int all_bits_between_are_set;
1729 rtx temp = 0;
1731 /* Sanity check that we know what we are working with. */
1732 gcc_assert (TARGET_ARCH64
1733 && (GET_CODE (op0) == SUBREG
1734 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
1736 if (reload_in_progress || reload_completed)
1737 temp = op0;
1739 if (GET_CODE (op1) != CONST_INT)
1741 sparc_emit_set_symbolic_const64 (op0, op1, temp);
1742 return;
1745 if (! temp)
1746 temp = gen_reg_rtx (DImode);
1748 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1749 low_bits = (INTVAL (op1) & 0xffffffff);
1751 /* low_bits bits 0 --> 31
1752 high_bits bits 32 --> 63 */
1754 analyze_64bit_constant (high_bits, low_bits,
1755 &highest_bit_set, &lowest_bit_set,
1756 &all_bits_between_are_set);
1758 /* First try for a 2-insn sequence. */
1760 /* These situations are preferred because the optimizer can
1761 * do more things with them:
1762 * 1) mov -1, %reg
1763 * sllx %reg, shift, %reg
1764 * 2) mov -1, %reg
1765 * srlx %reg, shift, %reg
1766 * 3) mov some_small_const, %reg
1767 * sllx %reg, shift, %reg
1769 if (((highest_bit_set == 63
1770 || lowest_bit_set == 0)
1771 && all_bits_between_are_set != 0)
1772 || ((highest_bit_set - lowest_bit_set) < 12))
1774 HOST_WIDE_INT the_const = -1;
1775 int shift = lowest_bit_set;
1777 if ((highest_bit_set != 63
1778 && lowest_bit_set != 0)
1779 || all_bits_between_are_set == 0)
1781 the_const =
1782 create_simple_focus_bits (high_bits, low_bits,
1783 lowest_bit_set, 0);
1785 else if (lowest_bit_set == 0)
1786 shift = -(63 - highest_bit_set);
1788 gcc_assert (SPARC_SIMM13_P (the_const));
1789 gcc_assert (shift != 0);
1791 emit_insn (gen_safe_SET64 (temp, the_const));
1792 if (shift > 0)
1793 emit_insn (gen_rtx_SET (VOIDmode,
1794 op0,
1795 gen_rtx_ASHIFT (DImode,
1796 temp,
1797 GEN_INT (shift))));
1798 else if (shift < 0)
1799 emit_insn (gen_rtx_SET (VOIDmode,
1800 op0,
1801 gen_rtx_LSHIFTRT (DImode,
1802 temp,
1803 GEN_INT (-shift))));
1804 return;
1807 /* Now a range of 22 or less bits set somewhere.
1808 * 1) sethi %hi(focus_bits), %reg
1809 * sllx %reg, shift, %reg
1810 * 2) sethi %hi(focus_bits), %reg
1811 * srlx %reg, shift, %reg
1813 if ((highest_bit_set - lowest_bit_set) < 21)
1815 unsigned HOST_WIDE_INT focus_bits =
1816 create_simple_focus_bits (high_bits, low_bits,
1817 lowest_bit_set, 10);
1819 gcc_assert (SPARC_SETHI_P (focus_bits));
1820 gcc_assert (lowest_bit_set != 10);
1822 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
1824 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
1825 if (lowest_bit_set < 10)
1826 emit_insn (gen_rtx_SET (VOIDmode,
1827 op0,
1828 gen_rtx_LSHIFTRT (DImode, temp,
1829 GEN_INT (10 - lowest_bit_set))));
1830 else if (lowest_bit_set > 10)
1831 emit_insn (gen_rtx_SET (VOIDmode,
1832 op0,
1833 gen_rtx_ASHIFT (DImode, temp,
1834 GEN_INT (lowest_bit_set - 10))));
1835 return;
1838 /* 1) sethi %hi(low_bits), %reg
1839 * or %reg, %lo(low_bits), %reg
1840 * 2) sethi %hi(~low_bits), %reg
1841 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1843 if (high_bits == 0
1844 || high_bits == 0xffffffff)
1846 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1847 (high_bits == 0xffffffff));
1848 return;
1851 /* Now, try 3-insn sequences. */
1853 /* 1) sethi %hi(high_bits), %reg
1854 * or %reg, %lo(high_bits), %reg
1855 * sllx %reg, 32, %reg
1857 if (low_bits == 0)
1859 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1860 return;
1863 /* We may be able to do something quick
1864 when the constant is negated, so try that. */
1865 if (const64_is_2insns ((~high_bits) & 0xffffffff,
1866 (~low_bits) & 0xfffffc00))
1868 /* NOTE: The trailing bits get XOR'd so we need the
1869 non-negated bits, not the negated ones. */
1870 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1872 if ((((~high_bits) & 0xffffffff) == 0
1873 && ((~low_bits) & 0x80000000) == 0)
1874 || (((~high_bits) & 0xffffffff) == 0xffffffff
1875 && ((~low_bits) & 0x80000000) != 0))
1877 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
1879 if ((SPARC_SETHI_P (fast_int)
1880 && (~high_bits & 0xffffffff) == 0)
1881 || SPARC_SIMM13_P (fast_int))
1882 emit_insn (gen_safe_SET64 (temp, fast_int));
1883 else
1884 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
1886 else
1888 rtx negated_const;
1889 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1890 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
1891 sparc_emit_set_const64 (temp, negated_const);
1894 /* If we are XOR'ing with -1, then we should emit a one's complement
1895 instead. This way the combiner will notice logical operations
1896 such as ANDN later on and substitute. */
1897 if (trailing_bits == 0x3ff)
1899 emit_insn (gen_rtx_SET (VOIDmode, op0,
1900 gen_rtx_NOT (DImode, temp)));
1902 else
1904 emit_insn (gen_rtx_SET (VOIDmode,
1905 op0,
1906 gen_safe_XOR64 (temp,
1907 (-0x400 | trailing_bits))));
1909 return;
1912 /* 1) sethi %hi(xxx), %reg
1913 * or %reg, %lo(xxx), %reg
1914 * sllx %reg, yyy, %reg
1916 * ??? This is just a generalized version of the low_bits==0
1917 * thing above, FIXME...
1919 if ((highest_bit_set - lowest_bit_set) < 32)
1921 unsigned HOST_WIDE_INT focus_bits =
1922 create_simple_focus_bits (high_bits, low_bits,
1923 lowest_bit_set, 0);
1925 /* We can't get here in this state. */
1926 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
1928 /* So what we know is that the set bits straddle the
1929 middle of the 64-bit word. */
1930 sparc_emit_set_const64_quick2 (op0, temp,
1931 focus_bits, 0,
1932 lowest_bit_set);
1933 return;
1936 /* 1) sethi %hi(high_bits), %reg
1937 * or %reg, %lo(high_bits), %reg
1938 * sllx %reg, 32, %reg
1939 * or %reg, low_bits, %reg
1941 if (SPARC_SIMM13_P(low_bits)
1942 && ((int)low_bits > 0))
1944 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
1945 return;
1948 /* The easiest way when all else fails, is full decomposition. */
1949 #if 0
1950 printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
1951 high_bits, low_bits, ~high_bits, ~low_bits);
1952 #endif
1953 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
1955 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
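/* Worked example (illustrative; the register choice is arbitrary, the
   actual code uses a pseudo for TEMP): for 0xABCDE12300000456 we get
   high_bits = 0xabcde123 and low_bits = 0x00000456.  low_bits is a
   positive SIMM13, so the final 4-insn "quick2" case of
   sparc_emit_set_const64 applies:

        sethi   %hi(0xabcde123), %g1      ! bits 10..31 of high_bits
        or      %g1, %lo(0xabcde123), %g1 ! bits 0..9 of high_bits
        sllx    %g1, 32, %g1              ! shift into bits 32..63
        or      %g1, 0x456, %g1           ! merge low_bits

   A constant such as (0x123 << 40) instead hits the 2-insn case, since
   its set bits span fewer than 12 positions:

        mov     0x123, %g1
        sllx    %g1, 40, %g1  */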
1957 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1958 return the mode to be used for the comparison. For floating-point,
1959 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
1960 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
1961 processing is needed. */
1963 enum machine_mode
1964 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
1966 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1968 switch (op)
1970 case EQ:
1971 case NE:
1972 case UNORDERED:
1973 case ORDERED:
1974 case UNLT:
1975 case UNLE:
1976 case UNGT:
1977 case UNGE:
1978 case UNEQ:
1979 case LTGT:
1980 return CCFPmode;
1982 case LT:
1983 case LE:
1984 case GT:
1985 case GE:
1986 return CCFPEmode;
1988 default:
1989 gcc_unreachable ();
1992 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
1993 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
1995 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
1996 return CCX_NOOVmode;
1997 else
1998 return CC_NOOVmode;
2000 else
2002 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2003 return CCXmode;
2004 else
2005 return CCmode;
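/* Example (illustrative): when the combiner folds a comparison into an
   arithmetic insn, as in

     (set (reg:CC_NOOV 100) (compare:CC_NOOV (plus:SI a b) (const_int 0)))

   CC_NOOVmode records that the overflow bit reflects the PLUS itself, so
   only conditions that ignore V (EQ, NE and the sign tests) may be tested
   on the result.  A plain register comparison gets CCmode, or CCXmode for
   DImode operands under TARGET_ARCH64. */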
2009 /* Emit the compare insn and return the CC reg for a CODE comparison
2010 with operands X and Y. */
2012 static rtx
2013 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2015 enum machine_mode mode;
2016 rtx cc_reg;
2018 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2019 return x;
2021 mode = SELECT_CC_MODE (code, x, y);
2023 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2024 fcc regs (cse can't tell they're really call clobbered regs and will
2025 remove a duplicate comparison even if there is an intervening function
2026 call - it will then try to reload the cc reg via an int reg which is why
2027 we need the movcc patterns). It is possible to provide the movcc
2028 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2029 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2030 to tell cse that CCFPE mode registers (even pseudos) are call
2031 clobbered. */
2033 /* ??? This is an experiment. Rather than making changes to cse which may
2034 or may not be easy/clean, we do our own cse. This is possible because
2035 we will generate hard registers. Cse knows they're call clobbered (it
2036 doesn't know the same thing about pseudos). If we guess wrong, no big
2037 deal, but if we win, great! */
2039 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2040 #if 1 /* experiment */
2042 int reg;
2043 /* We cycle through the registers to ensure they're all exercised. */
2044 static int next_fcc_reg = 0;
2045 /* Previous x,y for each fcc reg. */
2046 static rtx prev_args[4][2];
2048 /* Scan prev_args for x,y. */
2049 for (reg = 0; reg < 4; reg++)
2050 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2051 break;
2052 if (reg == 4)
2054 reg = next_fcc_reg;
2055 prev_args[reg][0] = x;
2056 prev_args[reg][1] = y;
2057 next_fcc_reg = (next_fcc_reg + 1) & 3;
2059 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2061 #else
2062 cc_reg = gen_reg_rtx (mode);
2063 #endif /* ! experiment */
2064 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2065 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2066 else
2067 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2069 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2070 will only result in an unrecognizable insn so no point in asserting. */
2071 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2073 return cc_reg;
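/* Effect of the hand-rolled cse above (illustrative): two identical FP
   comparisons, e.g. from

     if (a < b) f (); if (a < b) g ();

   are assigned the same %fcc register, so cse can delete the second
   fcmpd; unrelated comparisons rotate through %fcc0..%fcc3 instead of
   piling up on %fcc0. */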
2077 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2079 rtx
2080 gen_compare_reg (rtx cmp)
2082 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2085 /* This function is used for v9 only.
2086 DEST is the target of the Scc insn.
2087 CODE is the code for an Scc's comparison.
2088 X and Y are the values we compare.
2090 This function is needed to turn
2092 (set (reg:SI 110)
2093 (gt (reg:CCX 100 %icc)
2094 (const_int 0)))
2095 into
2096 (set (reg:SI 110)
2097 (gt:DI (reg:CCX 100 %icc)
2098 (const_int 0)))
2100 I.e., the instruction recognizer needs to see the mode of the comparison to
2101 find the right instruction. We could use "gt:DI" right in the
2102 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2104 static int
2105 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2107 if (! TARGET_ARCH64
2108 && (GET_MODE (x) == DImode
2109 || GET_MODE (dest) == DImode))
2110 return 0;
2112 /* Try to use the movrCC insns. */
2113 if (TARGET_ARCH64
2114 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2115 && y == const0_rtx
2116 && v9_regcmp_p (compare_code))
2118 rtx op0 = x;
2119 rtx temp;
2121 /* Special case for op0 != 0. This can be done with one instruction if
2122 dest == x. */
2124 if (compare_code == NE
2125 && GET_MODE (dest) == DImode
2126 && rtx_equal_p (op0, dest))
2128 emit_insn (gen_rtx_SET (VOIDmode, dest,
2129 gen_rtx_IF_THEN_ELSE (DImode,
2130 gen_rtx_fmt_ee (compare_code, DImode,
2131 op0, const0_rtx),
2132 const1_rtx,
2133 dest)));
2134 return 1;
2137 if (reg_overlap_mentioned_p (dest, op0))
2139 /* Handle the case where dest == x.
2140 We "early clobber" the result. */
2141 op0 = gen_reg_rtx (GET_MODE (x));
2142 emit_move_insn (op0, x);
2145 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2146 if (GET_MODE (op0) != DImode)
2148 temp = gen_reg_rtx (DImode);
2149 convert_move (temp, op0, 0);
2151 else
2152 temp = op0;
2153 emit_insn (gen_rtx_SET (VOIDmode, dest,
2154 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2155 gen_rtx_fmt_ee (compare_code, DImode,
2156 temp, const0_rtx),
2157 const1_rtx,
2158 dest)));
2159 return 1;
2161 else
2163 x = gen_compare_reg_1 (compare_code, x, y);
2164 y = const0_rtx;
2166 gcc_assert (GET_MODE (x) != CC_NOOVmode
2167 && GET_MODE (x) != CCX_NOOVmode);
2169 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2170 emit_insn (gen_rtx_SET (VOIDmode, dest,
2171 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2172 gen_rtx_fmt_ee (compare_code,
2173 GET_MODE (x), x, y),
2174 const1_rtx, dest)));
2175 return 1;
2180 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2181 without jumps using the addx/subx instructions. */
2183 bool
2184 emit_scc_insn (rtx operands[])
2186 rtx tem;
2187 rtx x;
2188 rtx y;
2189 enum rtx_code code;
2191 /* The quad-word fp compare library routines all return nonzero to indicate
2192 true, which is different from the equivalent libgcc routines, so we must
2193 handle them specially here. */
2194 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2196 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2197 GET_CODE (operands[1]));
2198 operands[2] = XEXP (operands[1], 0);
2199 operands[3] = XEXP (operands[1], 1);
2202 code = GET_CODE (operands[1]);
2203 x = operands[2];
2204 y = operands[3];
2206 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2207 more applications). The exception to this is "reg != 0" which can
2208 be done in one instruction on v9 (so we do it). */
2209 if (code == EQ)
2211 if (GET_MODE (x) == SImode)
2213 rtx pat = gen_seqsi_special (operands[0], x, y);
2214 emit_insn (pat);
2215 return true;
2217 else if (GET_MODE (x) == DImode)
2219 rtx pat = gen_seqdi_special (operands[0], x, y);
2220 emit_insn (pat);
2221 return true;
2225 if (code == NE)
2227 if (GET_MODE (x) == SImode)
2229 rtx pat = gen_snesi_special (operands[0], x, y);
2230 emit_insn (pat);
2231 return true;
2233 else if (GET_MODE (x) == DImode)
2235 rtx pat = gen_snedi_special (operands[0], x, y);
2236 emit_insn (pat);
2237 return true;
2241 /* For the rest, on v9 we can use conditional moves. */
2243 if (TARGET_V9)
2245 if (gen_v9_scc (operands[0], code, x, y))
2246 return true;
2249 /* We can do LTU and GEU using the addx/subx instructions too. And
2250 for GTU/LEU, if both operands are registers, swap them and fall
2251 back to the easy case. */
2252 if (code == GTU || code == LEU)
2254 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2255 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2257 tem = x;
2258 x = y;
2259 y = tem;
2260 code = swap_condition (code);
2264 if (code == LTU || code == GEU)
2266 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2267 gen_rtx_fmt_ee (code, SImode,
2268 gen_compare_reg_1 (code, x, y),
2269 const0_rtx)));
2270 return true;
2273 /* Nope, do branches. */
2274 return false;
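/* Illustrative expansion of the LTU case above (register names are
   arbitrary): an unsigned "x < y" scc becomes

        subcc   %o0, %o1, %g0   ! carry is set iff x < y (unsigned)
        addx    %g0, 0, %o2     ! o2 = carry, i.e. (x < y) as 0/1

   GEU uses a subx-based sequence to produce the complement, and GTU/LEU
   were first reduced to LTU/GEU by the operand swap above. */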
2277 /* Emit a conditional jump insn for the v9 architecture using comparison code
2278 CODE and jump target LABEL.
2279 This function exists to take advantage of the v9 brxx insns. */
2281 static void
2282 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2284 emit_jump_insn (gen_rtx_SET (VOIDmode,
2285 pc_rtx,
2286 gen_rtx_IF_THEN_ELSE (VOIDmode,
2287 gen_rtx_fmt_ee (code, GET_MODE (op0),
2288 op0, const0_rtx),
2289 gen_rtx_LABEL_REF (VOIDmode, label),
2290 pc_rtx)));
2293 void
2294 emit_conditional_branch_insn (rtx operands[])
2296 /* The quad-word fp compare library routines all return nonzero to indicate
2297 true, which is different from the equivalent libgcc routines, so we must
2298 handle them specially here. */
2299 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2301 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2302 GET_CODE (operands[0]));
2303 operands[1] = XEXP (operands[0], 0);
2304 operands[2] = XEXP (operands[0], 1);
2307 if (TARGET_ARCH64 && operands[2] == const0_rtx
2308 && GET_CODE (operands[1]) == REG
2309 && GET_MODE (operands[1]) == DImode)
2311 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2312 return;
2315 operands[1] = gen_compare_reg (operands[0]);
2316 operands[2] = const0_rtx;
2317 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2318 operands[1], operands[2]);
2319 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2320 operands[3]));
2324 /* Generate a DFmode part of a hard TFmode register.
2325 REG is the TFmode hard register, LOW is 1 for the
2326 low 64bit of the register and 0 otherwise.
2327  */
2328 rtx
2329 gen_df_reg (rtx reg, int low)
2331 int regno = REGNO (reg);
2333 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2334 regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
2335 return gen_rtx_REG (DFmode, regno);
2338 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
2339 Unlike normal calls, TFmode operands are passed by reference. It is
2340 assumed that no more than 3 operands are required. */
2342 static void
2343 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2345 rtx ret_slot = NULL, arg[3], func_sym;
2346 int i;
2348 /* We only expect to be called for conversions, unary, and binary ops. */
2349 gcc_assert (nargs == 2 || nargs == 3);
2351 for (i = 0; i < nargs; ++i)
2353 rtx this_arg = operands[i];
2354 rtx this_slot;
2356 /* TFmode arguments and return values are passed by reference. */
2357 if (GET_MODE (this_arg) == TFmode)
2359 int force_stack_temp;
2361 force_stack_temp = 0;
2362 if (TARGET_BUGGY_QP_LIB && i == 0)
2363 force_stack_temp = 1;
2365 if (GET_CODE (this_arg) == MEM
2366 && ! force_stack_temp)
2367 this_arg = XEXP (this_arg, 0);
2368 else if (CONSTANT_P (this_arg)
2369 && ! force_stack_temp)
2371 this_slot = force_const_mem (TFmode, this_arg);
2372 this_arg = XEXP (this_slot, 0);
2374 else
2376 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
2378 /* Operand 0 is the return value. We'll copy it out later. */
2379 if (i > 0)
2380 emit_move_insn (this_slot, this_arg);
2381 else
2382 ret_slot = this_slot;
2384 this_arg = XEXP (this_slot, 0);
2388 arg[i] = this_arg;
2391 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2393 if (GET_MODE (operands[0]) == TFmode)
2395 if (nargs == 2)
2396 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2397 arg[0], GET_MODE (arg[0]),
2398 arg[1], GET_MODE (arg[1]));
2399 else
2400 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2401 arg[0], GET_MODE (arg[0]),
2402 arg[1], GET_MODE (arg[1]),
2403 arg[2], GET_MODE (arg[2]));
2405 if (ret_slot)
2406 emit_move_insn (operands[0], ret_slot);
2408 else
2410 rtx ret;
2412 gcc_assert (nargs == 2);
2414 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2415 GET_MODE (operands[0]), 1,
2416 arg[1], GET_MODE (arg[1]));
2418 if (ret != operands[0])
2419 emit_move_insn (operands[0], ret);
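/* For reference, the quad-FP ABI routines (assumed here from the SPARC
   V9 psABI) take and return TFmode values by reference, e.g.

     void _Qp_add (long double *result,
                   const long double *a, const long double *b);

   which is why the code above passes stack-slot addresses and copies the
   result back out of RET_SLOT afterwards. */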
2423 /* Expand soft-float TFmode calls to sparc abi routines. */
2425 static void
2426 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
2428 const char *func;
2430 switch (code)
2432 case PLUS:
2433 func = "_Qp_add";
2434 break;
2435 case MINUS:
2436 func = "_Qp_sub";
2437 break;
2438 case MULT:
2439 func = "_Qp_mul";
2440 break;
2441 case DIV:
2442 func = "_Qp_div";
2443 break;
2444 default:
2445 gcc_unreachable ();
2448 emit_soft_tfmode_libcall (func, 3, operands);
2451 static void
2452 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
2454 const char *func;
2456 gcc_assert (code == SQRT);
2457 func = "_Qp_sqrt";
2459 emit_soft_tfmode_libcall (func, 2, operands);
2462 static void
2463 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
2465 const char *func;
2467 switch (code)
2469 case FLOAT_EXTEND:
2470 switch (GET_MODE (operands[1]))
2472 case SFmode:
2473 func = "_Qp_stoq";
2474 break;
2475 case DFmode:
2476 func = "_Qp_dtoq";
2477 break;
2478 default:
2479 gcc_unreachable ();
2481 break;
2483 case FLOAT_TRUNCATE:
2484 switch (GET_MODE (operands[0]))
2486 case SFmode:
2487 func = "_Qp_qtos";
2488 break;
2489 case DFmode:
2490 func = "_Qp_qtod";
2491 break;
2492 default:
2493 gcc_unreachable ();
2495 break;
2497 case FLOAT:
2498 switch (GET_MODE (operands[1]))
2500 case SImode:
2501 func = "_Qp_itoq";
2502 if (TARGET_ARCH64)
2503 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
2504 break;
2505 case DImode:
2506 func = "_Qp_xtoq";
2507 break;
2508 default:
2509 gcc_unreachable ();
2511 break;
2513 case UNSIGNED_FLOAT:
2514 switch (GET_MODE (operands[1]))
2516 case SImode:
2517 func = "_Qp_uitoq";
2518 if (TARGET_ARCH64)
2519 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
2520 break;
2521 case DImode:
2522 func = "_Qp_uxtoq";
2523 break;
2524 default:
2525 gcc_unreachable ();
2527 break;
2529 case FIX:
2530 switch (GET_MODE (operands[0]))
2532 case SImode:
2533 func = "_Qp_qtoi";
2534 break;
2535 case DImode:
2536 func = "_Qp_qtox";
2537 break;
2538 default:
2539 gcc_unreachable ();
2541 break;
2543 case UNSIGNED_FIX:
2544 switch (GET_MODE (operands[0]))
2546 case SImode:
2547 func = "_Qp_qtoui";
2548 break;
2549 case DImode:
2550 func = "_Qp_qtoux";
2551 break;
2552 default:
2553 gcc_unreachable ();
2555 break;
2557 default:
2558 gcc_unreachable ();
2561 emit_soft_tfmode_libcall (func, 2, operands);
2564 /* Expand a hard-float tfmode operation. All arguments must be in
2565 registers. */
2567 static void
2568 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
2570 rtx op, dest;
2572 if (GET_RTX_CLASS (code) == RTX_UNARY)
2574 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2575 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2577 else
2579 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2580 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2581 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2582 operands[1], operands[2]);
2585 if (register_operand (operands[0], VOIDmode))
2586 dest = operands[0];
2587 else
2588 dest = gen_reg_rtx (GET_MODE (operands[0]));
2590 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
2592 if (dest != operands[0])
2593 emit_move_insn (operands[0], dest);
2596 void
2597 emit_tfmode_binop (enum rtx_code code, rtx *operands)
2599 if (TARGET_HARD_QUAD)
2600 emit_hard_tfmode_operation (code, operands);
2601 else
2602 emit_soft_tfmode_binop (code, operands);
2605 void
2606 emit_tfmode_unop (enum rtx_code code, rtx *operands)
2608 if (TARGET_HARD_QUAD)
2609 emit_hard_tfmode_operation (code, operands);
2610 else
2611 emit_soft_tfmode_unop (code, operands);
2614 void
2615 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
2617 if (TARGET_HARD_QUAD)
2618 emit_hard_tfmode_operation (code, operands);
2619 else
2620 emit_soft_tfmode_cvt (code, operands);
2623 /* Return nonzero if a branch/jump/call instruction will be emitting
2624 a nop into its delay slot. */
2626 int
2627 empty_delay_slot (rtx insn)
2629 rtx seq;
2631 /* If no previous instruction (should not happen), return true. */
2632 if (PREV_INSN (insn) == NULL)
2633 return 1;
2635 seq = NEXT_INSN (PREV_INSN (insn));
2636 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
2637 return 0;
2639 return 1;
2642 /* Return nonzero if TRIAL can go into the call delay slot. */
2644 int
2645 tls_call_delay (rtx trial)
2647 rtx pat;
2649 /* Binutils allows
2650 call __tls_get_addr, %tgd_call (foo)
2651 add %l7, %o0, %o0, %tgd_add (foo)
2652 while Sun as/ld does not. */
2653 if (TARGET_GNU_TLS || !TARGET_TLS)
2654 return 1;
2656 pat = PATTERN (trial);
2658 /* We must reject tgd_add{32|64}, i.e.
2659 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
2660 and tldm_add{32|64}, i.e.
2661 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
2662 for Sun as/ld. */
2663 if (GET_CODE (pat) == SET
2664 && GET_CODE (SET_SRC (pat)) == PLUS)
2666 rtx unspec = XEXP (SET_SRC (pat), 1);
2668 if (GET_CODE (unspec) == UNSPEC
2669 && (XINT (unspec, 1) == UNSPEC_TLSGD
2670 || XINT (unspec, 1) == UNSPEC_TLSLDM))
2671 return 0;
2674 return 1;
2677 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
2678 instruction. RETURN_P is true if the v9 variant 'return' is to be
2679 considered in the test too.
2681 TRIAL must be a SET whose destination is a REG appropriate for the
2682 'restore' instruction or, if RETURN_P is true, for the 'return'
2683 instruction. */
2685 static int
2686 eligible_for_restore_insn (rtx trial, bool return_p)
2688 rtx pat = PATTERN (trial);
2689 rtx src = SET_SRC (pat);
2691 /* The 'restore src,%g0,dest' pattern for word mode and below. */
2692 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2693 && arith_operand (src, GET_MODE (src)))
2695 if (TARGET_ARCH64)
2696 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2697 else
2698 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
2701 /* The 'restore src,%g0,dest' pattern for double-word mode. */
2702 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2703 && arith_double_operand (src, GET_MODE (src)))
2704 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2706 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
2707 else if (! TARGET_FPU && register_operand (src, SFmode))
2708 return 1;
2710 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
2711 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
2712 return 1;
2714 /* If we have the 'return' instruction, anything that does not use
2715 local or output registers and can go into a delay slot wins. */
2716 else if (return_p && TARGET_V9 && ! epilogue_renumber (&pat, 1)
2717 && (get_attr_in_uncond_branch_delay (trial)
2718 == IN_UNCOND_BRANCH_DELAY_TRUE))
2719 return 1;
2721 /* The 'restore src1,src2,dest' pattern for SImode. */
2722 else if (GET_CODE (src) == PLUS
2723 && register_operand (XEXP (src, 0), SImode)
2724 && arith_operand (XEXP (src, 1), SImode))
2725 return 1;
2727 /* The 'restore src1,src2,dest' pattern for DImode. */
2728 else if (GET_CODE (src) == PLUS
2729 && register_operand (XEXP (src, 0), DImode)
2730 && arith_double_operand (XEXP (src, 1), DImode))
2731 return 1;
2733 /* The 'restore src1,%lo(src2),dest' pattern. */
2734 else if (GET_CODE (src) == LO_SUM
2735 && ! TARGET_CM_MEDMID
2736 && ((register_operand (XEXP (src, 0), SImode)
2737 && immediate_operand (XEXP (src, 1), SImode))
2738 || (TARGET_ARCH64
2739 && register_operand (XEXP (src, 0), DImode)
2740 && immediate_operand (XEXP (src, 1), DImode))))
2741 return 1;
2743 /* The 'restore src,src,dest' pattern. */
2744 else if (GET_CODE (src) == ASHIFT
2745 && (register_operand (XEXP (src, 0), SImode)
2746 || register_operand (XEXP (src, 0), DImode))
2747 && XEXP (src, 1) == const1_rtx)
2748 return 1;
2750 return 0;
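/* Example of the payoff (illustrative): a function ending in
   "return x + y;" can fold the addition into the epilogue,

        ret
        restore %o0, %o1, %o0   ! dest = src1 + src2, across the window

   which is exactly the 'restore src1,src2,dest' PLUS pattern accepted
   above. */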
2753 /* Return nonzero if TRIAL can go into the function return's
2754 delay slot. */
2756 int
2757 eligible_for_return_delay (rtx trial)
2759 rtx pat;
2761 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2762 return 0;
2764 if (get_attr_length (trial) != 1)
2765 return 0;
2767 /* If there are any call-saved registers, we would have to check that
2768 TRIAL does not reference them. For now, just punt. */
2769 if (num_gfregs)
2770 return 0;
2772 /* If the function uses __builtin_eh_return, the eh_return machinery
2773 occupies the delay slot. */
2774 if (crtl->calls_eh_return)
2775 return 0;
2777 /* In the case of a true leaf function, anything can go into the slot. */
2778 if (sparc_leaf_function_p)
2779 return get_attr_in_uncond_branch_delay (trial)
2780 == IN_UNCOND_BRANCH_DELAY_TRUE;
2782 pat = PATTERN (trial);
2784 /* Otherwise, only operations which can be done in tandem with
2785 a `restore' or `return' insn can go into the delay slot. */
2786 if (GET_CODE (SET_DEST (pat)) != REG
2787 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24))
2788 return 0;
2790 /* If this instruction sets up a floating-point register and we have a return
2791 instruction, it can probably go in. But restore will not work
2792 with FP_REGS. */
2793 if (REGNO (SET_DEST (pat)) >= 32)
2794 return (TARGET_V9
2795 && ! epilogue_renumber (&pat, 1)
2796 && (get_attr_in_uncond_branch_delay (trial)
2797 == IN_UNCOND_BRANCH_DELAY_TRUE));
2799 return eligible_for_restore_insn (trial, true);
2802 /* Return nonzero if TRIAL can go into the sibling call's
2803 delay slot. */
2805 int
2806 eligible_for_sibcall_delay (rtx trial)
2808 rtx pat;
2810 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2811 return 0;
2813 if (get_attr_length (trial) != 1)
2814 return 0;
2816 pat = PATTERN (trial);
2818 if (sparc_leaf_function_p)
2820 /* If the tail call is done using the call instruction,
2821 we have to restore %o7 in the delay slot. */
2822 if (LEAF_SIBCALL_SLOT_RESERVED_P)
2823 return 0;
2825 /* %g1 is used to build the function address */
2826 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
2827 return 0;
2829 return 1;
2832 /* Otherwise, only operations which can be done in tandem with
2833 a `restore' insn can go into the delay slot. */
2834 if (GET_CODE (SET_DEST (pat)) != REG
2835 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
2836 || REGNO (SET_DEST (pat)) >= 32)
2837 return 0;
2839 /* If it mentions %o7, it can't go in, because sibcall will clobber it
2840 in most cases. */
2841 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
2842 return 0;
2844 return eligible_for_restore_insn (trial, false);
2847 int
2848 short_branch (int uid1, int uid2)
2850 int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);
2852 /* Leave a few words of "slop". */
2853 if (delta >= -1023 && delta <= 1022)
2854 return 1;
2856 return 0;
2859 /* Return nonzero if REG is not used after INSN.
2860 We assume REG is a reload reg, and therefore does
2861 not live past labels or calls or jumps. */
2862 int
2863 reg_unused_after (rtx reg, rtx insn)
2865 enum rtx_code code, prev_code = UNKNOWN;
2867 while ((insn = NEXT_INSN (insn)))
2869 if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2870 return 1;
2872 code = GET_CODE (insn);
2873 if (GET_CODE (insn) == CODE_LABEL)
2874 return 1;
2876 if (INSN_P (insn))
2878 rtx set = single_set (insn);
2879 int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2880 if (set && in_src)
2881 return 0;
2882 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2883 return 1;
2884 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2885 return 0;
2887 prev_code = code;
2889 return 1;
2892 /* Determine if it's legal to put X into the constant pool. This
2893 is not possible if X contains the address of a symbol that is
2894 not constant (TLS) or not known at final link time (PIC). */
2896 static bool
2897 sparc_cannot_force_const_mem (rtx x)
2899 switch (GET_CODE (x))
2901 case CONST_INT:
2902 case CONST_DOUBLE:
2903 case CONST_VECTOR:
2904 /* Accept all non-symbolic constants. */
2905 return false;
2907 case LABEL_REF:
2908 /* Labels are OK iff we are non-PIC. */
2909 return flag_pic != 0;
2911 case SYMBOL_REF:
2912 /* 'Naked' TLS symbol references are never OK,
2913 non-TLS symbols are OK iff we are non-PIC. */
2914 if (SYMBOL_REF_TLS_MODEL (x))
2915 return true;
2916 else
2917 return flag_pic != 0;
2919 case CONST:
2920 return sparc_cannot_force_const_mem (XEXP (x, 0));
2921 case PLUS:
2922 case MINUS:
2923 return sparc_cannot_force_const_mem (XEXP (x, 0))
2924 || sparc_cannot_force_const_mem (XEXP (x, 1));
2925 case UNSPEC:
2926 return true;
2927 default:
2928 gcc_unreachable ();
2932 /* PIC support. */
2933 static GTY(()) char pic_helper_symbol_name[256];
2934 static GTY(()) rtx pic_helper_symbol;
2935 static GTY(()) bool pic_helper_emitted_p = false;
2936 static GTY(()) rtx global_offset_table;
2938 /* Ensure that we are not using patterns that are not OK with PIC. */
2940 int
2941 check_pic (int i)
2943 switch (flag_pic)
2945 case 1:
2946 gcc_assert (GET_CODE (recog_data.operand[i]) != SYMBOL_REF
2947 && (GET_CODE (recog_data.operand[i]) != CONST
2948 || (GET_CODE (XEXP (recog_data.operand[i], 0)) == MINUS
2949 && (XEXP (XEXP (recog_data.operand[i], 0), 0)
2950 == global_offset_table)
2951 && (GET_CODE (XEXP (XEXP (recog_data.operand[i], 0), 1))
2952 == CONST))));
2953 case 2:
2954 default:
2955 return 1;
2959 /* Return true if X is an address which needs a temporary register when
2960 reloaded while generating PIC code. */
2962 int
2963 pic_address_needs_scratch (rtx x)
2965 /* An address that is a symbolic operand plus a non-SMALL_INT constant needs a temp reg. */
2966 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2968 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2969 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2970 return 1;
2972 return 0;
2975 /* Determine if a given RTX is a valid constant. We already know this
2976 satisfies CONSTANT_P. */
2978 bool
2979 legitimate_constant_p (rtx x)
2981 rtx inner;
2983 switch (GET_CODE (x))
2985 case SYMBOL_REF:
2986 /* TLS symbols are not constant. */
2987 if (SYMBOL_REF_TLS_MODEL (x))
2988 return false;
2989 break;
2991 case CONST:
2992 inner = XEXP (x, 0);
2994 /* Offsets of TLS symbols are never valid.
2995 Discourage CSE from creating them. */
2996 if (GET_CODE (inner) == PLUS
2997 && SPARC_SYMBOL_REF_TLS_P (XEXP (inner, 0)))
2998 return false;
2999 break;
3001 case CONST_DOUBLE:
3002 if (GET_MODE (x) == VOIDmode)
3003 return true;
3005 /* Floating point constants are generally not ok.
3006 The only exception is 0.0 in VIS. */
3007 if (TARGET_VIS
3008 && SCALAR_FLOAT_MODE_P (GET_MODE (x))
3009 && const_zero_operand (x, GET_MODE (x)))
3010 return true;
3012 return false;
3014 case CONST_VECTOR:
3015 /* Vector constants are generally not ok.
3016 The only exception is 0 in VIS. */
3017 if (TARGET_VIS
3018 && const_zero_operand (x, GET_MODE (x)))
3019 return true;
3021 return false;
3023 default:
3024 break;
3027 return true;
3030 /* Determine if a given RTX is a valid constant address. */
3032 bool
3033 constant_address_p (rtx x)
3035 switch (GET_CODE (x))
3037 case LABEL_REF:
3038 case CONST_INT:
3039 case HIGH:
3040 return true;
3042 case CONST:
3043 if (flag_pic && pic_address_needs_scratch (x))
3044 return false;
3045 return legitimate_constant_p (x);
3047 case SYMBOL_REF:
3048 return !flag_pic && legitimate_constant_p (x);
3050 default:
3051 return false;
3055 /* Nonzero if the constant value X is a legitimate general operand
3056 when generating PIC code. It is given that flag_pic is on and
3057 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3059 bool
3060 legitimate_pic_operand_p (rtx x)
3062 if (pic_address_needs_scratch (x))
3063 return false;
3064 if (SPARC_SYMBOL_REF_TLS_P (x)
3065 || (GET_CODE (x) == CONST
3066 && GET_CODE (XEXP (x, 0)) == PLUS
3067 && SPARC_SYMBOL_REF_TLS_P (XEXP (XEXP (x, 0), 0))))
3068 return false;
3069 return true;
3072 /* Return nonzero if ADDR is a valid memory address.
3073 STRICT specifies whether strict register checking applies. */
3075 static bool
3076 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3078 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3080 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3081 rs1 = addr;
3082 else if (GET_CODE (addr) == PLUS)
3084 rs1 = XEXP (addr, 0);
3085 rs2 = XEXP (addr, 1);
3087 /* Canonicalize. REG comes first; if there are no regs,
3088 LO_SUM comes first. */
3089 if (!REG_P (rs1)
3090 && GET_CODE (rs1) != SUBREG
3091 && (REG_P (rs2)
3092 || GET_CODE (rs2) == SUBREG
3093 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3095 rs1 = XEXP (addr, 1);
3096 rs2 = XEXP (addr, 0);
3099 if ((flag_pic == 1
3100 && rs1 == pic_offset_table_rtx
3101 && !REG_P (rs2)
3102 && GET_CODE (rs2) != SUBREG
3103 && GET_CODE (rs2) != LO_SUM
3104 && GET_CODE (rs2) != MEM
3105 && ! SPARC_SYMBOL_REF_TLS_P (rs2)
3106 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3107 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3108 || ((REG_P (rs1)
3109 || GET_CODE (rs1) == SUBREG)
3110 && RTX_OK_FOR_OFFSET_P (rs2)))
3112 imm1 = rs2;
3113 rs2 = NULL;
3115 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3116 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3118 /* We prohibit REG + REG for TFmode when there are no quad move insns
3119 and we consequently need to split. We do this because REG+REG
3120 is not an offsettable address. If we get the situation in reload
3121 where source and destination of a movtf pattern are both MEMs with
3122 REG+REG address, then only one of them gets converted to an
3123 offsettable address. */
3124 if (mode == TFmode
3125 && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
3126 return 0;
3128 /* We also prohibit REG + REG for DFmode/DImode on ARCH32 when not
3129 optimizing, because mem_min_alignment is then likely to be zero
3130 after reload and the forced split would lack a matching splitter
3131 pattern. */
3132 if (TARGET_ARCH32 && !optimize
3133 && (mode == DFmode || mode == DImode))
3134 return 0;
3136 else if (USE_AS_OFFSETABLE_LO10
3137 && GET_CODE (rs1) == LO_SUM
3138 && TARGET_ARCH64
3139 && ! TARGET_CM_MEDMID
3140 && RTX_OK_FOR_OLO10_P (rs2))
3142 rs2 = NULL;
3143 imm1 = XEXP (rs1, 1);
3144 rs1 = XEXP (rs1, 0);
3145 if (! CONSTANT_P (imm1) || SPARC_SYMBOL_REF_TLS_P (rs1))
3146 return 0;
3149 else if (GET_CODE (addr) == LO_SUM)
3151 rs1 = XEXP (addr, 0);
3152 imm1 = XEXP (addr, 1);
3154 if (! CONSTANT_P (imm1) || SPARC_SYMBOL_REF_TLS_P (rs1))
3155 return 0;
3157 /* We can't allow TFmode in 32-bit mode, because an offset greater
3158 than the alignment (8) may cause the LO_SUM to overflow. */
3159 if (mode == TFmode && TARGET_ARCH32)
3160 return 0;
3162 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3163 return 1;
3164 else
3165 return 0;
3167 if (GET_CODE (rs1) == SUBREG)
3168 rs1 = SUBREG_REG (rs1);
3169 if (!REG_P (rs1))
3170 return 0;
3172 if (rs2)
3174 if (GET_CODE (rs2) == SUBREG)
3175 rs2 = SUBREG_REG (rs2);
3176 if (!REG_P (rs2))
3177 return 0;
3180 if (strict)
3182 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3183 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3184 return 0;
3186 else
3188 if ((REGNO (rs1) >= 32
3189 && REGNO (rs1) != FRAME_POINTER_REGNUM
3190 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3191 || (rs2
3192 && (REGNO (rs2) >= 32
3193 && REGNO (rs2) != FRAME_POINTER_REGNUM
3194 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3195 return 0;
3197 return 1;
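/* Summary of the address forms accepted above (illustrative):

     reg                    ld      [%o0], %g1
     reg + reg              ld      [%o0 + %o1], %g1
     reg + simm13           ld      [%fp - 8], %g1
     lo_sum (reg, imm)      ld      [%o0 + %lo(sym)], %g1
     simm13 absolute        ld      [42], %g1

   subject to the REG + REG restrictions for TFmode and for unoptimized
   32-bit DFmode/DImode spelled out above. */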
3200 /* Construct the SYMBOL_REF for the tls_get_offset function. */
3202 static GTY(()) rtx sparc_tls_symbol;
3204 static rtx
3205 sparc_tls_get_addr (void)
3207 if (!sparc_tls_symbol)
3208 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3210 return sparc_tls_symbol;
3213 static rtx
3214 sparc_tls_got (void)
3216 rtx temp;
3217 if (flag_pic)
3219 crtl->uses_pic_offset_table = 1;
3220 return pic_offset_table_rtx;
3223 if (!global_offset_table)
3224 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3225 temp = gen_reg_rtx (Pmode);
3226 emit_move_insn (temp, global_offset_table);
3227 return temp;
3230 /* Return 1 if *X is a thread-local symbol. */
3232 static int
3233 sparc_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
3235 return SPARC_SYMBOL_REF_TLS_P (*x);
3238 /* Return 1 if X contains a thread-local symbol. */
3240 bool
3241 sparc_tls_referenced_p (rtx x)
3243 if (!TARGET_HAVE_TLS)
3244 return false;
3246 return for_each_rtx (&x, &sparc_tls_symbol_ref_1, 0);
3249 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3250 this (thread-local) address. */
3252 rtx
3253 legitimize_tls_address (rtx addr)
3255 rtx temp1, temp2, temp3, ret, o0, got, insn;
3257 gcc_assert (can_create_pseudo_p ());
3259 if (GET_CODE (addr) == SYMBOL_REF)
3260 switch (SYMBOL_REF_TLS_MODEL (addr))
3262 case TLS_MODEL_GLOBAL_DYNAMIC:
3263 start_sequence ();
3264 temp1 = gen_reg_rtx (SImode);
3265 temp2 = gen_reg_rtx (SImode);
3266 ret = gen_reg_rtx (Pmode);
3267 o0 = gen_rtx_REG (Pmode, 8);
3268 got = sparc_tls_got ();
3269 emit_insn (gen_tgd_hi22 (temp1, addr));
3270 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3271 if (TARGET_ARCH32)
3273 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3274 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3275 addr, const1_rtx));
3277 else
3279 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3280 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3281 addr, const1_rtx));
3283 CALL_INSN_FUNCTION_USAGE (insn)
3284 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3285 CALL_INSN_FUNCTION_USAGE (insn));
3286 insn = get_insns ();
3287 end_sequence ();
3288 emit_libcall_block (insn, ret, o0, addr);
3289 break;
3291 case TLS_MODEL_LOCAL_DYNAMIC:
3292 start_sequence ();
3293 temp1 = gen_reg_rtx (SImode);
3294 temp2 = gen_reg_rtx (SImode);
3295 temp3 = gen_reg_rtx (Pmode);
3296 ret = gen_reg_rtx (Pmode);
3297 o0 = gen_rtx_REG (Pmode, 8);
3298 got = sparc_tls_got ();
3299 emit_insn (gen_tldm_hi22 (temp1));
3300 emit_insn (gen_tldm_lo10 (temp2, temp1));
3301 if (TARGET_ARCH32)
3303 emit_insn (gen_tldm_add32 (o0, got, temp2));
3304 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
3305 const1_rtx));
3307 else
3309 emit_insn (gen_tldm_add64 (o0, got, temp2));
3310 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
3311 const1_rtx));
3313 CALL_INSN_FUNCTION_USAGE (insn)
3314 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3315 CALL_INSN_FUNCTION_USAGE (insn));
3316 insn = get_insns ();
3317 end_sequence ();
3318 emit_libcall_block (insn, temp3, o0,
3319 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
3320 UNSPEC_TLSLD_BASE));
3321 temp1 = gen_reg_rtx (SImode);
3322 temp2 = gen_reg_rtx (SImode);
3323 emit_insn (gen_tldo_hix22 (temp1, addr));
3324 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
3325 if (TARGET_ARCH32)
3326 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
3327 else
3328 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
3329 break;
3331 case TLS_MODEL_INITIAL_EXEC:
3332 temp1 = gen_reg_rtx (SImode);
3333 temp2 = gen_reg_rtx (SImode);
3334 temp3 = gen_reg_rtx (Pmode);
3335 got = sparc_tls_got ();
3336 emit_insn (gen_tie_hi22 (temp1, addr));
3337 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
3338 if (TARGET_ARCH32)
3339 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
3340 else
3341 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
3342 if (TARGET_SUN_TLS)
3344 ret = gen_reg_rtx (Pmode);
3345 if (TARGET_ARCH32)
3346 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
3347 temp3, addr));
3348 else
3349 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
3350 temp3, addr));
3352 else
3353 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
3354 break;
3356 case TLS_MODEL_LOCAL_EXEC:
3357 temp1 = gen_reg_rtx (Pmode);
3358 temp2 = gen_reg_rtx (Pmode);
3359 if (TARGET_ARCH32)
3361 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
3362 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
3364 else
3366 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
3367 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
3369 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
3370 break;
3372 default:
3373 gcc_unreachable ();
3376 else
3377 gcc_unreachable (); /* for now ... */
3379 return ret;
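/* For the global-dynamic model, the 32-bit result is the standard SPARC
   TLS sequence (illustrative; register allocation varies):

        sethi   %tgd_hi22(sym), %l1
        add     %l1, %tgd_lo10(sym), %l2
        add     %l7, %l2, %o0, %tgd_add(sym)
        call    __tls_get_addr, %tgd_call(sym)
         nop

   matching the tgd_hi22/tgd_lo10/tgd_add32/tgd_call32 patterns emitted
   above.  At the other extreme, local-exec needs no call at all and
   reduces to a sethi/xor pair added to the thread pointer in %g7. */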
3383 /* Legitimize PIC addresses. If the address is already position-independent,
3384 we return ORIG. Newly generated position-independent addresses go into a
3385 reg. This is REG if nonzero, otherwise we allocate register(s) as
3386 necessary. */
3388 rtx
3389 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
3390 rtx reg)
3392 if (GET_CODE (orig) == SYMBOL_REF
3393 /* See the comment in sparc_expand_move. */
3394 || (TARGET_VXWORKS_RTP && GET_CODE (orig) == LABEL_REF))
3396 rtx pic_ref, address;
3397 rtx insn;
3399 if (reg == 0)
3401 gcc_assert (! reload_in_progress && ! reload_completed);
3402 reg = gen_reg_rtx (Pmode);
3405 if (flag_pic == 2)
3407 /* If not during reload, allocate another temp reg here for loading
3408 in the address, so that these instructions can be optimized
3409 properly. */
3410 rtx temp_reg = ((reload_in_progress || reload_completed)
3411 ? reg : gen_reg_rtx (Pmode));
3413 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
3414 won't get confused into thinking that these two instructions
3415 are loading in the true address of the symbol. If in the
3416 future a PIC rtx exists, that should be used instead. */
3417 if (TARGET_ARCH64)
3419 emit_insn (gen_movdi_high_pic (temp_reg, orig));
3420 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
3422 else
3424 emit_insn (gen_movsi_high_pic (temp_reg, orig));
3425 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
3427 address = temp_reg;
3429 else
3430 address = orig;
3432 pic_ref = gen_const_mem (Pmode,
3433 gen_rtx_PLUS (Pmode,
3434 pic_offset_table_rtx, address));
3435 crtl->uses_pic_offset_table = 1;
3436 insn = emit_move_insn (reg, pic_ref);
3437 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3438 by loop. */
3439 set_unique_reg_note (insn, REG_EQUAL, orig);
3440 return reg;
3442 else if (GET_CODE (orig) == CONST)
3444 rtx base, offset;
3446 if (GET_CODE (XEXP (orig, 0)) == PLUS
3447 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3448 return orig;
3450 if (reg == 0)
3452 gcc_assert (! reload_in_progress && ! reload_completed);
3453 reg = gen_reg_rtx (Pmode);
3456 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3457 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3458 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3459 base == reg ? 0 : reg);
3461 if (GET_CODE (offset) == CONST_INT)
3463 if (SMALL_INT (offset))
3464 return plus_constant (base, INTVAL (offset));
3465 else if (! reload_in_progress && ! reload_completed)
3466 offset = force_reg (Pmode, offset);
3467 else
3468 /* If we reach here, then something is seriously wrong. */
3469 gcc_unreachable ();
3471 return gen_rtx_PLUS (Pmode, base, offset);
3473 else if (GET_CODE (orig) == LABEL_REF)
3474 /* ??? Why do we do this? */
3475 /* Now movsi_pic_label_ref uses it, but we ought to be checking that
3476 the register is live instead, in case it is eliminated. */
3477 crtl->uses_pic_offset_table = 1;
3479 return orig;
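/* Illustrative result: under -fPIC (flag_pic == 2) the address of a
   global "sym" is loaded through the GOT, conceptually

        sethi   %hi(sym), %tmp          ! becomes a GOT22 reloc with -KPIC
        or      %tmp, %lo(sym), %tmp    ! becomes a GOT10 reloc
        ld      [%l7 + %tmp], %reg      ! fetch the GOT entry

   The UNSPEC wrapping above is what keeps cse from mistaking the
   sethi/or pair for the symbol's real address.  Under -fpic
   (flag_pic == 1) the GOT is assumed small and a single GOT13-relocated
   load from %l7 suffices. */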
3482 /* Try machine-dependent ways of modifying an illegitimate address X
3483 to be legitimate. If we find one, return the new, valid address.
3485 OLDX is the address as it was before break_out_memory_refs was called.
3486 In some cases it is useful to look at this to decide what needs to be done.
3488 MODE is the mode of the operand pointed to by X.
3490 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
3492 static rtx
3493 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3494 enum machine_mode mode)
3496 rtx orig_x = x;
3498 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
3499 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3500 force_operand (XEXP (x, 0), NULL_RTX));
3501 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
3502 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3503 force_operand (XEXP (x, 1), NULL_RTX));
3504 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
3505 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
3506 XEXP (x, 1));
3507 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
3508 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3509 force_operand (XEXP (x, 1), NULL_RTX));
3511 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
3512 return x;
3514 if (SPARC_SYMBOL_REF_TLS_P (x))
3515 x = legitimize_tls_address (x);
3516 else if (flag_pic)
3517 x = legitimize_pic_address (x, mode, 0);
3518 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
3519 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3520 copy_to_mode_reg (Pmode, XEXP (x, 1)));
3521 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
3522 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3523 copy_to_mode_reg (Pmode, XEXP (x, 0)));
3524 else if (GET_CODE (x) == SYMBOL_REF
3525 || GET_CODE (x) == CONST
3526 || GET_CODE (x) == LABEL_REF)
3527 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
3528 return x;
3531 /* Emit the special PIC helper function. */
3533 static void
3534 emit_pic_helper (void)
3536 const char *pic_name = reg_names[REGNO (pic_offset_table_rtx)];
3537 int align;
3539 switch_to_section (text_section);
3541 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
3542 if (align > 0)
3543 ASM_OUTPUT_ALIGN (asm_out_file, align);
3544 ASM_OUTPUT_LABEL (asm_out_file, pic_helper_symbol_name);
3545 if (flag_delayed_branch)
3546 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
3547 pic_name, pic_name);
3548 else
3549 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
3550 pic_name, pic_name);
3552 pic_helper_emitted_p = true;
3555 /* Emit code to load the PIC register. */
3557 static void
3558 load_pic_register (bool delay_pic_helper)
3560 int orig_flag_pic = flag_pic;
3562 if (TARGET_VXWORKS_RTP)
3564 emit_insn (gen_vxworks_load_got ());
3565 emit_use (pic_offset_table_rtx);
3566 return;
3569 /* If we haven't initialized the special PIC symbols, do so now. */
3570 if (!pic_helper_symbol_name[0])
3572 ASM_GENERATE_INTERNAL_LABEL (pic_helper_symbol_name, "LADDPC", 0);
3573 pic_helper_symbol = gen_rtx_SYMBOL_REF (Pmode, pic_helper_symbol_name);
3574 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3577 /* If we haven't emitted the special PIC helper function, do so now unless
3578 we are requested to delay it. */
3579 if (!delay_pic_helper && !pic_helper_emitted_p)
3580 emit_pic_helper ();
3582 flag_pic = 0;
3583 if (TARGET_ARCH64)
3584 emit_insn (gen_load_pcrel_symdi (pic_offset_table_rtx, global_offset_table,
3585 pic_helper_symbol));
3586 else
3587 emit_insn (gen_load_pcrel_symsi (pic_offset_table_rtx, global_offset_table,
3588 pic_helper_symbol));
3589 flag_pic = orig_flag_pic;
3591 /* Need to emit this whether or not we obey regdecls,
3592 since setjmp/longjmp can cause life info to screw up.
3593 ??? In the case where we don't obey regdecls, this is not sufficient
3594 since we may not fall out the bottom. */
3595 emit_use (pic_offset_table_rtx);
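/* Putting the pieces together (illustrative): the prologue materializes
   the PIC register with a PC-relative computation routed through the
   helper emitted by emit_pic_helper, conceptually

        sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
        call    .LADDPC                 ! jmp %o7+8; add %o7, %l7, %l7
         add    %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   Since %o7 holds the address of the call itself, adding it to the
   GOT-minus-PC displacement leaves the GOT's runtime address in %l7. */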
3598 /* Emit a call instruction with the pattern given by PAT. ADDR is the
3599 address of the call target. */
3601 void
3602 sparc_emit_call_insn (rtx pat, rtx addr)
3604 rtx insn;
3606 insn = emit_call_insn (pat);
3608 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
3609 if (TARGET_VXWORKS_RTP
3610 && flag_pic
3611 && GET_CODE (addr) == SYMBOL_REF
3612 && (SYMBOL_REF_DECL (addr)
3613 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
3614 : !SYMBOL_REF_LOCAL_P (addr)))
3616 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
3617 crtl->uses_pic_offset_table = 1;
3621 /* Return 1 if RTX is a MEM which is known to be aligned to at
3622 least a DESIRED byte boundary. */
3624 int
3625 mem_min_alignment (rtx mem, int desired)
3627 rtx addr, base, offset;
3629 /* If it's not a MEM we can't accept it. */
3630 if (GET_CODE (mem) != MEM)
3631 return 0;
3633 /* Obviously... */
3634 if (!TARGET_UNALIGNED_DOUBLES
3635 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
3636 return 1;
3638 /* ??? The rest of the function predates MEM_ALIGN so
3639 there is probably a bit of redundancy. */
3640 addr = XEXP (mem, 0);
3641 base = offset = NULL_RTX;
3642 if (GET_CODE (addr) == PLUS)
3644 if (GET_CODE (XEXP (addr, 0)) == REG)
3646 base = XEXP (addr, 0);
3648 /* The point here is that if the base
3649 REG is aligned properly, the compiler will ensure
3650 that any REG-based index built upon it is aligned
3651 as well. */
3652 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
3653 offset = XEXP (addr, 1);
3654 else
3655 offset = const0_rtx;
3658 else if (GET_CODE (addr) == REG)
3660 base = addr;
3661 offset = const0_rtx;
3664 if (base != NULL_RTX)
3666 int regno = REGNO (base);
3668 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
3670 /* Check if the compiler has recorded some information
3671 about the alignment of the base REG. If reload has
3672 completed, we already matched with proper alignments.
3673 If not running global_alloc, reload might give us
3674 an unaligned pointer to the local stack, though. */
3675 if (((cfun != 0
3676 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
3677 || (optimize && reload_completed))
3678 && (INTVAL (offset) & (desired - 1)) == 0)
3679 return 1;
3681 else
3683 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
3684 return 1;
3687 else if (! TARGET_UNALIGNED_DOUBLES
3688 || CONSTANT_P (addr)
3689 || GET_CODE (addr) == LO_SUM)
3691 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
3692 is true, in which case we can only assume that an access is aligned if
3693 it is to a constant address, or the address involves a LO_SUM. */
3694 return 1;
3697 /* An obviously unaligned address. */
3698 return 0;
3702 /* Vectors to keep interesting information about registers where it can easily
3703 be got. We used to use the actual mode value as the bit number, but there
3704 are more than 32 modes now. Instead we use two tables: one indexed by
3705 hard register number, and one indexed by mode. */
3707 /* The purpose of sparc_mode_class is to shrink the range of modes so that
3708 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
3709 mapped into one sparc_mode_class mode. */
3711 enum sparc_mode_class {
3712 S_MODE, D_MODE, T_MODE, O_MODE,
3713 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
3714 CC_MODE, CCFP_MODE
3717 /* Modes for single-word and smaller quantities. */
3718 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
3720 /* Modes for double-word and smaller quantities. */
3721 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
3723 /* Modes for quad-word and smaller quantities. */
3724 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
3726 /* Modes for 8-word and smaller quantities. */
3727 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
3729 /* Modes for single-float quantities. We must allow any single word or
3730 smaller quantity. This is because the fix/float conversion instructions
3731 take integer inputs/outputs from the float registers. */
3732 #define SF_MODES (S_MODES)
3734 /* Modes for double-float and smaller quantities. */
3735 #define DF_MODES (S_MODES | D_MODES)
3737 /* Modes for double-float only quantities. */
3738 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
3740 /* Modes for quad-float only quantities. */
3741 #define TF_ONLY_MODES (1 << (int) TF_MODE)
3743 /* Modes for quad-float and smaller quantities. */
3744 #define TF_MODES (DF_MODES | TF_ONLY_MODES)
3746 /* Modes for quad-float and double-float quantities. */
3747 #define TF_MODES_NO_S (DF_MODES_NO_S | TF_ONLY_MODES)
3749 /* Modes for quad-float pair only quantities. */
3750 #define OF_ONLY_MODES (1 << (int) OF_MODE)
3752 /* Modes for quad-float pairs and smaller quantities. */
3753 #define OF_MODES (TF_MODES | OF_ONLY_MODES)
3755 #define OF_MODES_NO_S (TF_MODES_NO_S | OF_ONLY_MODES)
3757 /* Modes for condition codes. */
3758 #define CC_MODES (1 << (int) CC_MODE)
3759 #define CCFP_MODES (1 << (int) CCFP_MODE)
3761 /* Value is 1 if register/mode pair is acceptable on sparc.
3762 The funny mixture of D and T modes is because integer operations
3763 do not specially operate on tetra quantities, so non-quad-aligned
3764 registers can hold quadword quantities (except %o4 and %i4 because
3765 they cross fixed registers). */
3767 /* This points to either the 32 bit or the 64 bit version. */
3768 const int *hard_regno_mode_classes;
3770 static const int hard_32bit_mode_classes[] = {
3771 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3772 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3773 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3774 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3776 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3777 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3778 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3779 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3781 /* FP regs f32 to f63. Only the even numbered registers actually exist,
3782 and none can hold SFmode/SImode values. */
3783 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3784 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3785 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3786 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3788 /* %fcc[0123] */
3789 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3791 /* %icc */
3792 CC_MODES
3795 static const int hard_64bit_mode_classes[] = {
3796 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3797 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3798 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3799 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3801 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3802 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3803 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3804 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3806 /* FP regs f32 to f63. Only the even numbered registers actually exist,
3807 and none can hold SFmode/SImode values. */
3808 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3809 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3810 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3811 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3813 /* %fcc[0123] */
3814 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3816 /* %icc */
3817 CC_MODES
3820 int sparc_mode_class [NUM_MACHINE_MODES];
3822 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
3824 static void
3825 sparc_init_modes (void)
3827 int i;
3829 for (i = 0; i < NUM_MACHINE_MODES; i++)
3831 switch (GET_MODE_CLASS (i))
3833 case MODE_INT:
3834 case MODE_PARTIAL_INT:
3835 case MODE_COMPLEX_INT:
3836 if (GET_MODE_SIZE (i) <= 4)
3837 sparc_mode_class[i] = 1 << (int) S_MODE;
3838 else if (GET_MODE_SIZE (i) == 8)
3839 sparc_mode_class[i] = 1 << (int) D_MODE;
3840 else if (GET_MODE_SIZE (i) == 16)
3841 sparc_mode_class[i] = 1 << (int) T_MODE;
3842 else if (GET_MODE_SIZE (i) == 32)
3843 sparc_mode_class[i] = 1 << (int) O_MODE;
3844 else
3845 sparc_mode_class[i] = 0;
3846 break;
3847 case MODE_VECTOR_INT:
3848 if (GET_MODE_SIZE (i) <= 4)
3849 sparc_mode_class[i] = 1 << (int)SF_MODE;
3850 else if (GET_MODE_SIZE (i) == 8)
3851 sparc_mode_class[i] = 1 << (int)DF_MODE;
3852 break;
3853 case MODE_FLOAT:
3854 case MODE_COMPLEX_FLOAT:
3855 if (GET_MODE_SIZE (i) <= 4)
3856 sparc_mode_class[i] = 1 << (int) SF_MODE;
3857 else if (GET_MODE_SIZE (i) == 8)
3858 sparc_mode_class[i] = 1 << (int) DF_MODE;
3859 else if (GET_MODE_SIZE (i) == 16)
3860 sparc_mode_class[i] = 1 << (int) TF_MODE;
3861 else if (GET_MODE_SIZE (i) == 32)
3862 sparc_mode_class[i] = 1 << (int) OF_MODE;
3863 else
3864 sparc_mode_class[i] = 0;
3865 break;
3866 case MODE_CC:
3867 if (i == (int) CCFPmode || i == (int) CCFPEmode)
3868 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
3869 else
3870 sparc_mode_class[i] = 1 << (int) CC_MODE;
3871 break;
3872 default:
3873 sparc_mode_class[i] = 0;
3874 break;
3878 if (TARGET_ARCH64)
3879 hard_regno_mode_classes = hard_64bit_mode_classes;
3880 else
3881 hard_regno_mode_classes = hard_32bit_mode_classes;
3883 /* Initialize the array used by REGNO_REG_CLASS. */
3884 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3886 if (i < 16 && TARGET_V8PLUS)
3887 sparc_regno_reg_class[i] = I64_REGS;
3888 else if (i < 32 || i == FRAME_POINTER_REGNUM)
3889 sparc_regno_reg_class[i] = GENERAL_REGS;
3890 else if (i < 64)
3891 sparc_regno_reg_class[i] = FP_REGS;
3892 else if (i < 96)
3893 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
3894 else if (i < 100)
3895 sparc_regno_reg_class[i] = FPCC_REGS;
3896 else
3897 sparc_regno_reg_class[i] = NO_REGS;
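/* The two tables combine into a simple membership test; the target's
   HARD_REGNO_MODE_OK check amounts to (a sketch, assuming the usual
   macro in sparc.h):

     (hard_regno_mode_classes[REGNO] & sparc_mode_class[MODE]) != 0

   For instance, DFmode maps to the DF_MODE bit, which is present in the
   D_MODES class of even integer registers and the DF_MODES class of FP
   registers, but absent from the all-zero entries of the odd upper FP
   registers. */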
3901 /* Compute the frame size required by the function. This function is called
3902 during the reload pass and also by sparc_expand_prologue. */
3904 HOST_WIDE_INT
3905 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function_p)
3907 int outgoing_args_size = (crtl->outgoing_args_size
3908 + REG_PARM_STACK_SPACE (current_function_decl));
3909 int n_regs = 0; /* N_REGS is the number of 4-byte regs saved thus far. */
3910 int i;
3912 if (TARGET_ARCH64)
3914 for (i = 0; i < 8; i++)
3915 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3916 n_regs += 2;
3918 else
3920 for (i = 0; i < 8; i += 2)
3921 if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
3922 || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
3923 n_regs += 2;
3926 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
3927 if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
3928 || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
3929 n_regs += 2;
3931 /* Set up values for use in prologue and epilogue. */
3932 num_gfregs = n_regs;
3934 if (leaf_function_p
3935 && n_regs == 0
3936 && size == 0
3937 && crtl->outgoing_args_size == 0)
3938 actual_fsize = apparent_fsize = 0;
3939 else
3941 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
3942 apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8;
3943 apparent_fsize += n_regs * 4;
3944 actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
3947 /* Make sure nothing can clobber our register windows.
3948 If a SAVE must be done, or there is a stack-local variable,
3949 the register window area must be allocated. */
3950 if (! leaf_function_p || size > 0)
3951 actual_fsize += FIRST_PARM_OFFSET (current_function_decl);
3953 return SPARC_STACK_ALIGN (actual_fsize);
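/* A worked example of the computation above (figures illustrative only):
   take a 32-bit non-leaf function with 20 bytes of locals, no outgoing
   arguments and two call-saved register pairs live (n_regs == 4), and
   take STARTING_FRAME_OFFSET as 0 purely to keep the arithmetic simple:

     apparent_fsize = (20 + 7) & -8            =  24
     apparent_fsize += 4 * 4                   ->  40
     actual_fsize   = 40 + 0                   ->  40
     actual_fsize  += FIRST_PARM_OFFSET (...)  ->  40 + 68 = 108
     SPARC_STACK_ALIGN (108)                   -> 112

   where 68 is the usual 32-bit figure (16-word register window save
   area plus the hidden structure-return word) and the final step
   rounds up to the 8-byte stack boundary.  */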
3956 /* Output any necessary .register pseudo-ops. */
3958 void
3959 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
3961 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
3962 int i;
3964 if (TARGET_ARCH32)
3965 return;
3967 /* Check whether %g[2367] were used without a
3968 .register directive having been printed for them already. */
3969 for (i = 2; i < 8; i++)
3971 if (df_regs_ever_live_p (i)
3972 && ! sparc_hard_reg_printed [i])
3974 sparc_hard_reg_printed [i] = 1;
3975 /* %g7 is used as the TLS base register; use #ignore
3976 for it instead of #scratch. */
3977 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
3978 i == 7 ? "ignore" : "scratch");
3980 if (i == 3) i = 5;
3982 #endif
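/* For example, if %g2 and %g7 are both live in a 64-bit function, the
   loop above emits (illustrative output):

	.register	%g2, #scratch
	.register	%g7, #ignore
*/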
3985 /* Save/restore call-saved registers from LOW to HIGH at BASE+OFFSET
3986 as needed. LOW should be double-word aligned for 32-bit registers.
3987 Return the new OFFSET. */
3989 #define SORR_SAVE 0
3990 #define SORR_RESTORE 1
3992 static int
3993 save_or_restore_regs (int low, int high, rtx base, int offset, int action)
3995 rtx mem, insn;
3996 int i;
3998 if (TARGET_ARCH64 && high <= 32)
4000 for (i = low; i < high; i++)
4002 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4004 mem = gen_rtx_MEM (DImode, plus_constant (base, offset));
4005 set_mem_alias_set (mem, sparc_sr_alias_set);
4006 if (action == SORR_SAVE)
4008 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
4009 RTX_FRAME_RELATED_P (insn) = 1;
4011 else /* action == SORR_RESTORE */
4012 emit_move_insn (gen_rtx_REG (DImode, i), mem);
4013 offset += 8;
4017 else
4019 for (i = low; i < high; i += 2)
4021 bool reg0 = df_regs_ever_live_p (i) && ! call_used_regs[i];
4022 bool reg1 = df_regs_ever_live_p (i+1) && ! call_used_regs[i+1];
4023 enum machine_mode mode;
4024 int regno;
4026 if (reg0 && reg1)
4028 mode = i < 32 ? DImode : DFmode;
4029 regno = i;
4031 else if (reg0)
4033 mode = i < 32 ? SImode : SFmode;
4034 regno = i;
4036 else if (reg1)
4038 mode = i < 32 ? SImode : SFmode;
4039 regno = i + 1;
4040 offset += 4;
4042 else
4043 continue;
4045 mem = gen_rtx_MEM (mode, plus_constant (base, offset));
4046 set_mem_alias_set (mem, sparc_sr_alias_set);
4047 if (action == SORR_SAVE)
4049 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
4050 RTX_FRAME_RELATED_P (insn) = 1;
4052 else /* action == SORR_RESTORE */
4053 emit_move_insn (gen_rtx_REG (mode, regno), mem);
4055 /* Always preserve double-word alignment. */
4056 offset = (offset + 7) & -8;
4060 return offset;
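/* An illustrative trace of the paired path above: if both registers of
   a pair are live and call-saved, they are moved as a single DImode
   (or DFmode) access; if only the odd register is live, OFFSET is first
   bumped by 4 so the SImode/SFmode access keeps its natural alignment,
   and OFFSET is rounded back up to a multiple of 8 before the next pair
   is considered.  */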
4063 /* Emit code to save call-saved registers. */
4065 static void
4066 emit_save_or_restore_regs (int action)
4068 HOST_WIDE_INT offset;
4069 rtx base;
4071 offset = frame_base_offset - apparent_fsize;
4073 if (offset < -4096 || offset + num_gfregs * 4 > 4095)
4075 /* ??? This might be optimized a little as %g1 might already have a
4076 value close enough that a single add insn will do. */
4077 /* ??? All of this is probably only a temporary fix, though,
4078 because if %g1 can hold a function result, then
4079 sparc_expand_epilogue will lose (the result will be
4080 clobbered). */
4081 base = gen_rtx_REG (Pmode, 1);
4082 emit_move_insn (base, GEN_INT (offset));
4083 emit_insn (gen_rtx_SET (VOIDmode,
4084 base,
4085 gen_rtx_PLUS (Pmode, frame_base_reg, base)));
4086 offset = 0;
4088 else
4089 base = frame_base_reg;
4091 offset = save_or_restore_regs (0, 8, base, offset, action);
4092 save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, action);
4095 /* Generate a save_register_window insn. */
4097 static rtx
4098 gen_save_register_window (rtx increment)
4100 if (TARGET_ARCH64)
4101 return gen_save_register_windowdi (increment);
4102 else
4103 return gen_save_register_windowsi (increment);
4106 /* Generate an increment for the stack pointer. */
4108 static rtx
4109 gen_stack_pointer_inc (rtx increment)
4111 return gen_rtx_SET (VOIDmode,
4112 stack_pointer_rtx,
4113 gen_rtx_PLUS (Pmode,
4114 stack_pointer_rtx,
4115 increment));
4118 /* Generate a decrement for the stack pointer. */
4120 static rtx
4121 gen_stack_pointer_dec (rtx decrement)
4123 return gen_rtx_SET (VOIDmode,
4124 stack_pointer_rtx,
4125 gen_rtx_MINUS (Pmode,
4126 stack_pointer_rtx,
4127 decrement));
4130 /* Expand the function prologue. The prologue is responsible for reserving
4131 storage for the frame, saving the call-saved registers and loading the
4132 PIC register if needed. */
4134 void
4135 sparc_expand_prologue (void)
4137 rtx insn;
4138 int i;
4140 /* Compute a snapshot of current_function_uses_only_leaf_regs. Relying
4141 on the final value of the flag means deferring the prologue/epilogue
4142 expansion until just before the second scheduling pass, which is too
4143 late to emit multiple epilogues or return insns.
4145 Of course we are making the assumption that the value of the flag
4146 will not change between now and its final value. Of the three parts
4147 of the formula, only the last one can reasonably vary. Let's take a
4148 closer look, after assuming that the first two are true
4149 (otherwise the last value is effectively silenced).
4151 If only_leaf_regs_used returns false, the global predicate will also
4152 be false so the actual frame size calculated below will be positive.
4153 As a consequence, the save_register_window insn will be emitted in
4154 the instruction stream; now this insn explicitly references %fp
4155 which is not a leaf register so only_leaf_regs_used will always
4156 return false subsequently.
4158 If only_leaf_regs_used returns true, we hope that the subsequent
4159 optimization passes won't cause non-leaf registers to pop up. For
4160 example, the regrename pass has special provisions to not rename to
4161 non-leaf registers in a leaf function. */
4162 sparc_leaf_function_p
4163 = optimize > 0 && leaf_function_p () && only_leaf_regs_used ();
4165 /* Need to use actual_fsize, since we are also allocating
4166 space for our callee (and our own register save area). */
4167 actual_fsize
4168 = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
4170 /* Advertise that the data calculated just above are now valid. */
4171 sparc_prologue_data_valid_p = true;
4173 if (sparc_leaf_function_p)
4175 frame_base_reg = stack_pointer_rtx;
4176 frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
4178 else
4180 frame_base_reg = hard_frame_pointer_rtx;
4181 frame_base_offset = SPARC_STACK_BIAS;
4184 if (actual_fsize == 0)
4185 /* do nothing. */ ;
4186 else if (sparc_leaf_function_p)
4188 if (actual_fsize <= 4096)
4189 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4190 else if (actual_fsize <= 8192)
4192 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
4193 /* %sp is still the CFA register. */
4194 RTX_FRAME_RELATED_P (insn) = 1;
4195 insn
4196 = emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4198 else
4200 rtx reg = gen_rtx_REG (Pmode, 1);
4201 emit_move_insn (reg, GEN_INT (-actual_fsize));
4202 insn = emit_insn (gen_stack_pointer_inc (reg));
4203 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4204 gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4207 RTX_FRAME_RELATED_P (insn) = 1;
4209 else
4211 if (actual_fsize <= 4096)
4212 insn = emit_insn (gen_save_register_window (GEN_INT (-actual_fsize)));
4213 else if (actual_fsize <= 8192)
4215 insn = emit_insn (gen_save_register_window (GEN_INT (-4096)));
4216 /* %sp is not the CFA register anymore. */
4217 emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4219 else
4221 rtx reg = gen_rtx_REG (Pmode, 1);
4222 emit_move_insn (reg, GEN_INT (-actual_fsize));
4223 insn = emit_insn (gen_save_register_window (reg));
4226 RTX_FRAME_RELATED_P (insn) = 1;
4227 for (i=0; i < XVECLEN (PATTERN (insn), 0); i++)
4228 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, i)) = 1;
4231 if (num_gfregs)
4232 emit_save_or_restore_regs (SORR_SAVE);
4234 /* Load the PIC register if needed. */
4235 if (flag_pic && crtl->uses_pic_offset_table)
4236 load_pic_register (false);
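/* The three allocation strategies above produce code of roughly the
   following shape in the non-leaf case (illustrative only; the last
   variant goes through %g1 because -FSIZE no longer fits in a signed
   13-bit immediate):

     actual_fsize <= 4096:   save  %sp, -FSIZE, %sp
     actual_fsize <= 8192:   save  %sp, -4096, %sp
                             add   %sp, 4096-FSIZE, %sp
     otherwise:              sethi %hi(-FSIZE), %g1
                             or    %g1, %lo(-FSIZE), %g1
                             save  %sp, %g1, %sp
*/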
4239 /* This function generates the assembly code for function entry, which boils
4240 down to emitting the necessary .register directives. */
4242 static void
4243 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4245 /* Check that the assumption we made in sparc_expand_prologue is valid. */
4246 gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs);
4248 sparc_output_scratch_registers (file);
4251 /* Expand the function epilogue, either normal or part of a sibcall.
4252 We emit all the instructions except the return or the call. */
4254 void
4255 sparc_expand_epilogue (void)
4257 if (num_gfregs)
4258 emit_save_or_restore_regs (SORR_RESTORE);
4260 if (actual_fsize == 0)
4261 /* do nothing. */ ;
4262 else if (sparc_leaf_function_p)
4264 if (actual_fsize <= 4096)
4265 emit_insn (gen_stack_pointer_dec (GEN_INT (- actual_fsize)));
4266 else if (actual_fsize <= 8192)
4268 emit_insn (gen_stack_pointer_dec (GEN_INT (-4096)));
4269 emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - actual_fsize)));
4271 else
4273 rtx reg = gen_rtx_REG (Pmode, 1);
4274 emit_move_insn (reg, GEN_INT (-actual_fsize));
4275 emit_insn (gen_stack_pointer_dec (reg));
4280 /* Return true if it is appropriate to emit `return' instructions in the
4281 body of a function. */
4283 bool
4284 sparc_can_use_return_insn_p (void)
4286 return sparc_prologue_data_valid_p
4287 && (actual_fsize == 0 || !sparc_leaf_function_p);
4290 /* This function generates the assembly code for function exit. */
4292 static void
4293 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4295 /* If code does not drop into the epilogue, we still have to output
4296 a dummy nop for the sake of sane backtraces. Otherwise, if the
4297 last two instructions of a function were "call foo; dslot;" this
4298 can make the return PC of foo (i.e. address of call instruction
4299 plus 8) point to the first instruction in the next function. */
4301 rtx insn, last_real_insn;
4303 insn = get_last_insn ();
4305 last_real_insn = prev_real_insn (insn);
4306 if (last_real_insn
4307 && GET_CODE (last_real_insn) == INSN
4308 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
4309 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
4311 if (last_real_insn && GET_CODE (last_real_insn) == CALL_INSN)
4312 fputs("\tnop\n", file);
4314 sparc_output_deferred_case_vectors ();
4317 /* Output a 'restore' instruction. */
4319 static void
4320 output_restore (rtx pat)
4322 rtx operands[3];
4324 if (! pat)
4326 fputs ("\t restore\n", asm_out_file);
4327 return;
4330 gcc_assert (GET_CODE (pat) == SET);
4332 operands[0] = SET_DEST (pat);
4333 pat = SET_SRC (pat);
4335 switch (GET_CODE (pat))
4337 case PLUS:
4338 operands[1] = XEXP (pat, 0);
4339 operands[2] = XEXP (pat, 1);
4340 output_asm_insn (" restore %r1, %2, %Y0", operands);
4341 break;
4342 case LO_SUM:
4343 operands[1] = XEXP (pat, 0);
4344 operands[2] = XEXP (pat, 1);
4345 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
4346 break;
4347 case ASHIFT:
4348 operands[1] = XEXP (pat, 0);
4349 gcc_assert (XEXP (pat, 1) == const1_rtx);
4350 output_asm_insn (" restore %r1, %r1, %Y0", operands);
4351 break;
4352 default:
4353 operands[1] = pat;
4354 output_asm_insn (" restore %%g0, %1, %Y0", operands);
4355 break;
4359 /* Output a return. */
4361 const char *
4362 output_return (rtx insn)
4364 if (sparc_leaf_function_p)
4366 /* This is a leaf function so we don't have to bother restoring the
4367 register window, which frees us from dealing with the convoluted
4368 semantics of restore/return. We simply output the jump to the
4369 return address and the insn in the delay slot (if any). */
4371 gcc_assert (! crtl->calls_eh_return);
4373 return "jmp\t%%o7+%)%#";
4375 else
4377 /* This is a regular function so we have to restore the register window.
4378 We may have a pending insn for the delay slot, which will be either
4379 combined with the 'restore' instruction or put in the delay slot of
4380 the 'return' instruction. */
4382 if (crtl->calls_eh_return)
4384 /* If the function uses __builtin_eh_return, the eh_return
4385 machinery occupies the delay slot. */
4386 gcc_assert (! final_sequence);
4388 if (! flag_delayed_branch)
4389 fputs ("\tadd\t%fp, %g1, %fp\n", asm_out_file);
4391 if (TARGET_V9)
4392 fputs ("\treturn\t%i7+8\n", asm_out_file);
4393 else
4394 fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file);
4396 if (flag_delayed_branch)
4397 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
4398 else
4399 fputs ("\t nop\n", asm_out_file);
4401 else if (final_sequence)
4403 rtx delay, pat;
4405 delay = NEXT_INSN (insn);
4406 gcc_assert (delay);
4408 pat = PATTERN (delay);
4410 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
4412 epilogue_renumber (&pat, 0);
4413 return "return\t%%i7+%)%#";
4415 else
4417 output_asm_insn ("jmp\t%%i7+%)", NULL);
4418 output_restore (pat);
4419 PATTERN (delay) = gen_blockage ();
4420 INSN_CODE (delay) = -1;
4423 else
4425 /* The delay slot is empty. */
4426 if (TARGET_V9)
4427 return "return\t%%i7+%)\n\t nop";
4428 else if (flag_delayed_branch)
4429 return "jmp\t%%i7+%)\n\t restore";
4430 else
4431 return "restore\n\tjmp\t%%o7+%)\n\t nop";
4435 return "";
4438 /* Output a sibling call. */
4440 const char *
4441 output_sibcall (rtx insn, rtx call_operand)
4443 rtx operands[1];
4445 gcc_assert (flag_delayed_branch);
4447 operands[0] = call_operand;
4449 if (sparc_leaf_function_p)
4451 /* This is a leaf function so we don't have to bother restoring the
4452 register window. We simply output the jump to the function and
4453 the insn in the delay slot (if any). */
4455 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
4457 if (final_sequence)
4458 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
4459 operands);
4460 else
4461 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
4462 it into a branch if possible. */
4463 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
4464 operands);
4466 else
4468 /* This is a regular function so we have to restore the register window.
4469 We may have a pending insn for the delay slot, which will be combined
4470 with the 'restore' instruction. */
4472 output_asm_insn ("call\t%a0, 0", operands);
4474 if (final_sequence)
4476 rtx delay = NEXT_INSN (insn);
4477 gcc_assert (delay);
4479 output_restore (PATTERN (delay));
4481 PATTERN (delay) = gen_blockage ();
4482 INSN_CODE (delay) = -1;
4484 else
4485 output_restore (NULL_RTX);
4488 return "";
4491 /* Functions for handling argument passing.
4493 For 32-bit, the first 6 args are normally in registers and the rest are
4494 pushed. Any arg that starts within the first 6 words is at least
4495 partially passed in a register unless its data type forbids it.
4497 For 64-bit, the argument registers are laid out as an array of 16 elements
4498 and arguments are added sequentially. The first 6 int args and up to the
4499 first 16 fp args (depending on size) are passed in regs.
4501 Slot Stack Integral Float Float in structure Double Long Double
4502 ---- ----- -------- ----- ------------------ ------ -----------
4503 15 [SP+248] %f31 %f30,%f31 %d30
4504 14 [SP+240] %f29 %f28,%f29 %d28 %q28
4505 13 [SP+232] %f27 %f26,%f27 %d26
4506 12 [SP+224] %f25 %f24,%f25 %d24 %q24
4507 11 [SP+216] %f23 %f22,%f23 %d22
4508 10 [SP+208] %f21 %f20,%f21 %d20 %q20
4509 9 [SP+200] %f19 %f18,%f19 %d18
4510 8 [SP+192] %f17 %f16,%f17 %d16 %q16
4511 7 [SP+184] %f15 %f14,%f15 %d14
4512 6 [SP+176] %f13 %f12,%f13 %d12 %q12
4513 5 [SP+168] %o5 %f11 %f10,%f11 %d10
4514 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
4515 3 [SP+152] %o3 %f7 %f6,%f7 %d6
4516 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
4517 1 [SP+136] %o1 %f3 %f2,%f3 %d2
4518 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
4520 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
4522 Integral arguments are always passed as 64-bit quantities appropriately
4523 extended.
4525 Passing of floating point values is handled as follows.
4526 If a prototype is in scope:
4527 If the value is in a named argument (i.e. not a stdarg function or a
4528 value not part of the `...') then the value is passed in the appropriate
4529 fp reg.
4530 If the value is part of the `...' and is passed in one of the first 6
4531 slots then the value is passed in the appropriate int reg.
4532 If the value is part of the `...' and is not passed in one of the first 6
4533 slots then the value is passed in memory.
4534 If a prototype is not in scope:
4535 If the value is one of the first 6 arguments the value is passed in the
4536 appropriate integer reg and the appropriate fp reg.
4537 If the value is not one of the first 6 arguments the value is passed in
4538 the appropriate fp reg and in memory.
4541 Summary of the calling conventions implemented by GCC on SPARC:
4543 32-bit ABI:
4544 size argument return value
4546 small integer <4 int. reg. int. reg.
4547 word 4 int. reg. int. reg.
4548 double word 8 int. reg. int. reg.
4550 _Complex small integer <8 int. reg. int. reg.
4551 _Complex word 8 int. reg. int. reg.
4552 _Complex double word 16 memory int. reg.
4554 vector integer <=8 int. reg. FP reg.
4555 vector integer >8 memory memory
4557 float 4 int. reg. FP reg.
4558 double 8 int. reg. FP reg.
4559 long double 16 memory memory
4561 _Complex float 8 memory FP reg.
4562 _Complex double 16 memory FP reg.
4563 _Complex long double 32 memory FP reg.
4565 vector float any memory memory
4567 aggregate any memory memory
4571 64-bit ABI:
4572 size argument return value
4574 small integer <8 int. reg. int. reg.
4575 word 8 int. reg. int. reg.
4576 double word 16 int. reg. int. reg.
4578 _Complex small integer <16 int. reg. int. reg.
4579 _Complex word 16 int. reg. int. reg.
4580 _Complex double word 32 memory int. reg.
4582 vector integer <=16 FP reg. FP reg.
4583 vector integer 16<s<=32 memory FP reg.
4584 vector integer >32 memory memory
4586 float 4 FP reg. FP reg.
4587 double 8 FP reg. FP reg.
4588 long double 16 FP reg. FP reg.
4590 _Complex float 8 FP reg. FP reg.
4591 _Complex double 16 FP reg. FP reg.
4592 _Complex long double 32 memory FP reg.
4594 vector float <=16 FP reg. FP reg.
4595 vector float 16<s<=32 memory FP reg.
4596 vector float >32 memory memory
4598 aggregate <=16 reg. reg.
4599 aggregate 16<s<=32 memory reg.
4600 aggregate >32 memory memory
4604 Note #1: complex floating-point types follow the extended SPARC ABIs as
4605 implemented by the Sun compiler.
4607 Note #2: integral vector types follow the scalar floating-point types
4608 conventions to match what is implemented by the Sun VIS SDK.
4610 Note #3: floating-point vector types follow the aggregate types
4611 conventions. */
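/* As a concrete illustration of the 64-bit rules above, consider the
   prototyped call (example only):

     extern void f (int a, double b, float c);
     f (1, 2.0, 3.0f);

   Slot 0 carries A in %o0, slot 1 carries B in %d2, and slot 2 carries
   C right-justified in %f5, matching the slot table above.  */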
4614 /* Maximum number of int regs for args. */
4615 #define SPARC_INT_ARG_MAX 6
4616 /* Maximum number of fp regs for args. */
4617 #define SPARC_FP_ARG_MAX 16
4619 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
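/* E.g. with 8-byte words, ROUND_ADVANCE (8) == 1 and
   ROUND_ADVANCE (12) == 2: sizes round up to whole words.  */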
4621 /* Handle the INIT_CUMULATIVE_ARGS macro.
4622 Initialize a variable CUM of type CUMULATIVE_ARGS
4623 for a call to a function whose data type is FNTYPE.
4624 For a library call, FNTYPE is 0. */
4626 void
4627 init_cumulative_args (struct sparc_args *cum, tree fntype,
4628 rtx libname ATTRIBUTE_UNUSED,
4629 tree fndecl ATTRIBUTE_UNUSED)
4631 cum->words = 0;
4632 cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
4633 cum->libcall_p = fntype == 0;
4636 /* Handle the TARGET_PROMOTE_PROTOTYPES target hook.
4637 When a prototype says `char' or `short', really pass an `int'. */
4639 static bool
4640 sparc_promote_prototypes (const_tree fntype ATTRIBUTE_UNUSED)
4642 return TARGET_ARCH32 ? true : false;
4645 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
4647 static bool
4648 sparc_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
4650 return TARGET_ARCH64 ? true : false;
4653 /* Scan the record type TYPE and return the following predicates:
4654 - INTREGS_P: the record contains at least one field or sub-field
4655 that is eligible for promotion in integer registers.
4656 - FPREGS_P: the record contains at least one field or sub-field
4657 that is eligible for promotion in floating-point registers.
4658 - PACKED_P: the record contains at least one field that is packed.
4660 Sub-fields are not taken into account for the PACKED_P predicate. */
4662 static void
4663 scan_record_type (tree type, int *intregs_p, int *fpregs_p, int *packed_p)
4665 tree field;
4667 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4669 if (TREE_CODE (field) == FIELD_DECL)
4671 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4672 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
4673 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4674 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4675 && TARGET_FPU)
4676 *fpregs_p = 1;
4677 else
4678 *intregs_p = 1;
4680 if (packed_p && DECL_PACKED (field))
4681 *packed_p = 1;
4686 /* Compute the slot number to pass an argument in.
4687 Return the slot number or -1 if passing on the stack.
4689 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4690 the preceding args and about the function being called.
4691 MODE is the argument's machine mode.
4692 TYPE is the data type of the argument (as a tree).
4693 This is null for libcalls where that information may
4694 not be available.
4695 NAMED is nonzero if this argument is a named parameter
4696 (otherwise it is an extra parameter matching an ellipsis).
4697 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
4698 *PREGNO records the register number to use if scalar type.
4699 *PPADDING records the amount of padding needed in words. */
4701 static int
4702 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
4703 tree type, int named, int incoming_p,
4704 int *pregno, int *ppadding)
4706 int regbase = (incoming_p
4707 ? SPARC_INCOMING_INT_ARG_FIRST
4708 : SPARC_OUTGOING_INT_ARG_FIRST);
4709 int slotno = cum->words;
4710 enum mode_class mclass;
4711 int regno;
4713 *ppadding = 0;
4715 if (type && TREE_ADDRESSABLE (type))
4716 return -1;
4718 if (TARGET_ARCH32
4719 && mode == BLKmode
4720 && type
4721 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
4722 return -1;
4724 /* For SPARC64, objects requiring 16-byte alignment get it. */
4725 if (TARGET_ARCH64
4726 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
4727 && (slotno & 1) != 0)
4728 slotno++, *ppadding = 1;
4730 mclass = GET_MODE_CLASS (mode);
4731 if (type && TREE_CODE (type) == VECTOR_TYPE)
4733 /* Vector types deserve special treatment because they are
4734 polymorphic wrt their mode, depending upon whether VIS
4735 instructions are enabled. */
4736 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4738 /* The SPARC port defines no floating-point vector modes. */
4739 gcc_assert (mode == BLKmode);
4741 else
4743 /* Integral vector types should either have a vector
4744 mode or an integral mode, because we are guaranteed
4745 by pass_by_reference that their size is not greater
4746 than 16 bytes and TImode is 16-byte wide. */
4747 gcc_assert (mode != BLKmode);
4749 /* Vector integers are handled like floats according to
4750 the Sun VIS SDK. */
4751 mclass = MODE_FLOAT;
4755 switch (mclass)
4757 case MODE_FLOAT:
4758 case MODE_COMPLEX_FLOAT:
4759 case MODE_VECTOR_INT:
4760 if (TARGET_ARCH64 && TARGET_FPU && named)
4762 if (slotno >= SPARC_FP_ARG_MAX)
4763 return -1;
4764 regno = SPARC_FP_ARG_FIRST + slotno * 2;
4765 /* Arguments filling only a single FP register are
4766 right-justified in the outer double FP register. */
4767 if (GET_MODE_SIZE (mode) <= 4)
4768 regno++;
4769 break;
4771 /* fallthrough */
4773 case MODE_INT:
4774 case MODE_COMPLEX_INT:
4775 if (slotno >= SPARC_INT_ARG_MAX)
4776 return -1;
4777 regno = regbase + slotno;
4778 break;
4780 case MODE_RANDOM:
4781 if (mode == VOIDmode)
4782 /* MODE is VOIDmode when generating the actual call. */
4783 return -1;
4785 gcc_assert (mode == BLKmode);
4787 if (TARGET_ARCH32
4788 || !type
4789 || (TREE_CODE (type) != VECTOR_TYPE
4790 && TREE_CODE (type) != RECORD_TYPE))
4792 if (slotno >= SPARC_INT_ARG_MAX)
4793 return -1;
4794 regno = regbase + slotno;
4796 else /* TARGET_ARCH64 && type */
4798 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
4800 /* First see what kinds of registers we would need. */
4801 if (TREE_CODE (type) == VECTOR_TYPE)
4802 fpregs_p = 1;
4803 else
4804 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
4806 /* The ABI obviously doesn't specify how packed structures
4807 are passed. These are defined to be passed in int regs
4808 if possible, otherwise memory. */
4809 if (packed_p || !named)
4810 fpregs_p = 0, intregs_p = 1;
4812 /* If all arg slots are filled, then must pass on stack. */
4813 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
4814 return -1;
4816 /* If there are only int args and all int arg slots are filled,
4817 then must pass on stack. */
4818 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
4819 return -1;
4821 /* Note that even if all int arg slots are filled, fp members may
4822 still be passed in regs if such regs are available.
4823 *PREGNO isn't set because there may be more than one, it's up
4824 to the caller to compute them. */
4825 return slotno;
4827 break;
4829 default :
4830 gcc_unreachable ();
4833 *pregno = regno;
4834 return slotno;
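/* E.g. a named DFmode argument in slot 1 under the 64-bit ABI yields
   regno SPARC_FP_ARG_FIRST + 2, i.e. %d2; an SFmode argument in the
   same slot would land right-justified in %f3 (illustrative).  */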
4837 /* Handle recursive register counting for structure field layout. */
4839 struct function_arg_record_value_parms
4841 rtx ret; /* return expression being built. */
4842 int slotno; /* slot number of the argument. */
4843 int named; /* whether the argument is named. */
4844 int regbase; /* regno of the base register. */
4845 int stack; /* 1 if part of the argument is on the stack. */
4846 int intoffset; /* offset of the first pending integer field. */
4847 unsigned int nregs; /* number of words passed in registers. */
4850 static void function_arg_record_value_3
4851 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
4852 static void function_arg_record_value_2
4853 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
4854 static void function_arg_record_value_1
4855 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
4856 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
4857 static rtx function_arg_union_value (int, enum machine_mode, int, int);
4859 /* A subroutine of function_arg_record_value. Traverse the structure
4860 recursively and determine how many registers will be required. */
4862 static void
4863 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
4864 struct function_arg_record_value_parms *parms,
4865 bool packed_p)
4867 tree field;
4869 /* We need to compute how many registers are needed so we can
4870 allocate the PARALLEL but before we can do that we need to know
4871 whether there are any packed fields. The ABI obviously doesn't
4872 specify how structures are passed in this case, so they are
4873 defined to be passed in int regs if possible, otherwise memory,
4874 regardless of whether there are fp values present. */
4876 if (! packed_p)
4877 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4879 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
4881 packed_p = true;
4882 break;
4886 /* Compute how many registers we need. */
4887 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4889 if (TREE_CODE (field) == FIELD_DECL)
4891 HOST_WIDE_INT bitpos = startbitpos;
4893 if (DECL_SIZE (field) != 0)
4895 if (integer_zerop (DECL_SIZE (field)))
4896 continue;
4898 if (host_integerp (bit_position (field), 1))
4899 bitpos += int_bit_position (field);
4902 /* ??? FIXME: else assume zero offset. */
4904 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4905 function_arg_record_value_1 (TREE_TYPE (field),
4906 bitpos,
4907 parms,
4908 packed_p);
4909 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4910 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4911 && TARGET_FPU
4912 && parms->named
4913 && ! packed_p)
4915 if (parms->intoffset != -1)
4917 unsigned int startbit, endbit;
4918 int intslots, this_slotno;
4920 startbit = parms->intoffset & -BITS_PER_WORD;
4921 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
4923 intslots = (endbit - startbit) / BITS_PER_WORD;
4924 this_slotno = parms->slotno + parms->intoffset
4925 / BITS_PER_WORD;
4927 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
4929 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
4930 /* We need to pass this field on the stack. */
4931 parms->stack = 1;
4934 parms->nregs += intslots;
4935 parms->intoffset = -1;
4938 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
4939 If it wasn't true we wouldn't be here. */
4940 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
4941 && DECL_MODE (field) == BLKmode)
4942 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
4943 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
4944 parms->nregs += 2;
4945 else
4946 parms->nregs += 1;
4948 else
4950 if (parms->intoffset == -1)
4951 parms->intoffset = bitpos;
4957 /* A subroutine of function_arg_record_value. Assign the bits of the
4958 structure between parms->intoffset and bitpos to integer registers. */
4960 static void
4961 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
4962 struct function_arg_record_value_parms *parms)
4964 enum machine_mode mode;
4965 unsigned int regno;
4966 unsigned int startbit, endbit;
4967 int this_slotno, intslots, intoffset;
4968 rtx reg;
4970 if (parms->intoffset == -1)
4971 return;
4973 intoffset = parms->intoffset;
4974 parms->intoffset = -1;
4976 startbit = intoffset & -BITS_PER_WORD;
4977 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
4978 intslots = (endbit - startbit) / BITS_PER_WORD;
4979 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
4981 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
4982 if (intslots <= 0)
4983 return;
4985 /* If this is the trailing part of a word, only load that much into
4986 the register. Otherwise load the whole register. Note that in
4987 the latter case we may pick up unwanted bits. It's not a problem
4988 at the moment but we may wish to revisit this. */
4990 if (intoffset % BITS_PER_WORD != 0)
4991 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
4992 MODE_INT);
4993 else
4994 mode = word_mode;
4996 intoffset /= BITS_PER_UNIT;
4999 regno = parms->regbase + this_slotno;
5000 reg = gen_rtx_REG (mode, regno);
5001 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5002 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
5004 this_slotno += 1;
5005 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
5006 mode = word_mode;
5007 parms->nregs += 1;
5008 intslots -= 1;
5010 while (intslots > 0);
5013 /* A subroutine of function_arg_record_value. Traverse the structure
5014 recursively and assign bits to floating point registers. Track which
5015 bits in between need integer registers; invoke function_arg_record_value_3
5016 to make that happen. */
5018 static void
5019 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
5020 struct function_arg_record_value_parms *parms,
5021 bool packed_p)
5023 tree field;
5025 if (! packed_p)
5026 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5028 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
5030 packed_p = true;
5031 break;
5035 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5037 if (TREE_CODE (field) == FIELD_DECL)
5039 HOST_WIDE_INT bitpos = startbitpos;
5041 if (DECL_SIZE (field) != 0)
5043 if (integer_zerop (DECL_SIZE (field)))
5044 continue;
5046 if (host_integerp (bit_position (field), 1))
5047 bitpos += int_bit_position (field);
5050 /* ??? FIXME: else assume zero offset. */
5052 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5053 function_arg_record_value_2 (TREE_TYPE (field),
5054 bitpos,
5055 parms,
5056 packed_p);
5057 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5058 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5059 && TARGET_FPU
5060 && parms->named
5061 && ! packed_p)
5063 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
5064 int regno, nregs, pos;
5065 enum machine_mode mode = DECL_MODE (field);
5066 rtx reg;
5068 function_arg_record_value_3 (bitpos, parms);
5070 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
5071 && mode == BLKmode)
5073 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5074 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
5076 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
5078 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5079 nregs = 2;
5081 else
5082 nregs = 1;
5084 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
5085 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
5086 regno++;
5087 reg = gen_rtx_REG (mode, regno);
5088 pos = bitpos / BITS_PER_UNIT;
5089 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5090 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5091 parms->nregs += 1;
5092 while (--nregs > 0)
5094 regno += GET_MODE_SIZE (mode) / 4;
5095 reg = gen_rtx_REG (mode, regno);
5096 pos += GET_MODE_SIZE (mode);
5097 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5098 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5099 parms->nregs += 1;
5102 else
5104 if (parms->intoffset == -1)
5105 parms->intoffset = bitpos;
5111 /* Used by function_arg and function_value to implement the complex
5112 conventions of the 64-bit ABI for passing and returning structures.
5113 Return an expression valid as a return value for the two macros
5114 FUNCTION_ARG and FUNCTION_VALUE.
5116 TYPE is the data type of the argument (as a tree).
5117 This is null for libcalls where that information may
5118 not be available.
5119 MODE is the argument's machine mode.
5120 SLOTNO is the index number of the argument's slot in the parameter array.
5121 NAMED is nonzero if this argument is a named parameter
5122 (otherwise it is an extra parameter matching an ellipsis).
5123 REGBASE is the regno of the base register for the parameter array. */
5125 static rtx
5126 function_arg_record_value (const_tree type, enum machine_mode mode,
5127 int slotno, int named, int regbase)
5129 HOST_WIDE_INT typesize = int_size_in_bytes (type);
5130 struct function_arg_record_value_parms parms;
5131 unsigned int nregs;
5133 parms.ret = NULL_RTX;
5134 parms.slotno = slotno;
5135 parms.named = named;
5136 parms.regbase = regbase;
5137 parms.stack = 0;
5139 /* Compute how many registers we need. */
5140 parms.nregs = 0;
5141 parms.intoffset = 0;
5142 function_arg_record_value_1 (type, 0, &parms, false);
5144 /* Take into account pending integer fields. */
5145 if (parms.intoffset != -1)
5147 unsigned int startbit, endbit;
5148 int intslots, this_slotno;
5150 startbit = parms.intoffset & -BITS_PER_WORD;
5151 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5152 intslots = (endbit - startbit) / BITS_PER_WORD;
5153 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
5155 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
5157 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
5158 /* We need to pass this field on the stack. */
5159 parms.stack = 1;
5162 parms.nregs += intslots;
5164 nregs = parms.nregs;
5166 /* Allocate the vector and handle some annoying special cases. */
5167 if (nregs == 0)
5169 /* ??? Empty structure has no value? Duh? */
5170 if (typesize <= 0)
5172 /* Though there's nothing really to store, return a word register
5173 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
5174 leads to breakage due to the fact that there are zero bytes to
5175 load. */
5176 return gen_rtx_REG (mode, regbase);
5178 else
5180 /* ??? C++ has structures with no fields, and yet a size. Give up
5181 for now and pass everything back in integer registers. */
5182 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5184 if (nregs + slotno > SPARC_INT_ARG_MAX)
5185 nregs = SPARC_INT_ARG_MAX - slotno;
5187 gcc_assert (nregs != 0);
5189 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
5191 /* If at least one field must be passed on the stack, generate
5192 (parallel [(expr_list (nil) ...) ...]) so that all fields will
5193 also be passed on the stack. We can't do much better because the
5194 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
5195 of structures for which the fields passed exclusively in registers
5196 are not at the beginning of the structure. */
5197 if (parms.stack)
5198 XVECEXP (parms.ret, 0, 0)
5199 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5201 /* Fill in the entries. */
5202 parms.nregs = 0;
5203 parms.intoffset = 0;
5204 function_arg_record_value_2 (type, 0, &parms, false);
5205 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
5207 gcc_assert (parms.nregs == nregs);
5209 return parms.ret;
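/* For illustration, a 16-byte struct { double d; long l; } passed as a
   named, prototyped outgoing argument in slot 0 comes back from this
   routine as roughly:

     (parallel [(expr_list (reg:DF 32 %f0) (const_int 0))
                (expr_list (reg:DI 9 %o1) (const_int 8))])

   i.e. the FP field goes in %d0 while the trailing integer field uses
   the second slot's integer register.  */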
5212 /* Used by function_arg and function_value to implement the conventions
5213 of the 64-bit ABI for passing and returning unions.
5214 Return an expression valid as a return value for the two macros
5215 FUNCTION_ARG and FUNCTION_VALUE.
5217 SIZE is the size in bytes of the union.
5218 MODE is the argument's machine mode.
5219 SLOTNO is the argument's slot index; REGNO is the hard register the union will be passed in. */
5221 static rtx
5222 function_arg_union_value (int size, enum machine_mode mode, int slotno,
5223 int regno)
5225 int nwords = ROUND_ADVANCE (size), i;
5226 rtx regs;
5228 /* See comment in previous function for empty structures. */
5229 if (nwords == 0)
5230 return gen_rtx_REG (mode, regno);
5232 if (slotno == SPARC_INT_ARG_MAX - 1)
5233 nwords = 1;
5235 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
5237 for (i = 0; i < nwords; i++)
5239 /* Unions are passed left-justified. */
5240 XVECEXP (regs, 0, i)
5241 = gen_rtx_EXPR_LIST (VOIDmode,
5242 gen_rtx_REG (word_mode, regno),
5243 GEN_INT (UNITS_PER_WORD * i));
5244 regno++;
5247 return regs;
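/* E.g. a 12-byte union starting in outgoing slot 0 is split into two
   left-justified word pieces (illustrative):

     (parallel [(expr_list (reg:DI %o0) (const_int 0))
                (expr_list (reg:DI %o1) (const_int 8))])
*/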
5250 /* Used by function_arg and function_value to implement the conventions
5251 for passing and returning large (BLKmode) vectors.
5252 Return an expression valid as a return value for the two macros
5253 FUNCTION_ARG and FUNCTION_VALUE.
5255 SIZE is the size in bytes of the vector (at least 8 bytes).
5256 REGNO is the FP hard register the vector will be passed in. */
5258 static rtx
5259 function_arg_vector_value (int size, int regno)
5261 int i, nregs = size / 8;
5262 rtx regs;
5264 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
5266 for (i = 0; i < nregs; i++)
5268 XVECEXP (regs, 0, i)
5269 = gen_rtx_EXPR_LIST (VOIDmode,
5270 gen_rtx_REG (DImode, regno + 2*i),
5271 GEN_INT (i*8));
5274 return regs;
5277 /* Handle the FUNCTION_ARG macro.
5278 Determine where to put an argument to a function.
5279 Value is zero to push the argument on the stack,
5280 or a hard register in which to store the argument.
5282 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5283 the preceding args and about the function being called.
5284 MODE is the argument's machine mode.
5285 TYPE is the data type of the argument (as a tree).
5286 This is null for libcalls where that information may
5287 not be available.
5288 NAMED is nonzero if this argument is a named parameter
5289 (otherwise it is an extra parameter matching an ellipsis).
5290 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. */
5292 rtx
5293 function_arg (const struct sparc_args *cum, enum machine_mode mode,
5294 tree type, int named, int incoming_p)
5296 int regbase = (incoming_p
5297 ? SPARC_INCOMING_INT_ARG_FIRST
5298 : SPARC_OUTGOING_INT_ARG_FIRST);
5299 int slotno, regno, padding;
5300 enum mode_class mclass = GET_MODE_CLASS (mode);
5302 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
5303 &regno, &padding);
5304 if (slotno == -1)
5305 return 0;
5307 /* Vector types deserve special treatment because they are polymorphic wrt
5308 their mode, depending upon whether VIS instructions are enabled. */
5309 if (type && TREE_CODE (type) == VECTOR_TYPE)
5311 HOST_WIDE_INT size = int_size_in_bytes (type);
5312 gcc_assert ((TARGET_ARCH32 && size <= 8)
5313 || (TARGET_ARCH64 && size <= 16));
5315 if (mode == BLKmode)
5316 return function_arg_vector_value (size,
5317 SPARC_FP_ARG_FIRST + 2*slotno);
5318 else
5319 mclass = MODE_FLOAT;
5322 if (TARGET_ARCH32)
5323 return gen_rtx_REG (mode, regno);
5325 /* Structures up to 16 bytes in size are passed in arg slots on the stack
5326 and are promoted to registers if possible. */
5327 if (type && TREE_CODE (type) == RECORD_TYPE)
5329 HOST_WIDE_INT size = int_size_in_bytes (type);
5330 gcc_assert (size <= 16);
5332 return function_arg_record_value (type, mode, slotno, named, regbase);
5335 /* Unions up to 16 bytes in size are passed in integer registers. */
5336 else if (type && TREE_CODE (type) == UNION_TYPE)
5338 HOST_WIDE_INT size = int_size_in_bytes (type);
5339 gcc_assert (size <= 16);
5341 return function_arg_union_value (size, mode, slotno, regno);
5344 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
5345 but also have the slot allocated for them.
5346 If no prototype is in scope, fp values in register slots get passed
5347 in two places, either fp regs and int regs or fp regs and memory. */
5348 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
5349 && SPARC_FP_REG_P (regno))
5351 rtx reg = gen_rtx_REG (mode, regno);
5352 if (cum->prototype_p || cum->libcall_p)
5354 /* "* 2" because fp reg numbers are recorded in 4 byte
5355 quantities. */
5356 #if 0
5357 /* ??? This will cause the value to be passed in the fp reg and
5358 in the stack. When a prototype exists we want to pass the
5359 value in the reg but reserve space on the stack. That's an
5360 optimization, and is deferred [for a bit]. */
5361 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
5362 return gen_rtx_PARALLEL (mode,
5363 gen_rtvec (2,
5364 gen_rtx_EXPR_LIST (VOIDmode,
5365 NULL_RTX, const0_rtx),
5366 gen_rtx_EXPR_LIST (VOIDmode,
5367 reg, const0_rtx)));
5368 else
5369 #else
5370 /* ??? It seems that passing back a register even when past
5371 the area declared by REG_PARM_STACK_SPACE will allocate
5372 space appropriately, and will not copy the data onto the
5373 stack, exactly as we desire.
5375 This is due to locate_and_pad_parm being called in
5376 expand_call whenever reg_parm_stack_space > 0, which
5377 while beneficial to our example here, would seem to be
5378 in error from what had been intended. Ho hum... -- r~ */
5379 #endif
5380 return reg;
5382 else
5384 rtx v0, v1;
5386 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
5388 int intreg;
5390 /* On incoming, we don't need to know that the value
5391 is passed in %f0 and %i0, and it confuses other parts
5392 causing needless spillage even on the simplest cases. */
5393 if (incoming_p)
5394 return reg;
5396 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
5397 + (regno - SPARC_FP_ARG_FIRST) / 2);
5399 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5400 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
5401 const0_rtx);
5402 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5404 else
5406 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5407 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5408 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5413 /* All other aggregate types are passed in an integer register in a mode
5414 corresponding to the size of the type. */
5415 else if (type && AGGREGATE_TYPE_P (type))
5417 HOST_WIDE_INT size = int_size_in_bytes (type);
5418 gcc_assert (size <= 16);
5420 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5423 return gen_rtx_REG (mode, regno);
5426 /* For an arg passed partly in registers and partly in memory,
5427 this is the number of bytes of registers used.
5428 For args passed entirely in registers or entirely in memory, zero.
5430 Any arg that starts in the first 6 regs but won't entirely fit in them
5431 needs partial registers on v8. On v9, structures with integer
5432 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
5433 values that begin in the last fp reg [where "last fp reg" varies with the
5434 mode] will be split between that reg and memory. */
5436 static int
5437 sparc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5438 tree type, bool named)
5440 int slotno, regno, padding;
5442 /* We pass 0 for incoming_p here, it doesn't matter. */
5443 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5445 if (slotno == -1)
5446 return 0;
5448 if (TARGET_ARCH32)
5450 if ((slotno + (mode == BLKmode
5451 ? ROUND_ADVANCE (int_size_in_bytes (type))
5452 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
5453 > SPARC_INT_ARG_MAX)
5454 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
5456 else
5458 /* We are guaranteed by pass_by_reference that the size of the
5459 argument is not greater than 16 bytes, so we only need to return
5460 one word if the argument is partially passed in registers. */
5462 if (type && AGGREGATE_TYPE_P (type))
5464 int size = int_size_in_bytes (type);
5466 if (size > UNITS_PER_WORD
5467 && slotno == SPARC_INT_ARG_MAX - 1)
5468 return UNITS_PER_WORD;
5470 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
5471 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5472 && ! (TARGET_FPU && named)))
5474 /* The complex types are passed as packed types. */
5475 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
5476 && slotno == SPARC_INT_ARG_MAX - 1)
5477 return UNITS_PER_WORD;
5479 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5481 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
5482 > SPARC_FP_ARG_MAX)
5483 return UNITS_PER_WORD;
5487 return 0;
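/* E.g. on 32-bit, a DImode argument starting in slot 5 spans slots 5
   and 6; only slot 5 is a register, so the routine above reports 4
   bytes in registers (%o5) with the remainder on the stack.  */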
5490 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
5491 Specify whether to pass the argument by reference. */
5493 static bool
5494 sparc_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5495 enum machine_mode mode, const_tree type,
5496 bool named ATTRIBUTE_UNUSED)
5498 if (TARGET_ARCH32)
5499 /* Original SPARC 32-bit ABI says that structures and unions,
5500 and quad-precision floats are passed by reference. For Pascal,
5501 also pass arrays by reference. All other base types are passed
5502 in registers.
5504 Extended ABI (as implemented by the Sun compiler) says that all
5505 complex floats are passed by reference. Pass complex integers
5506 in registers up to 8 bytes. More generally, enforce the 2-word
5507 cap for passing arguments in registers.
5509 Vector ABI (as implemented by the Sun VIS SDK) says that vector
5510 integers are passed like floats of the same size, that is in
5511 registers up to 8 bytes. Pass all vector floats by reference
5512 like structure and unions. */
5513 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
5514 || mode == SCmode
5515 /* Catch CDImode, TFmode, DCmode and TCmode. */
5516 || GET_MODE_SIZE (mode) > 8
5517 || (type
5518 && TREE_CODE (type) == VECTOR_TYPE
5519 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5520 else
5521 /* Original SPARC 64-bit ABI says that structures and unions
5522 smaller than 16 bytes are passed in registers, as well as
5523 all other base types.
5525 Extended ABI (as implemented by the Sun compiler) says that
5526 complex floats are passed in registers up to 16 bytes. Pass
5527 all complex integers in registers up to 16 bytes. More generally,
5528 enforce the 2-word cap for passing arguments in registers.
5530 Vector ABI (as implemented by the Sun VIS SDK) says that vector
5531 integers are passed like floats of the same size, that is in
5532 registers (up to 16 bytes). Pass all vector floats like structure
5533 and unions. */
5534 return ((type
5535 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
5536 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
5537 /* Catch CTImode and TCmode. */
5538 || GET_MODE_SIZE (mode) > 16);
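/* Illustrative consequences of the rules above:

     struct { int a, b, c; }   32-bit: by reference  64-bit: in registers
     long double (TFmode)      32-bit: by reference  64-bit: in registers
     _Complex float (SCmode)   32-bit: by reference  64-bit: in registers
     _Complex long double      by reference under both ABIs
*/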
5541 /* Handle the FUNCTION_ARG_ADVANCE macro.
5542 Update the data in CUM to advance over an argument
5543 of mode MODE and data type TYPE.
5544 TYPE is null for libcalls where that information may not be available. */
5546 void
5547 function_arg_advance (struct sparc_args *cum, enum machine_mode mode,
5548 tree type, int named)
5550 int slotno, regno, padding;
5552 /* We pass 0 for incoming_p here, it doesn't matter. */
5553 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5555 /* If the argument required leading padding, add it. */
5556 if (slotno != -1)
5557 cum->words += padding;
5559 if (TARGET_ARCH32)
5561 cum->words += (mode != BLKmode
5562 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5563 : ROUND_ADVANCE (int_size_in_bytes (type)));
5565 else
5567 if (type && AGGREGATE_TYPE_P (type))
5569 int size = int_size_in_bytes (type);
5571 if (size <= 8)
5572 ++cum->words;
5573 else if (size <= 16)
5574 cum->words += 2;
5575 else /* passed by reference */
5576 ++cum->words;
5578 else
5580 cum->words += (mode != BLKmode
5581 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5582 : ROUND_ADVANCE (int_size_in_bytes (type)));
5587 /* Handle the FUNCTION_ARG_PADDING macro.
5588 For the 64-bit ABI, structs are always stored left-shifted in their
5589 argument slot. */
5591 enum direction
5592 function_arg_padding (enum machine_mode mode, const_tree type)
5594 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
5595 return upward;
5597 /* Fall back to the default. */
5598 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
5601 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
5602 Specify whether to return the return value in memory. */
5604 static bool
5605 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5607 if (TARGET_ARCH32)
5608 /* Original SPARC 32-bit ABI says that structures and unions,
5609 and quad-precision floats are returned in memory. All other
5610 base types are returned in registers.
5612 Extended ABI (as implemented by the Sun compiler) says that
5613 all complex floats are returned in registers (8 FP registers
5614 at most for '_Complex long double'). Return all complex integers
5615 in registers (4 at most for '_Complex long long').
5617 Vector ABI (as implemented by the Sun VIS SDK) says that vector
5618 integers are returned like floats of the same size, that is in
5619 registers up to 8 bytes and in memory otherwise. Return all
5620 vector floats in memory like structure and unions; note that
5621 they always have BLKmode like the latter. */
5622 return (TYPE_MODE (type) == BLKmode
5623 || TYPE_MODE (type) == TFmode
5624 || (TREE_CODE (type) == VECTOR_TYPE
5625 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5626 else
5627 /* Original SPARC 64-bit ABI says that structures and unions
5628 smaller than 32 bytes are returned in registers, as well as
5629 all other base types.
5631 Extended ABI (as implemented by the Sun compiler) says that all
5632 complex floats are returned in registers (8 FP registers at most
5633 for '_Complex long double'). Return all complex integers in
5634 registers (4 at most for '_Complex TItype').
5636 Vector ABI (as implemented by the Sun VIS SDK) says that vector
5637 integers are returned like floats of the same size, that is in
5638 registers. Return all vector floats like structure and unions;
5639 note that they always have BLKmode like the latter. */
5640 return ((TYPE_MODE (type) == BLKmode
5641 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32));
5644 /* Handle the TARGET_STRUCT_VALUE target hook.
5645 Return where to find the structure return value address. */
5647 static rtx
5648 sparc_struct_value_rtx (tree fndecl, int incoming)
5650 if (TARGET_ARCH64)
5651 return 0;
5652 else
5654 rtx mem;
5656 if (incoming)
5657 mem = gen_rtx_MEM (Pmode, plus_constant (frame_pointer_rtx,
5658 STRUCT_VALUE_OFFSET));
5659 else
5660 mem = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx,
5661 STRUCT_VALUE_OFFSET));
5663 /* Only follow the SPARC ABI for fixed-size structure returns.
5664 Variable size structure returns are handled per the normal
5665 procedures in GCC. This is enabled by -mstd-struct-return. */
5666 if (incoming == 2
5667 && sparc_std_struct_return
5668 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
5669 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
5671 /* We must check and adjust the return address, since the
5672 caller may or may not have actually provided the return
5673 object. */
5674 rtx ret_rtx = gen_rtx_REG (Pmode, 31);
5675 rtx scratch = gen_reg_rtx (SImode);
5676 rtx endlab = gen_label_rtx ();
5678 /* Calculate the return object size */
5679 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
5680 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
5681 /* Construct a temporary return value */
5682 rtx temp_val = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
5684 /* Implement the SPARC 32-bit psABI callee struct return checking
5685 requirements:
5687 Fetch the instruction where we will return to and see if
5688 it's an unimp instruction (the most significant 10 bits
5689 will be zero). */
5690 emit_move_insn (scratch, gen_rtx_MEM (SImode,
5691 plus_constant (ret_rtx, 8)));
5692 /* Assume the size is valid and pre-adjust */
5693 emit_insn (gen_add3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
5694 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, 0, endlab);
5695 emit_insn (gen_sub3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
5696 /* Assign stack temp:
5697 Write the address of the memory pointed to by temp_val into
5698 the memory pointed to by mem */
5699 emit_move_insn (mem, XEXP (temp_val, 0));
5700 emit_label (endlab);
5703 set_mem_alias_set (mem, struct_value_alias_set);
5704 return mem;
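/* Under the 32-bit ABI, a caller expecting a structure return follows
   the call with an "unimp" word whose low 12 bits encode the expected
   size, e.g. (illustrative):

     call  f
      nop
     unimp 12

   The code above fetches that word; only if it matches the expected
   size does the function use the caller-supplied return object and
   return past the unimp word, otherwise it restores the return address
   and redirects the store to a local temporary.  */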
5708 /* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
5709 For v9, function return values are subject to the same rules as arguments,
5710 except that up to 32 bytes may be returned in registers. */
5712 rtx
5713 function_value (const_tree type, enum machine_mode mode, int incoming_p)
5715 /* Beware that the two values are swapped here wrt function_arg. */
5716 int regbase = (incoming_p
5717 ? SPARC_OUTGOING_INT_ARG_FIRST
5718 : SPARC_INCOMING_INT_ARG_FIRST);
5719 enum mode_class mclass = GET_MODE_CLASS (mode);
5720 int regno;
5722 /* Vector types deserve special treatment because they are polymorphic wrt
5723 their mode, depending upon whether VIS instructions are enabled. */
5724 if (type && TREE_CODE (type) == VECTOR_TYPE)
5726 HOST_WIDE_INT size = int_size_in_bytes (type);
5727 gcc_assert ((TARGET_ARCH32 && size <= 8)
5728 || (TARGET_ARCH64 && size <= 32));
5730 if (mode == BLKmode)
5731 return function_arg_vector_value (size,
5732 SPARC_FP_ARG_FIRST);
5733 else
5734 mclass = MODE_FLOAT;
5737 if (TARGET_ARCH64 && type)
5739 /* Structures up to 32 bytes in size are returned in registers. */
5740 if (TREE_CODE (type) == RECORD_TYPE)
5742 HOST_WIDE_INT size = int_size_in_bytes (type);
5743 gcc_assert (size <= 32);
5745 return function_arg_record_value (type, mode, 0, 1, regbase);
5748 /* Unions up to 32 bytes in size are returned in integer registers. */
5749 else if (TREE_CODE (type) == UNION_TYPE)
5751 HOST_WIDE_INT size = int_size_in_bytes (type);
5752 gcc_assert (size <= 32);
5754 return function_arg_union_value (size, mode, 0, regbase);
5757 /* Objects that require it are returned in FP registers. */
5758 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
5761 /* All other aggregate types are returned in an integer register in a
5762 mode corresponding to the size of the type. */
5763 else if (AGGREGATE_TYPE_P (type))
5767 HOST_WIDE_INT size = int_size_in_bytes (type);
5768 gcc_assert (size <= 32);
5770 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5772 /* ??? We probably should have made the same ABI change in
5773 3.4.0 as the one we made for unions. The latter was
5774 required by the SCD though, while the former is not
5775 specified, so we favored compatibility and efficiency.
5777 Now we're stuck for aggregates larger than 16 bytes,
5778 because OImode vanished in the meantime. Let's not
5779 try to be unduly clever, and simply follow the ABI
5780 for unions in that case. */
5781 if (mode == BLKmode)
5782 return function_arg_union_value (size, mode, 0, regbase);
5783 else
5784 mclass = MODE_INT;
5787 /* This must match PROMOTE_FUNCTION_MODE. */
5788 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5789 mode = word_mode;
5792 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
5793 regno = SPARC_FP_ARG_FIRST;
5794 else
5795 regno = regbase;
5797 return gen_rtx_REG (mode, regno);
5800 /* Do what is necessary for `va_start'. We look at the current function
5801 to determine if stdarg or varargs is used and return the address of
5802 the first unnamed parameter. */
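/* Editorial sketch: with FIRST_REG argument words consumed by named
   parameters, the loop below stores each remaining incoming integer
   argument register regno (FIRST_REG .. SPARC_INT_ARG_MAX - 1) to its
   reserved slot at

     %fp + FIRST_PARM_OFFSET (0) + UNITS_PER_WORD * regno

   and the address of the first such slot is returned, so va_arg can
   walk the arguments as one contiguous block.  */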
5804 static rtx
5805 sparc_builtin_saveregs (void)
5807 int first_reg = crtl->args.info.words;
5808 rtx address;
5809 int regno;
5811 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
5812 emit_move_insn (gen_rtx_MEM (word_mode,
5813 gen_rtx_PLUS (Pmode,
5814 frame_pointer_rtx,
5815 GEN_INT (FIRST_PARM_OFFSET (0)
5816 + (UNITS_PER_WORD
5817 * regno)))),
5818 gen_rtx_REG (word_mode,
5819 SPARC_INCOMING_INT_ARG_FIRST + regno));
5821 address = gen_rtx_PLUS (Pmode,
5822 frame_pointer_rtx,
5823 GEN_INT (FIRST_PARM_OFFSET (0)
5824 + UNITS_PER_WORD * first_reg));
5826 return address;
5829 /* Implement `va_start' for stdarg. */
5831 static void
5832 sparc_va_start (tree valist, rtx nextarg)
5834 nextarg = expand_builtin_saveregs ();
5835 std_expand_builtin_va_start (valist, nextarg);
5838 /* Implement `va_arg' for stdarg. */
5840 static tree
5841 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5842 gimple_seq *post_p)
5844 HOST_WIDE_INT size, rsize, align;
5845 tree addr, incr;
5846 bool indirect;
5847 tree ptrtype = build_pointer_type (type);
5849 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5851 indirect = true;
5852 size = rsize = UNITS_PER_WORD;
5853 align = 0;
5855 else
5857 indirect = false;
5858 size = int_size_in_bytes (type);
5859 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5860 align = 0;
5862 if (TARGET_ARCH64)
5864 /* For SPARC64, objects requiring 16-byte alignment get it. */
5865 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
5866 align = 2 * UNITS_PER_WORD;
5868 /* SPARC-V9 ABI states that structures up to 16 bytes in size
5869 are left-justified in their slots. */
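/* Concretely (editorial example, 8-byte slots): a 4-byte scalar is
   right-justified, i.e. read from slot + 4 via the big-endian
   adjustment applied further down, whereas forcing size = rsize here
   makes a 4-byte aggregate left-justified, i.e. read from slot + 0.  */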
5870 if (AGGREGATE_TYPE_P (type))
5872 if (size == 0)
5873 size = rsize = UNITS_PER_WORD;
5874 else
5875 size = rsize;
5880 incr = valist;
5881 if (align)
5883 incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
5884 size_int (align - 1));
5885 incr = fold_convert (sizetype, incr);
5886 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
5887 size_int (-align));
5888 incr = fold_convert (ptr_type_node, incr);
5891 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
5892 addr = incr;
5894 if (BYTES_BIG_ENDIAN && size < rsize)
5895 addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
5896 size_int (rsize - size));
5898 if (indirect)
5900 addr = fold_convert (build_pointer_type (ptrtype), addr);
5901 addr = build_va_arg_indirect_ref (addr);
5904 /* If the address isn't aligned properly for the type, we need a temporary.
5905 FIXME: This is inefficient, usually we can do this in registers. */
5906 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
5908 tree tmp = create_tmp_var (type, "va_arg_tmp");
5909 tree dest_addr = build_fold_addr_expr (tmp);
5910 tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
5911 3, dest_addr, addr, size_int (rsize));
5912 TREE_ADDRESSABLE (tmp) = 1;
5913 gimplify_and_add (copy, pre_p);
5914 addr = dest_addr;
5917 else
5918 addr = fold_convert (ptrtype, addr);
5920 incr
5921 = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, size_int (rsize));
5922 gimplify_assign (valist, incr, post_p);
5924 return build_va_arg_indirect_ref (addr);
5927 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
5928 Specify whether the vector mode is supported by the hardware. */
5930 static bool
5931 sparc_vector_mode_supported_p (enum machine_mode mode)
5933 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
5936 /* Return the string to output an unconditional branch to LABEL, which is
5937 the operand number of the label.
5939 DEST is the destination insn (i.e. the label), INSN is the source. */
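/* For example (editorial note): with label operand 0, the non-V9 form
   assembled below is the literal template "b%*\t%l0%(", where %* and
   %( are the punctuation codes handled by print_operand later in this
   file (annul flag and delay-slot nop, respectively).  */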
5941 const char *
5942 output_ubranch (rtx dest, int label, rtx insn)
5944 static char string[64];
5945 bool v9_form = false;
5946 char *p;
5948 if (TARGET_V9 && INSN_ADDRESSES_SET_P ())
5950 int delta = (INSN_ADDRESSES (INSN_UID (dest))
5951 - INSN_ADDRESSES (INSN_UID (insn)));
5952 /* Leave some instructions for "slop". */
5953 if (delta >= -260000 && delta < 260000)
5954 v9_form = true;
5957 if (v9_form)
5958 strcpy (string, "ba%*,pt\t%%xcc, ");
5959 else
5960 strcpy (string, "b%*\t");
5962 p = strchr (string, '\0');
5963 *p++ = '%';
5964 *p++ = 'l';
5965 *p++ = '0' + label;
5966 *p++ = '%';
5967 *p++ = '(';
5968 *p = '\0';
5970 return string;
5973 /* Return the string to output a conditional branch to LABEL, which is
5974 the operand number of the label. OP is the conditional expression.
5975 XEXP (OP, 0) is assumed to be a condition code register (integer or
5976 floating point) and its mode specifies what kind of comparison we made.
5978 DEST is the destination insn (i.e. the label), INSN is the source.
5980 REVERSED is nonzero if we should reverse the sense of the comparison.
5982 ANNUL is nonzero if we should generate an annulling branch. */
5984 const char *
5985 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
5986 rtx insn)
5988 static char string[64];
5989 enum rtx_code code = GET_CODE (op);
5990 rtx cc_reg = XEXP (op, 0);
5991 enum machine_mode mode = GET_MODE (cc_reg);
5992 const char *labelno, *branch;
5993 int spaces = 8, far;
5994 char *p;
5996 /* v9 branches are limited to +-1MB. If it is too far away,
5997 change
5999 bne,pt %xcc, .LC30
6001 to
6003 be,pn %xcc, .+12
6004 nop
6005 ba .LC30
6007 and
6009 fbne,a,pn %fcc2, .LC29
6011 to
6013 fbe,pt %fcc2, .+16
6014 nop
6015 ba .LC29 */
6017 far = TARGET_V9 && (get_attr_length (insn) >= 3);
6018 if (reversed ^ far)
6020 /* Reversal of FP compares takes care -- an ordered compare
6021 becomes an unordered compare and vice versa. */
6022 if (mode == CCFPmode || mode == CCFPEmode)
6023 code = reverse_condition_maybe_unordered (code);
6024 else
6025 code = reverse_condition (code);
6028 /* Start by writing the branch condition. */
6029 if (mode == CCFPmode || mode == CCFPEmode)
6031 switch (code)
6033 case NE:
6034 branch = "fbne";
6035 break;
6036 case EQ:
6037 branch = "fbe";
6038 break;
6039 case GE:
6040 branch = "fbge";
6041 break;
6042 case GT:
6043 branch = "fbg";
6044 break;
6045 case LE:
6046 branch = "fble";
6047 break;
6048 case LT:
6049 branch = "fbl";
6050 break;
6051 case UNORDERED:
6052 branch = "fbu";
6053 break;
6054 case ORDERED:
6055 branch = "fbo";
6056 break;
6057 case UNGT:
6058 branch = "fbug";
6059 break;
6060 case UNLT:
6061 branch = "fbul";
6062 break;
6063 case UNEQ:
6064 branch = "fbue";
6065 break;
6066 case UNGE:
6067 branch = "fbuge";
6068 break;
6069 case UNLE:
6070 branch = "fbule";
6071 break;
6072 case LTGT:
6073 branch = "fblg";
6074 break;
6076 default:
6077 gcc_unreachable ();
6080 /* ??? !v9: FP branches cannot be preceded by another floating point
6081 insn. Because there is currently no concept of pre-delay slots,
6082 we can fix this only by always emitting a nop before a floating
6083 point branch. */
6085 string[0] = '\0';
6086 if (! TARGET_V9)
6087 strcpy (string, "nop\n\t");
6088 strcat (string, branch);
6090 else
6092 switch (code)
6094 case NE:
6095 branch = "bne";
6096 break;
6097 case EQ:
6098 branch = "be";
6099 break;
6100 case GE:
6101 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6102 branch = "bpos";
6103 else
6104 branch = "bge";
6105 break;
6106 case GT:
6107 branch = "bg";
6108 break;
6109 case LE:
6110 branch = "ble";
6111 break;
6112 case LT:
6113 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6114 branch = "bneg";
6115 else
6116 branch = "bl";
6117 break;
6118 case GEU:
6119 branch = "bgeu";
6120 break;
6121 case GTU:
6122 branch = "bgu";
6123 break;
6124 case LEU:
6125 branch = "bleu";
6126 break;
6127 case LTU:
6128 branch = "blu";
6129 break;
6131 default:
6132 gcc_unreachable ();
6134 strcpy (string, branch);
6136 spaces -= strlen (branch);
6137 p = strchr (string, '\0');
6139 /* Now add the annulling, the label, and a possible noop. */
6140 if (annul && ! far)
6142 strcpy (p, ",a");
6143 p += 2;
6144 spaces -= 2;
6147 if (TARGET_V9)
6149 rtx note;
6150 int v8 = 0;
6152 if (! far && insn && INSN_ADDRESSES_SET_P ())
6154 int delta = (INSN_ADDRESSES (INSN_UID (dest))
6155 - INSN_ADDRESSES (INSN_UID (insn)));
6156 /* Leave some instructions for "slop". */
6157 if (delta < -260000 || delta >= 260000)
6158 v8 = 1;
6161 if (mode == CCFPmode || mode == CCFPEmode)
6163 static char v9_fcc_labelno[] = "%%fccX, ";
6164 /* Set the char indicating the number of the fcc reg to use. */
6165 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
6166 labelno = v9_fcc_labelno;
6167 if (v8)
6169 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
6170 labelno = "";
6173 else if (mode == CCXmode || mode == CCX_NOOVmode)
6175 labelno = "%%xcc, ";
6176 gcc_assert (! v8);
6178 else
6180 labelno = "%%icc, ";
6181 if (v8)
6182 labelno = "";
6185 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6187 strcpy (p,
6188 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6189 ? ",pt" : ",pn");
6190 p += 3;
6191 spaces -= 3;
6194 else
6195 labelno = "";
6197 if (spaces > 0)
6198 *p++ = '\t';
6199 else
6200 *p++ = ' ';
6201 strcpy (p, labelno);
6202 p = strchr (p, '\0');
6203 if (far)
6205 strcpy (p, ".+12\n\t nop\n\tb\t");
6206 /* Skip the next insn if requested or
6207 if we know that it will be a nop. */
6208 if (annul || ! final_sequence)
6209 p[3] = '6';
6210 p += 14;
6212 *p++ = '%';
6213 *p++ = 'l';
6214 *p++ = label + '0';
6215 *p++ = '%';
6216 *p++ = '#';
6217 *p = '\0';
6219 return string;
6222 /* Emit a library call comparison between floating point X and Y.
6223 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
6224 Return the new operator to be used in the comparison sequence.
6226 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
6227 values as arguments instead of the TFmode registers themselves,
6228 that's why we cannot call emit_float_lib_cmp. */
6230 rtx
6231 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
6233 const char *qpfunc;
6234 rtx slot0, slot1, result, tem, tem2;
6235 enum machine_mode mode;
6236 enum rtx_code new_comparison;
6238 switch (comparison)
6240 case EQ:
6241 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
6242 break;
6244 case NE:
6245 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
6246 break;
6248 case GT:
6249 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
6250 break;
6252 case GE:
6253 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
6254 break;
6256 case LT:
6257 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
6258 break;
6260 case LE:
6261 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
6262 break;
6264 case ORDERED:
6265 case UNORDERED:
6266 case UNGT:
6267 case UNLT:
6268 case UNEQ:
6269 case UNGE:
6270 case UNLE:
6271 case LTGT:
6272 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
6273 break;
6275 default:
6276 gcc_unreachable ();
6279 if (TARGET_ARCH64)
6281 if (MEM_P (x))
6282 slot0 = x;
6283 else
6285 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6286 emit_move_insn (slot0, x);
6289 if (MEM_P (y))
6290 slot1 = y;
6291 else
6293 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6294 emit_move_insn (slot1, y);
6297 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
6298 DImode, 2,
6299 XEXP (slot0, 0), Pmode,
6300 XEXP (slot1, 0), Pmode);
6301 mode = DImode;
6303 else
6305 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
6306 SImode, 2,
6307 x, TFmode, y, TFmode);
6308 mode = SImode;
6312 /* Immediately move the result of the libcall into a pseudo
6313 register so reload doesn't clobber the value if it needs
6314 the return register for a spill reg. */
6315 result = gen_reg_rtx (mode);
6316 emit_move_insn (result, hard_libcall_value (mode));
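/* Editorial note: the tests below assume the three-way comparison
   helpers (_Qp_cmp / _Q_cmp) return 0 for equal, 1 for less, 2 for
   greater and 3 for unordered, so e.g. result == 3 means UNORDERED
   and result > 1 means greater-or-unordered (UNGT).  */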
6318 switch (comparison)
6320 default:
6321 return gen_rtx_NE (VOIDmode, result, const0_rtx);
6322 case ORDERED:
6323 case UNORDERED:
6324 new_comparison = (comparison == UNORDERED ? EQ : NE);
6325 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
6326 case UNGT:
6327 case UNGE:
6328 new_comparison = (comparison == UNGT ? GT : NE);
6329 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
6330 case UNLE:
6331 return gen_rtx_NE (VOIDmode, result, const2_rtx);
6332 case UNLT:
6333 tem = gen_reg_rtx (mode);
6334 if (TARGET_ARCH32)
6335 emit_insn (gen_andsi3 (tem, result, const1_rtx));
6336 else
6337 emit_insn (gen_anddi3 (tem, result, const1_rtx));
6338 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
6339 case UNEQ:
6340 case LTGT:
6341 tem = gen_reg_rtx (mode);
6342 if (TARGET_ARCH32)
6343 emit_insn (gen_addsi3 (tem, result, const1_rtx));
6344 else
6345 emit_insn (gen_adddi3 (tem, result, const1_rtx));
6346 tem2 = gen_reg_rtx (mode);
6347 if (TARGET_ARCH32)
6348 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
6349 else
6350 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
6351 new_comparison = (comparison == UNEQ ? EQ : NE);
6352 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
6355 gcc_unreachable ();
6358 /* Generate an unsigned DImode to FP conversion. This is the same code
6359 optabs would emit if we didn't have TFmode patterns. */
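/* Editorial sketch of the expansion in C terms, with FLOAT_TYPE
   standing for the target mode (this mirrors the RTL emitted below):

     if ((long long) in >= 0)
       out = (FLOAT_TYPE) in;
     else
       {
         long long half = ((unsigned long long) in >> 1) | (in & 1);
         out = (FLOAT_TYPE) half + (FLOAT_TYPE) half;
       }

   OR-ing the shifted-out bit back in keeps the rounding of the final,
   doubled result correct.  */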
6361 void
6362 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
6364 rtx neglab, donelab, i0, i1, f0, in, out;
6366 out = operands[0];
6367 in = force_reg (DImode, operands[1]);
6368 neglab = gen_label_rtx ();
6369 donelab = gen_label_rtx ();
6370 i0 = gen_reg_rtx (DImode);
6371 i1 = gen_reg_rtx (DImode);
6372 f0 = gen_reg_rtx (mode);
6374 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
6376 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
6377 emit_jump_insn (gen_jump (donelab));
6378 emit_barrier ();
6380 emit_label (neglab);
6382 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
6383 emit_insn (gen_anddi3 (i1, in, const1_rtx));
6384 emit_insn (gen_iordi3 (i0, i0, i1));
6385 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
6386 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
6388 emit_label (donelab);
6391 /* Generate an FP to unsigned DImode conversion. This is the same code
6392 optabs would emit if we didn't have TFmode patterns. */
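/* Editorial sketch in C terms, with LIMIT standing for 2^63 in the
   source float mode (this mirrors the RTL emitted below):

     if (in < LIMIT)
       out = (unsigned long long) (long long) in;
     else
       out = (unsigned long long) (long long) (in - LIMIT)
             ^ (1ULL << 63);  */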
6394 void
6395 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
6397 rtx neglab, donelab, i0, i1, f0, in, out, limit;
6399 out = operands[0];
6400 in = force_reg (mode, operands[1]);
6401 neglab = gen_label_rtx ();
6402 donelab = gen_label_rtx ();
6403 i0 = gen_reg_rtx (DImode);
6404 i1 = gen_reg_rtx (DImode);
6405 limit = gen_reg_rtx (mode);
6406 f0 = gen_reg_rtx (mode);
6408 emit_move_insn (limit,
6409 CONST_DOUBLE_FROM_REAL_VALUE (
6410 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
6411 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
6413 emit_insn (gen_rtx_SET (VOIDmode,
6414 out,
6415 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
6416 emit_jump_insn (gen_jump (donelab));
6417 emit_barrier ();
6419 emit_label (neglab);
6421 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
6422 emit_insn (gen_rtx_SET (VOIDmode,
6423 i0,
6424 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
6425 emit_insn (gen_movdi (i1, const1_rtx));
6426 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
6427 emit_insn (gen_xordi3 (out, i0, i1));
6429 emit_label (donelab);
6432 /* Return the string to output a conditional branch to LABEL, testing
6433 register REG. LABEL is the operand number of the label; REG is the
6434 operand number of the reg. OP is the conditional expression. The mode
6435 of REG says what kind of comparison we made.
6437 DEST is the destination insn (i.e. the label), INSN is the source.
6439 REVERSED is nonzero if we should reverse the sense of the comparison.
6441 ANNUL is nonzero if we should generate an annulling branch. */
6443 const char *
6444 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
6445 int annul, rtx insn)
6447 static char string[64];
6448 enum rtx_code code = GET_CODE (op);
6449 enum machine_mode mode = GET_MODE (XEXP (op, 0));
6450 rtx note;
6451 int far;
6452 char *p;
6454 /* Branches on registers are limited to +-128KB. If one is too far away,
6455 change
6457 brnz,pt %g1, .LC30
6459 to
6461 brz,pn %g1, .+12
6462 nop
6463 ba,pt %xcc, .LC30
6465 and
6467 brgez,a,pn %o1, .LC29
6469 to
6471 brlz,pt %o1, .+16
6472 nop
6473 ba,pt %xcc, .LC29 */
6475 far = get_attr_length (insn) >= 3;
6477 /* If not floating-point or if EQ or NE, we can just reverse the code. */
6478 if (reversed ^ far)
6479 code = reverse_condition (code);
6481 /* Only 64-bit versions of these instructions exist. */
6482 gcc_assert (mode == DImode);
6484 /* Start by writing the branch condition. */
6486 switch (code)
6488 case NE:
6489 strcpy (string, "brnz");
6490 break;
6492 case EQ:
6493 strcpy (string, "brz");
6494 break;
6496 case GE:
6497 strcpy (string, "brgez");
6498 break;
6500 case LT:
6501 strcpy (string, "brlz");
6502 break;
6504 case LE:
6505 strcpy (string, "brlez");
6506 break;
6508 case GT:
6509 strcpy (string, "brgz");
6510 break;
6512 default:
6513 gcc_unreachable ();
6516 p = strchr (string, '\0');
6518 /* Now add the annulling, reg, label, and nop. */
6519 if (annul && ! far)
6521 strcpy (p, ",a");
6522 p += 2;
6525 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6527 strcpy (p,
6528 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6529 ? ",pt" : ",pn");
6530 p += 3;
6533 *p = p < string + 8 ? '\t' : ' ';
6534 p++;
6535 *p++ = '%';
6536 *p++ = '0' + reg;
6537 *p++ = ',';
6538 *p++ = ' ';
6539 if (far)
6541 int veryfar = 1, delta;
6543 if (INSN_ADDRESSES_SET_P ())
6545 delta = (INSN_ADDRESSES (INSN_UID (dest))
6546 - INSN_ADDRESSES (INSN_UID (insn)));
6547 /* Leave some instructions for "slop". */
6548 if (delta >= -260000 && delta < 260000)
6549 veryfar = 0;
6552 strcpy (p, ".+12\n\t nop\n\t");
6553 /* Skip the next insn if requested or
6554 if we know that it will be a nop. */
6555 if (annul || ! final_sequence)
6556 p[3] = '6';
6557 p += 12;
6558 if (veryfar)
6560 strcpy (p, "b\t");
6561 p += 2;
6563 else
6565 strcpy (p, "ba,pt\t%%xcc, ");
6566 p += 13;
6569 *p++ = '%';
6570 *p++ = 'l';
6571 *p++ = '0' + label;
6572 *p++ = '%';
6573 *p++ = '#';
6574 *p = '\0';
6576 return string;
6579 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
6580 Such instructions cannot be used in the delay slot of a return insn on V9.
6581 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
6582 */
6584 static int
6585 epilogue_renumber (register rtx *where, int test)
6587 register const char *fmt;
6588 register int i;
6589 register enum rtx_code code;
6591 if (*where == 0)
6592 return 0;
6594 code = GET_CODE (*where);
6596 switch (code)
6598 case REG:
6599 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
6600 return 1;
6601 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
6602 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
6603 case SCRATCH:
6604 case CC0:
6605 case PC:
6606 case CONST_INT:
6607 case CONST_DOUBLE:
6608 return 0;
6610 /* Do not replace the frame pointer with the stack pointer because
6611 it can cause the delayed instruction to load below the stack.
6612 This occurs when instructions like:
6614 (set (reg/i:SI 24 %i0)
6615 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
6616 (const_int -20 [0xffffffec])) 0))
6618 are in the return delayed slot. */
6619 case PLUS:
6620 if (GET_CODE (XEXP (*where, 0)) == REG
6621 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
6622 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
6623 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
6624 return 1;
6625 break;
6627 case MEM:
6628 if (SPARC_STACK_BIAS
6629 && GET_CODE (XEXP (*where, 0)) == REG
6630 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
6631 return 1;
6632 break;
6634 default:
6635 break;
6638 fmt = GET_RTX_FORMAT (code);
6640 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6642 if (fmt[i] == 'E')
6644 register int j;
6645 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
6646 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
6647 return 1;
6649 else if (fmt[i] == 'e'
6650 && epilogue_renumber (&(XEXP (*where, i)), test))
6651 return 1;
6653 return 0;
6656 /* Leaf functions and non-leaf functions have different needs. */
6658 static const int
6659 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
6661 static const int
6662 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
6664 static const int *const reg_alloc_orders[] = {
6665 reg_leaf_alloc_order,
6666 reg_nonleaf_alloc_order};
6668 void
6669 order_regs_for_local_alloc (void)
6671 static int last_order_nonleaf = 1;
6673 if (df_regs_ever_live_p (15) != last_order_nonleaf)
6675 last_order_nonleaf = !last_order_nonleaf;
6676 memcpy ((char *) reg_alloc_order,
6677 (const char *) reg_alloc_orders[last_order_nonleaf],
6678 FIRST_PSEUDO_REGISTER * sizeof (int));
6682 /* Return 1 if REG and MEM are legitimate enough to allow the various
6683 mem<-->reg splits to be run. */
6685 int
6686 sparc_splitdi_legitimate (rtx reg, rtx mem)
6688 /* Punt if we are here by mistake. */
6689 gcc_assert (reload_completed);
6691 /* We must have an offsettable memory reference. */
6692 if (! offsettable_memref_p (mem))
6693 return 0;
6695 /* If we have legitimate args for ldd/std, we do not want
6696 the split to happen. */
6697 if ((REGNO (reg) % 2) == 0
6698 && mem_min_alignment (mem, 8))
6699 return 0;
6701 /* Success. */
6702 return 1;
6705 /* Return 1 if x and y are some kind of REG and they refer to
6706 different hard registers. This test is guaranteed to be
6707 run after reload. */
6709 int
6710 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
6712 if (GET_CODE (x) != REG)
6713 return 0;
6714 if (GET_CODE (y) != REG)
6715 return 0;
6716 if (REGNO (x) == REGNO (y))
6717 return 0;
6718 return 1;
6721 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
6722 This makes them candidates for using ldd and std insns.
6724 Note reg1 and reg2 *must* be hard registers. */
6726 int
6727 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
6729 /* We might have been passed a SUBREG. */
6730 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
6731 return 0;
6733 if (REGNO (reg1) % 2 != 0)
6734 return 0;
6736 /* Integer ldd is deprecated in SPARC V9 */
6737 if (TARGET_V9 && REGNO (reg1) < 32)
6738 return 0;
6740 return (REGNO (reg1) == REGNO (reg2) - 1);
6743 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
6744 an ldd or std insn.
6746 This can only happen when addr1 and addr2, the addresses in mem1
6747 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
6748 addr1 must also be aligned on a 64-bit boundary.
6750 Also iff dependent_reg_rtx is not null it should not be used to
6751 compute the address for mem1, i.e. we cannot optimize a sequence
6752 like:
6753 ld [%o0], %o0
6754 ld [%o0 + 4], %o1
6756 ldd [%o0], %o0
6757 nor:
6758 ld [%g3 + 4], %g3
6759 ld [%g3], %g2
6761 ldd [%g3], %g2
6763 But, note that the transformation from:
6764 ld [%g2 + 4], %g3
6765 ld [%g2], %g2
6767 ldd [%g2], %g2
6768 is perfectly fine. Thus, the peephole2 patterns always pass us
6769 the destination register of the first load, never the second one.
6771 For stores we don't have a similar problem, so dependent_reg_rtx is
6772 NULL_RTX. */
6774 int
6775 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
6777 rtx addr1, addr2;
6778 unsigned int reg1;
6779 HOST_WIDE_INT offset1;
6781 /* The mems cannot be volatile. */
6782 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
6783 return 0;
6785 /* MEM1 should be aligned on a 64-bit boundary. */
6786 if (MEM_ALIGN (mem1) < 64)
6787 return 0;
6789 addr1 = XEXP (mem1, 0);
6790 addr2 = XEXP (mem2, 0);
6792 /* Extract a register number and offset (if used) from the first addr. */
6793 if (GET_CODE (addr1) == PLUS)
6795 /* If not a REG, return zero. */
6796 if (GET_CODE (XEXP (addr1, 0)) != REG)
6797 return 0;
6798 else
6800 reg1 = REGNO (XEXP (addr1, 0));
6801 /* The offset must be constant! */
6802 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
6803 return 0;
6804 offset1 = INTVAL (XEXP (addr1, 1));
6807 else if (GET_CODE (addr1) != REG)
6808 return 0;
6809 else
6811 reg1 = REGNO (addr1);
6812 /* This was a simple (mem (reg)) expression. Offset is 0. */
6813 offset1 = 0;
6816 /* Make sure the second address is a (plus (reg) (const_int)). */
6817 if (GET_CODE (addr2) != PLUS)
6818 return 0;
6820 if (GET_CODE (XEXP (addr2, 0)) != REG
6821 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
6822 return 0;
6824 if (reg1 != REGNO (XEXP (addr2, 0)))
6825 return 0;
6827 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
6828 return 0;
6830 /* The first offset must be evenly divisible by 8 to ensure the
6831 address is 64 bit aligned. */
6832 if (offset1 % 8 != 0)
6833 return 0;
6835 /* The offset for the second addr must be 4 more than the first addr. */
6836 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
6837 return 0;
6839 /* All the tests passed. addr1 and addr2 are valid for ldd and std
6840 instructions. */
6841 return 1;
6844 /* Return 1 if reg is a pseudo, or is the first register in
6845 a hard register pair. This makes it suitable for use in
6846 ldd and std insns. */
6848 int
6849 register_ok_for_ldd (rtx reg)
6851 /* We might have been passed a SUBREG. */
6852 if (!REG_P (reg))
6853 return 0;
6855 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
6856 return (REGNO (reg) % 2 == 0);
6858 return 1;
6861 /* Return 1 if OP is a memory whose address is known to be
6862 aligned to 8-byte boundary, or a pseudo during reload.
6863 This makes it suitable for use in ldd and std insns. */
6865 int
6866 memory_ok_for_ldd (rtx op)
6868 if (MEM_P (op))
6870 /* In 64-bit mode, we assume that the address is word-aligned. */
6871 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
6872 return 0;
6874 if ((reload_in_progress || reload_completed)
6875 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
6876 return 0;
6878 else if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
6880 if (!(reload_in_progress && reg_renumber [REGNO (op)] < 0))
6881 return 0;
6883 else
6884 return 0;
6886 return 1;
6889 /* Print operand X (an rtx) in assembler syntax to file FILE.
6890 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
6891 For `%' followed by punctuation, CODE is the punctuation and X is null. */
6893 void
6894 print_operand (FILE *file, rtx x, int code)
6896 switch (code)
6898 case '#':
6899 /* Output an insn in a delay slot. */
6900 if (final_sequence)
6901 sparc_indent_opcode = 1;
6902 else
6903 fputs ("\n\t nop", file);
6904 return;
6905 case '*':
6906 /* Output an annul flag if there's nothing for the delay slot and we
6907 are optimizing. This is always used with '(' below.
6908 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
6909 this is a dbx bug. So, we only do this when optimizing.
6910 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
6911 Always emit a nop in case the next instruction is a branch. */
6912 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
6913 fputs (",a", file);
6914 return;
6915 case '(':
6916 /* Output a 'nop' if there's nothing for the delay slot and we are
6917 not optimizing. This is always used with '*' above. */
6918 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
6919 fputs ("\n\t nop", file);
6920 else if (final_sequence)
6921 sparc_indent_opcode = 1;
6922 return;
6923 case ')':
6924 /* Output the right displacement from the saved PC on function return.
6925 The caller may have placed an "unimp" insn immediately after the call
6926 so we have to account for it. This insn is used in the 32-bit ABI
6927 when calling a function that returns a non-zero-sized structure. The
6928 64-bit ABI doesn't have it. Be careful to have this test be the same
6929 as that used on the call. The exception here is that when
6930 sparc_std_struct_return is enabled, the psABI is followed exactly
6931 and the adjustment is made by the code in sparc_struct_value_rtx.
6932 The call emitted is the same when sparc_std_struct_return is
6933 present. */
6934 if (! TARGET_ARCH64
6935 && cfun->returns_struct
6936 && ! sparc_std_struct_return
6937 && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
6938 == INTEGER_CST)
6939 && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
6940 fputs ("12", file);
6941 else
6942 fputc ('8', file);
6943 return;
6944 case '_':
6945 /* Output the Embedded Medium/Anywhere code model base register. */
6946 fputs (EMBMEDANY_BASE_REG, file);
6947 return;
6948 case '&':
6949 /* Print some local dynamic TLS name. */
6950 assemble_name (file, get_some_local_dynamic_name ());
6951 return;
6953 case 'Y':
6954 /* Adjust the operand to take into account a RESTORE operation. */
6955 if (GET_CODE (x) == CONST_INT)
6956 break;
6957 else if (GET_CODE (x) != REG)
6958 output_operand_lossage ("invalid %%Y operand");
6959 else if (REGNO (x) < 8)
6960 fputs (reg_names[REGNO (x)], file);
6961 else if (REGNO (x) >= 24 && REGNO (x) < 32)
6962 fputs (reg_names[REGNO (x)-16], file);
6963 else
6964 output_operand_lossage ("invalid %%Y operand");
6965 return;
6966 case 'L':
6967 /* Print out the low order register name of a register pair. */
6968 if (WORDS_BIG_ENDIAN)
6969 fputs (reg_names[REGNO (x)+1], file);
6970 else
6971 fputs (reg_names[REGNO (x)], file);
6972 return;
6973 case 'H':
6974 /* Print out the high order register name of a register pair. */
6975 if (WORDS_BIG_ENDIAN)
6976 fputs (reg_names[REGNO (x)], file);
6977 else
6978 fputs (reg_names[REGNO (x)+1], file);
6979 return;
6980 case 'R':
6981 /* Print out the second register name of a register pair or quad.
6982 I.e., R (%o0) => %o1. */
6983 fputs (reg_names[REGNO (x)+1], file);
6984 return;
6985 case 'S':
6986 /* Print out the third register name of a register quad.
6987 I.e., S (%o0) => %o2. */
6988 fputs (reg_names[REGNO (x)+2], file);
6989 return;
6990 case 'T':
6991 /* Print out the fourth register name of a register quad.
6992 I.e., T (%o0) => %o3. */
6993 fputs (reg_names[REGNO (x)+3], file);
6994 return;
6995 case 'x':
6996 /* Print a condition code register. */
6997 if (REGNO (x) == SPARC_ICC_REG)
6999 /* We don't handle CC[X]_NOOVmode because they're not supposed
7000 to occur here. */
7001 if (GET_MODE (x) == CCmode)
7002 fputs ("%icc", file);
7003 else if (GET_MODE (x) == CCXmode)
7004 fputs ("%xcc", file);
7005 else
7006 gcc_unreachable ();
7008 else
7009 /* %fccN register */
7010 fputs (reg_names[REGNO (x)], file);
7011 return;
7012 case 'm':
7013 /* Print the operand's address only. */
7014 output_address (XEXP (x, 0));
7015 return;
7016 case 'r':
7017 /* In this case we need a register. Use %g0 if the
7018 operand is const0_rtx. */
7019 if (x == const0_rtx
7020 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
7022 fputs ("%g0", file);
7023 return;
7025 else
7026 break;
7028 case 'A':
7029 switch (GET_CODE (x))
7031 case IOR: fputs ("or", file); break;
7032 case AND: fputs ("and", file); break;
7033 case XOR: fputs ("xor", file); break;
7034 default: output_operand_lossage ("invalid %%A operand");
7036 return;
7038 case 'B':
7039 switch (GET_CODE (x))
7041 case IOR: fputs ("orn", file); break;
7042 case AND: fputs ("andn", file); break;
7043 case XOR: fputs ("xnor", file); break;
7044 default: output_operand_lossage ("invalid %%B operand");
7046 return;
7048 /* These are used by the conditional move instructions. */
7049 case 'c' :
7050 case 'C':
7052 enum rtx_code rc = GET_CODE (x);
7054 if (code == 'c')
7056 enum machine_mode mode = GET_MODE (XEXP (x, 0));
7057 if (mode == CCFPmode || mode == CCFPEmode)
7058 rc = reverse_condition_maybe_unordered (GET_CODE (x));
7059 else
7060 rc = reverse_condition (GET_CODE (x));
7062 switch (rc)
7064 case NE: fputs ("ne", file); break;
7065 case EQ: fputs ("e", file); break;
7066 case GE: fputs ("ge", file); break;
7067 case GT: fputs ("g", file); break;
7068 case LE: fputs ("le", file); break;
7069 case LT: fputs ("l", file); break;
7070 case GEU: fputs ("geu", file); break;
7071 case GTU: fputs ("gu", file); break;
7072 case LEU: fputs ("leu", file); break;
7073 case LTU: fputs ("lu", file); break;
7074 case LTGT: fputs ("lg", file); break;
7075 case UNORDERED: fputs ("u", file); break;
7076 case ORDERED: fputs ("o", file); break;
7077 case UNLT: fputs ("ul", file); break;
7078 case UNLE: fputs ("ule", file); break;
7079 case UNGT: fputs ("ug", file); break;
7080 case UNGE: fputs ("uge", file); break;
7081 case UNEQ: fputs ("ue", file); break;
7082 default: output_operand_lossage (code == 'c'
7083 ? "invalid %%c operand"
7084 : "invalid %%C operand");
7086 return;
7089 /* These are used by the movr instruction pattern. */
7090 case 'd':
7091 case 'D':
7093 enum rtx_code rc = (code == 'd'
7094 ? reverse_condition (GET_CODE (x))
7095 : GET_CODE (x));
7096 switch (rc)
7098 case NE: fputs ("ne", file); break;
7099 case EQ: fputs ("e", file); break;
7100 case GE: fputs ("gez", file); break;
7101 case LT: fputs ("lz", file); break;
7102 case LE: fputs ("lez", file); break;
7103 case GT: fputs ("gz", file); break;
7104 default: output_operand_lossage (code == 'd'
7105 ? "invalid %%d operand"
7106 : "invalid %%D operand");
7108 return;
7111 case 'b':
7113 /* Print a sign-extended character. */
7114 int i = trunc_int_for_mode (INTVAL (x), QImode);
7115 fprintf (file, "%d", i);
7116 return;
7119 case 'f':
7120 /* Operand must be a MEM; write its address. */
7121 if (GET_CODE (x) != MEM)
7122 output_operand_lossage ("invalid %%f operand");
7123 output_address (XEXP (x, 0));
7124 return;
7126 case 's':
7128 /* Print a sign-extended 32-bit value. */
7129 HOST_WIDE_INT i;
7130 if (GET_CODE(x) == CONST_INT)
7131 i = INTVAL (x);
7132 else if (GET_CODE(x) == CONST_DOUBLE)
7133 i = CONST_DOUBLE_LOW (x);
7134 else
7136 output_operand_lossage ("invalid %%s operand");
7137 return;
7139 i = trunc_int_for_mode (i, SImode);
7140 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
7141 return;
7144 case 0:
7145 /* Do nothing special. */
7146 break;
7148 default:
7149 /* Undocumented flag. */
7150 output_operand_lossage ("invalid operand output code");
7153 if (GET_CODE (x) == REG)
7154 fputs (reg_names[REGNO (x)], file);
7155 else if (GET_CODE (x) == MEM)
7157 fputc ('[', file);
7158 /* Poor Sun assembler doesn't understand absolute addressing. */
7159 if (CONSTANT_P (XEXP (x, 0)))
7160 fputs ("%g0+", file);
7161 output_address (XEXP (x, 0));
7162 fputc (']', file);
7164 else if (GET_CODE (x) == HIGH)
7166 fputs ("%hi(", file);
7167 output_addr_const (file, XEXP (x, 0));
7168 fputc (')', file);
7170 else if (GET_CODE (x) == LO_SUM)
7172 print_operand (file, XEXP (x, 0), 0);
7173 if (TARGET_CM_MEDMID)
7174 fputs ("+%l44(", file);
7175 else
7176 fputs ("+%lo(", file);
7177 output_addr_const (file, XEXP (x, 1));
7178 fputc (')', file);
7180 else if (GET_CODE (x) == CONST_DOUBLE
7181 && (GET_MODE (x) == VOIDmode
7182 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
7184 if (CONST_DOUBLE_HIGH (x) == 0)
7185 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
7186 else if (CONST_DOUBLE_HIGH (x) == -1
7187 && CONST_DOUBLE_LOW (x) < 0)
7188 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
7189 else
7190 output_operand_lossage ("long long constant not a valid immediate operand");
7192 else if (GET_CODE (x) == CONST_DOUBLE)
7193 output_operand_lossage ("floating point constant not a valid immediate operand");
7194 else { output_addr_const (file, x); }
7197 /* Target hook for assembling integer objects. The sparc version has
7198 special handling for aligned DI-mode objects. */
7200 static bool
7201 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
7203 /* ??? We only output .xword's for symbols and only then in environments
7204 where the assembler can handle them. */
7205 if (aligned_p && size == 8
7206 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
7208 if (TARGET_V9)
7210 assemble_integer_with_op ("\t.xword\t", x);
7211 return true;
7213 else
7215 assemble_aligned_integer (4, const0_rtx);
7216 assemble_aligned_integer (4, x);
7217 return true;
7220 return default_assemble_integer (x, size, aligned_p);
7223 /* Return the value of a code used in the .proc pseudo-op that says
7224 what kind of result this function returns. For non-C types, we pick
7225 the closest C type. */
7227 #ifndef SHORT_TYPE_SIZE
7228 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
7229 #endif
7231 #ifndef INT_TYPE_SIZE
7232 #define INT_TYPE_SIZE BITS_PER_WORD
7233 #endif
7235 #ifndef LONG_TYPE_SIZE
7236 #define LONG_TYPE_SIZE BITS_PER_WORD
7237 #endif
7239 #ifndef LONG_LONG_TYPE_SIZE
7240 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
7241 #endif
7243 #ifndef FLOAT_TYPE_SIZE
7244 #define FLOAT_TYPE_SIZE BITS_PER_WORD
7245 #endif
7247 #ifndef DOUBLE_TYPE_SIZE
7248 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7249 #endif
7251 #ifndef LONG_DOUBLE_TYPE_SIZE
7252 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7253 #endif
7255 unsigned long
7256 sparc_type_code (register tree type)
7258 register unsigned long qualifiers = 0;
7259 register unsigned shift;
7261 /* Only the first 30 bits of the qualifier are valid. We must refrain from
7262 setting more, since some assemblers will give an error for this. Also,
7263 we must be careful to avoid shifts of 32 bits or more to avoid getting
7264 unpredictable results. */
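/* Worked example (editorial): for an object of C type "int **" the
   loop ORs in the pointer code 1 at shifts 6 and 8, then returns the
   base code 4 for signed int, i.e. (1 << 6) | (1 << 8) | 4 == 0x144.  */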
7266 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
7268 switch (TREE_CODE (type))
7270 case ERROR_MARK:
7271 return qualifiers;
7273 case ARRAY_TYPE:
7274 qualifiers |= (3 << shift);
7275 break;
7277 case FUNCTION_TYPE:
7278 case METHOD_TYPE:
7279 qualifiers |= (2 << shift);
7280 break;
7282 case POINTER_TYPE:
7283 case REFERENCE_TYPE:
7284 case OFFSET_TYPE:
7285 qualifiers |= (1 << shift);
7286 break;
7288 case RECORD_TYPE:
7289 return (qualifiers | 8);
7291 case UNION_TYPE:
7292 case QUAL_UNION_TYPE:
7293 return (qualifiers | 9);
7295 case ENUMERAL_TYPE:
7296 return (qualifiers | 10);
7298 case VOID_TYPE:
7299 return (qualifiers | 16);
7301 case INTEGER_TYPE:
7302 /* If this is a range type, consider it to be the underlying
7303 type. */
7304 if (TREE_TYPE (type) != 0)
7305 break;
7307 /* Carefully distinguish all the standard types of C,
7308 without messing up if the language is not C. We do this by
7309 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
7310 look at both the names and the above fields, but that's redundant.
7311 Any type whose size is between two C types will be considered
7312 to be the wider of the two types. Also, we do not have a
7313 special code to use for "long long", so anything wider than
7314 long is treated the same. Note that we can't distinguish
7315 between "int" and "long" in this code if they are the same
7316 size, but that's fine, since neither can the assembler. */
7318 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
7319 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
7321 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
7322 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
7324 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
7325 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
7327 else
7328 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
7330 case REAL_TYPE:
7331 /* If this is a range type, consider it to be the underlying
7332 type. */
7333 if (TREE_TYPE (type) != 0)
7334 break;
7336 /* Carefully distinguish all the standard types of C,
7337 without messing up if the language is not C. */
7339 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
7340 return (qualifiers | 6);
7342 else
7343 return (qualifiers | 7);
7345 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
7346 /* ??? We need to distinguish between double and float complex types,
7347 but I don't know how yet because I can't reach this code from
7348 existing front-ends. */
7349 return (qualifiers | 7); /* Who knows? */
7351 case VECTOR_TYPE:
7352 case BOOLEAN_TYPE: /* Boolean truth value type. */
7353 case LANG_TYPE: /* ? */
7354 return qualifiers;
7356 default:
7357 gcc_unreachable (); /* Not a type! */
7361 return qualifiers;
7364 /* Nested function support. */
7366 /* Emit RTL insns to initialize the variable parts of a trampoline.
7367 FNADDR is an RTX for the address of the function's pure code.
7368 CXT is an RTX for the static chain value for the function.
7370 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
7371 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
7372 (to store insns). This is a bit excessive. Perhaps a different
7373 mechanism would be better here.
7375 Emit enough FLUSH insns to synchronize the data and instruction caches. */
7377 void
7378 sparc_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
7380 /* SPARC 32-bit trampoline:
7382 sethi %hi(fn), %g1
7383 sethi %hi(static), %g2
7384 jmp %g1+%lo(fn)
7385 or %g2, %lo(static), %g2
7387 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
7388 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
7389 */
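/* Editorial note: plugging registers into the encodings above explains
   the magic constants below: 0x03000000 is SETHI with rd = %g1, so
   0x03000000 | (fn >> 10) is "sethi %hi(fn), %g1", and 0x81c06000 is
   JMPL %g1 + simm13, %g0, so 0x81c06000 | (fn & 0x3ff) is
   "jmp %g1 + %lo(fn)".  */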
7391 emit_move_insn
7392 (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
7393 expand_binop (SImode, ior_optab,
7394 expand_shift (RSHIFT_EXPR, SImode, fnaddr,
7395 size_int (10), 0, 1),
7396 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
7397 NULL_RTX, 1, OPTAB_DIRECT));
7399 emit_move_insn
7400 (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7401 expand_binop (SImode, ior_optab,
7402 expand_shift (RSHIFT_EXPR, SImode, cxt,
7403 size_int (10), 0, 1),
7404 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
7405 NULL_RTX, 1, OPTAB_DIRECT));
7407 emit_move_insn
7408 (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7409 expand_binop (SImode, ior_optab,
7410 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
7411 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
7412 NULL_RTX, 1, OPTAB_DIRECT));
7414 emit_move_insn
7415 (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7416 expand_binop (SImode, ior_optab,
7417 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
7418 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
7419 NULL_RTX, 1, OPTAB_DIRECT));
7421 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
7422 aligned on a 16 byte boundary so one flush clears it all. */
7423 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
7424 if (sparc_cpu != PROCESSOR_ULTRASPARC
7425 && sparc_cpu != PROCESSOR_ULTRASPARC3
7426 && sparc_cpu != PROCESSOR_NIAGARA
7427 && sparc_cpu != PROCESSOR_NIAGARA2)
7428 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
7429 plus_constant (tramp, 8)))));
7431 /* Call __enable_execute_stack after writing onto the stack to make sure
7432 the stack address is accessible. */
7433 #ifdef ENABLE_EXECUTE_STACK
7434 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7435 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
7436 #endif
7440 /* The 64-bit version is simpler because it makes more sense to load the
7441 values as "immediate" data out of the trampoline. It's also easier since
7442 we can read the PC without clobbering a register. */
7444 void
7445 sparc64_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
7447 /* SPARC 64-bit trampoline:
7449 rd %pc, %g1
7450 ldx [%g1+24], %g5
7451 jmp %g5
7452 ldx [%g1+16], %g5
7453 +16 bytes data
7454 */
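/* Editorial note: %g5 is deliberately used twice above: "jmp %g5"
   samples the function address (loaded from offset 24) before its
   delay slot executes, and the delay-slot ldx then reloads %g5 with
   the static chain word at offset 16, %g5 being the 64-bit static
   chain register, so no extra scratch register is needed.  */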
7456 emit_move_insn (gen_rtx_MEM (SImode, tramp),
7457 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
7458 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7459 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
7460 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7461 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
7462 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7463 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
7464 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
7465 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
7466 emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp))));
7468 if (sparc_cpu != PROCESSOR_ULTRASPARC
7469 && sparc_cpu != PROCESSOR_ULTRASPARC3
7470 && sparc_cpu != PROCESSOR_NIAGARA
7471 && sparc_cpu != PROCESSOR_NIAGARA2)
7472 emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
7474 /* Call __enable_execute_stack after writing onto the stack to make sure
7475 the stack address is accessible. */
7476 #ifdef ENABLE_EXECUTE_STACK
7477 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7478 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
7479 #endif
7482 /* Adjust the cost of a scheduling dependency. Return the new cost of
7483 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
7485 static int
7486 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7488 enum attr_type insn_type;
7490 if (! recog_memoized (insn))
7491 return 0;
7493 insn_type = get_attr_type (insn);
7495 if (REG_NOTE_KIND (link) == 0)
7497 /* Data dependency; DEP_INSN writes a register that INSN reads some
7498 cycles later. */
7500 /* If a load, then the dependence must be on the memory address;
7501 add an extra "cycle". Note that the cost could be two cycles
7502 if the reg was written late in an instruction group; we cannot tell
7503 here. */
7504 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
7505 return cost + 3;
7507 /* Get the delay only if the address of the store is the dependence. */
7508 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
7510 rtx pat = PATTERN(insn);
7511 rtx dep_pat = PATTERN (dep_insn);
7513 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7514 return cost; /* This should not happen! */
7516 /* The dependency between the two instructions was on the data that
7517 is being stored. Assume that this implies that the address of the
7518 store is not dependent. */
7519 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7520 return cost;
7522 return cost + 3; /* An approximation. */
7525 /* A shift instruction cannot receive its data from an instruction
7526 in the same cycle; add a one cycle penalty. */
7527 if (insn_type == TYPE_SHIFT)
7528 return cost + 3; /* Split before cascade into shift. */
7530 else
7532 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
7533 INSN writes some cycles later. */
7535 /* These are only significant for the fpu unit; writing a fp reg before
7536 the fpu has finished with it stalls the processor. */
7538 /* Reusing an integer register causes no problems. */
7539 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7540 return 0;
7543 return cost;
7546 static int
7547 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7549 enum attr_type insn_type, dep_type;
7550 rtx pat = PATTERN(insn);
7551 rtx dep_pat = PATTERN (dep_insn);
7553 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
7554 return cost;
7556 insn_type = get_attr_type (insn);
7557 dep_type = get_attr_type (dep_insn);
7559 switch (REG_NOTE_KIND (link))
7561 case 0:
7562 /* Data dependency; DEP_INSN writes a register that INSN reads some
7563 cycles later. */
7565 switch (insn_type)
7567 case TYPE_STORE:
7568 case TYPE_FPSTORE:
7569 /* Get the delay iff the address of the store is the dependence. */
7570 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7571 return cost;
7573 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7574 return cost;
7575 return cost + 3;
7577 case TYPE_LOAD:
7578 case TYPE_SLOAD:
7579 case TYPE_FPLOAD:
7580 /* If a load, then the dependence must be on the memory address. If
7581 the addresses aren't equal, then it might be a false dependency. */
7582 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
7584 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
7585 || GET_CODE (SET_DEST (dep_pat)) != MEM
7586 || GET_CODE (SET_SRC (pat)) != MEM
7587 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
7588 XEXP (SET_SRC (pat), 0)))
7589 return cost + 2;
7591 return cost + 8;
7593 break;
7595 case TYPE_BRANCH:
7596 /* Compare to branch latency is 0. There is no benefit from
7597 separating compare and branch. */
7598 if (dep_type == TYPE_COMPARE)
7599 return 0;
7600 /* Floating point compare to branch latency is less than
7601 compare to conditional move. */
7602 if (dep_type == TYPE_FPCMP)
7603 return cost - 1;
7604 break;
7605 default:
7606 break;
7608 break;
7610 case REG_DEP_ANTI:
7611 /* Anti-dependencies only penalize the fpu unit. */
7612 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7613 return 0;
7614 break;
7616 default:
7617 break;
7620 return cost;
7623 static int
7624 sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
7626 switch (sparc_cpu)
7628 case PROCESSOR_SUPERSPARC:
7629 cost = supersparc_adjust_cost (insn, link, dep, cost);
7630 break;
7631 case PROCESSOR_HYPERSPARC:
7632 case PROCESSOR_SPARCLITE86X:
7633 cost = hypersparc_adjust_cost (insn, link, dep, cost);
7634 break;
7635 default:
7636 break;
7638 return cost;
7641 static void
7642 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7643 int sched_verbose ATTRIBUTE_UNUSED,
7644 int max_ready ATTRIBUTE_UNUSED)
7648 static int
7649 sparc_use_sched_lookahead (void)
7651 if (sparc_cpu == PROCESSOR_NIAGARA
7652 || sparc_cpu == PROCESSOR_NIAGARA2)
7653 return 0;
7654 if (sparc_cpu == PROCESSOR_ULTRASPARC
7655 || sparc_cpu == PROCESSOR_ULTRASPARC3)
7656 return 4;
7657 if ((1 << sparc_cpu) &
7658 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
7659 (1 << PROCESSOR_SPARCLITE86X)))
7660 return 3;
7661 return 0;
7664 static int
7665 sparc_issue_rate (void)
7667 switch (sparc_cpu)
7669 case PROCESSOR_NIAGARA:
7670 case PROCESSOR_NIAGARA2:
7671 default:
7672 return 1;
7673 case PROCESSOR_V9:
7674 /* Assume V9 processors are capable of at least dual-issue. */
7675 return 2;
7676 case PROCESSOR_SUPERSPARC:
7677 return 3;
7678 case PROCESSOR_HYPERSPARC:
7679 case PROCESSOR_SPARCLITE86X:
7680 return 2;
7681 case PROCESSOR_ULTRASPARC:
7682 case PROCESSOR_ULTRASPARC3:
7683 return 4;
7687 static int
7688 set_extends (rtx insn)
7690 register rtx pat = PATTERN (insn);
7692 switch (GET_CODE (SET_SRC (pat)))
7694 /* Load and some shift instructions zero extend. */
7695 case MEM:
7696 case ZERO_EXTEND:
7697 /* sethi clears the high bits */
7698 case HIGH:
7699 /* LO_SUM is used with sethi. sethi cleared the high
7700 bits and the values used with lo_sum are positive */
7701 case LO_SUM:
7702 /* Store flag stores 0 or 1 */
7703 case LT: case LTU:
7704 case GT: case GTU:
7705 case LE: case LEU:
7706 case GE: case GEU:
7707 case EQ:
7708 case NE:
7709 return 1;
7710 case AND:
7712 rtx op0 = XEXP (SET_SRC (pat), 0);
7713 rtx op1 = XEXP (SET_SRC (pat), 1);
7714 if (GET_CODE (op1) == CONST_INT)
7715 return INTVAL (op1) >= 0;
7716 if (GET_CODE (op0) != REG)
7717 return 0;
7718 if (sparc_check_64 (op0, insn) == 1)
7719 return 1;
7720 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
7722 case IOR:
7723 case XOR:
7725 rtx op0 = XEXP (SET_SRC (pat), 0);
7726 rtx op1 = XEXP (SET_SRC (pat), 1);
7727 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
7728 return 0;
7729 if (GET_CODE (op1) == CONST_INT)
7730 return INTVAL (op1) >= 0;
7731 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
7733 case LSHIFTRT:
7734 return GET_MODE (SET_SRC (pat)) == SImode;
7735 /* Positive integers leave the high bits zero. */
7736 case CONST_DOUBLE:
7737 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
7738 case CONST_INT:
7739 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
7740 case ASHIFTRT:
7741 case SIGN_EXTEND:
7742 return - (GET_MODE (SET_SRC (pat)) == SImode);
7743 case REG:
7744 return sparc_check_64 (SET_SRC (pat), insn);
7745 default:
7746 return 0;
7750 /* We _ought_ to have only one kind per function, but... */
7751 static GTY(()) rtx sparc_addr_diff_list;
7752 static GTY(()) rtx sparc_addr_list;
7754 void
7755 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
7757 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7758 if (diff)
7759 sparc_addr_diff_list
7760 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7761 else
7762 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7765 static void
7766 sparc_output_addr_vec (rtx vec)
7768 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7769 int idx, vlen = XVECLEN (body, 0);
7771 #ifdef ASM_OUTPUT_ADDR_VEC_START
7772 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7773 #endif
7775 #ifdef ASM_OUTPUT_CASE_LABEL
7776 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7777 NEXT_INSN (lab));
7778 #else
7779 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7780 #endif
7782 for (idx = 0; idx < vlen; idx++)
7784 ASM_OUTPUT_ADDR_VEC_ELT
7785 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
7788 #ifdef ASM_OUTPUT_ADDR_VEC_END
7789 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7790 #endif
7793 static void
7794 sparc_output_addr_diff_vec (rtx vec)
7796 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7797 rtx base = XEXP (XEXP (body, 0), 0);
7798 int idx, vlen = XVECLEN (body, 1);
7800 #ifdef ASM_OUTPUT_ADDR_VEC_START
7801 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7802 #endif
7804 #ifdef ASM_OUTPUT_CASE_LABEL
7805 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7806 NEXT_INSN (lab));
7807 #else
7808 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7809 #endif
7811 for (idx = 0; idx < vlen; idx++)
7813 ASM_OUTPUT_ADDR_DIFF_ELT
7814 (asm_out_file,
7815 body,
7816 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
7817 CODE_LABEL_NUMBER (base));
7820 #ifdef ASM_OUTPUT_ADDR_VEC_END
7821 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7822 #endif
7825 static void
7826 sparc_output_deferred_case_vectors (void)
7828 rtx t;
7829 int align;
7831 if (sparc_addr_list == NULL_RTX
7832 && sparc_addr_diff_list == NULL_RTX)
7833 return;
7835 /* Align to cache line in the function's code section. */
7836 switch_to_section (current_function_section ());
7838 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7839 if (align > 0)
7840 ASM_OUTPUT_ALIGN (asm_out_file, align);
7842 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7843 sparc_output_addr_vec (XEXP (t, 0));
7844 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7845 sparc_output_addr_diff_vec (XEXP (t, 0));
7847 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7850 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
7851 unknown. Return 1 if the high bits are zero, -1 if the register is
7852 sign extended. */
7853 int
7854 sparc_check_64 (rtx x, rtx insn)
7856 /* If a register is set only once it is safe to ignore insns this
7857 code does not know how to handle. The loop will either recognize
7858 the single set and return the correct value or fail to recognize
7859 it and return 0. */
7860 int set_once = 0;
7861 rtx y = x;
7863 gcc_assert (GET_CODE (x) == REG);
7865 if (GET_MODE (x) == DImode)
7866 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
7868 if (flag_expensive_optimizations
7869 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
7870 set_once = 1;
7872 if (insn == 0)
7874 if (set_once)
7875 insn = get_last_insn_anywhere ();
7876 else
7877 return 0;
7880 while ((insn = PREV_INSN (insn)))
7882 switch (GET_CODE (insn))
7884 case JUMP_INSN:
7885 case NOTE:
7886 break;
7887 case CODE_LABEL:
7888 case CALL_INSN:
7889 default:
7890 if (! set_once)
7891 return 0;
7892 break;
7893 case INSN:
7895 rtx pat = PATTERN (insn);
7896 if (GET_CODE (pat) != SET)
7897 return 0;
7898 if (rtx_equal_p (x, SET_DEST (pat)))
7899 return set_extends (insn);
7900 if (y && rtx_equal_p (y, SET_DEST (pat)))
7901 return set_extends (insn);
7902 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
7903 return 0;
7907 return 0;
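/* A hedged illustration of the convention: after a defining insn like

	srl	%i0, 0, %o0	! high 32 bits known zero

   sparc_check_64 on %o0 returns 1; after

	sra	%i0, 0, %o0	! value known sign-extended

   it returns -1; and when the defining insn cannot be found, or is not
   recognized by set_extends, 0 is returned and the caller must emit an
   explicit re-extension itself.  */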
7910 /* Returns assembly code to perform a DImode shift using
7911 a 64-bit global or out register on SPARC-V8+. */
7912 const char *
7913 output_v8plus_shift (rtx *operands, rtx insn, const char *opcode)
7915 static char asm_code[60];
7917 /* The scratch register is only required when the destination
7918 register is not a 64-bit global or out register. */
7919 if (which_alternative != 2)
7920 operands[3] = operands[0];
7922 /* We can only shift by constants <= 63. */
7923 if (GET_CODE (operands[2]) == CONST_INT)
7924 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
7926 if (GET_CODE (operands[1]) == CONST_INT)
7928 output_asm_insn ("mov\t%1, %3", operands);
7930 else
7932 output_asm_insn ("sllx\t%H1, 32, %3", operands);
7933 if (sparc_check_64 (operands[1], insn) <= 0)
7934 output_asm_insn ("srl\t%L1, 0, %L1", operands);
7935 output_asm_insn ("or\t%L1, %3, %3", operands);
7938 strcpy (asm_code, opcode);
7940 if (which_alternative != 2)
7941 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
7942 else
7943 return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
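/* A hedged sketch of what the above emits for a left shift ("sllx")
   whose 64-bit source still lives in a 32-bit register pair %H1/%L1,
   with %3 as the scratch:

	sllx	%H1, 32, %3	! high word into bits 63..32
	srl	%L1, 0, %L1	! zero-extend the low word if needed
	or	%L1, %3, %3	! assemble the full 64-bit value
	sllx	%3, %2, %L0	! the shift proper
	srlx	%L0, 32, %H0	! split the result back into two words

   The srl is skipped when sparc_check_64 proves the high bits of %L1
   are already zero.  */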
7946 /* Output rtl to increment the profiler label LABELNO
7947 for profiling a function entry. */
7949 void
7950 sparc_profile_hook (int labelno)
7952 char buf[32];
7953 rtx lab, fun;
7955 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
7956 if (NO_PROFILE_COUNTERS)
7958 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
7960 else
7962 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
7963 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
7964 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
7968 #ifdef OBJECT_FORMAT_ELF
7969 static void
7970 sparc_elf_asm_named_section (const char *name, unsigned int flags,
7971 tree decl)
7973 if (flags & SECTION_MERGE)
7975 /* entsize cannot be expressed in this section attribute
7976 encoding style. */
7977 default_elf_asm_named_section (name, flags, decl);
7978 return;
7981 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
7983 if (!(flags & SECTION_DEBUG))
7984 fputs (",#alloc", asm_out_file);
7985 if (flags & SECTION_WRITE)
7986 fputs (",#write", asm_out_file);
7987 if (flags & SECTION_TLS)
7988 fputs (",#tls", asm_out_file);
7989 if (flags & SECTION_CODE)
7990 fputs (",#execinstr", asm_out_file);
7992 /* ??? Handle SECTION_BSS. */
7994 fputc ('\n', asm_out_file);
7996 #endif /* OBJECT_FORMAT_ELF */
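/* For reference, the routine above prints Solaris-style '#' attributes
   rather than the usual ELF flag string; a writable TLS section would
   come out as, e.g.:

	.section	".tdata",#alloc,#write,#tls
   */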
7998 /* We do not allow indirect calls to be optimized into sibling calls.
8000 We cannot use sibling calls when delayed branches are disabled
8001 because they will likely require the call delay slot to be filled.
8003 Also, on SPARC 32-bit we cannot emit a sibling call when the
8004 current function returns a structure. This is because the "unimp
8005 after call" convention would cause the callee to return to the
8006 wrong place. The generic code already disallows cases where the
8007 function being called returns a structure.
8009 It may seem strange that this last case could occur. Usually there
8010 is code after the call which jumps to epilogue code which dumps the
8011 return value into the struct return area. That ought to invalidate
8012 the sibling call, right? Well, in the C++ case we can end up passing
8013 the pointer to the struct return area to a constructor (which returns
8014 void) and then nothing else happens. Such a sibling call would look
8015 valid without the added check here.
8017 VxWorks PIC PLT entries require the global pointer to be initialized
8018 on entry. We therefore can't emit sibling calls to them. */
8019 static bool
8020 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8022 return (decl
8023 && flag_delayed_branch
8024 && (TARGET_ARCH64 || ! cfun->returns_struct)
8025 && !(TARGET_VXWORKS_RTP
8026 && flag_pic
8027 && !targetm.binds_local_p (decl)));
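/* The C++ scenario described above looks roughly like this
   (illustrative source, hypothetical names):

     struct S { S (); int x; };
     S f (void) { return S (); }

   Here f passes its struct return area straight to the constructor,
   which returns void, and nothing else happens afterwards; the call
   would look like a perfectly valid tail call if we did not also
   reject sibcalls from functions that return a structure.  */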
8030 /* libfunc renaming. */
8031 #include "config/gofast.h"
8033 static void
8034 sparc_init_libfuncs (void)
8036 if (TARGET_ARCH32)
8038 /* Use the subroutines that Sun's library provides for integer
8039 multiply and divide. The `*' prevents an underscore from
8040 being prepended by the compiler. .umul is a little faster
8041 than .mul. */
8042 set_optab_libfunc (smul_optab, SImode, "*.umul");
8043 set_optab_libfunc (sdiv_optab, SImode, "*.div");
8044 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
8045 set_optab_libfunc (smod_optab, SImode, "*.rem");
8046 set_optab_libfunc (umod_optab, SImode, "*.urem");
8048 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
8049 set_optab_libfunc (add_optab, TFmode, "_Q_add");
8050 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
8051 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
8052 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
8053 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
8055 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
8056 is because with soft-float, the SFmode and DFmode sqrt
8057 instructions will be absent, and the compiler will notice and
8058 try to use the TFmode sqrt instruction for calls to the
8059 builtin function sqrt, but this fails. */
8060 if (TARGET_FPU)
8061 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
8063 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
8064 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
8065 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
8066 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
8067 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
8068 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
8070 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
8071 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
8072 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
8073 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
8075 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
8076 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
8077 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
8078 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
8080 if (DITF_CONVERSION_LIBFUNCS)
8082 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
8083 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
8084 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
8085 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
8088 if (SUN_CONVERSION_LIBFUNCS)
8090 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
8091 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
8092 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
8093 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
8096 if (TARGET_ARCH64)
8098 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
8099 do not exist in the library. Make sure the compiler does not
8100 emit calls to them by accident. (It should always use the
8101 hardware instructions.) */
8102 set_optab_libfunc (smul_optab, SImode, 0);
8103 set_optab_libfunc (sdiv_optab, SImode, 0);
8104 set_optab_libfunc (udiv_optab, SImode, 0);
8105 set_optab_libfunc (smod_optab, SImode, 0);
8106 set_optab_libfunc (umod_optab, SImode, 0);
8108 if (SUN_INTEGER_MULTIPLY_64)
8110 set_optab_libfunc (smul_optab, DImode, "__mul64");
8111 set_optab_libfunc (sdiv_optab, DImode, "__div64");
8112 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
8113 set_optab_libfunc (smod_optab, DImode, "__rem64");
8114 set_optab_libfunc (umod_optab, DImode, "__urem64");
8117 if (SUN_CONVERSION_LIBFUNCS)
8119 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
8120 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
8121 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
8122 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
8126 gofast_maybe_init_libfuncs ();
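/* The practical effect of the renaming above, as a hedged example: on
   TARGET_ARCH32, when an integer multiply cannot be done inline,

     int mul (int a, int b) { return a * b; }

   ends up calling ".umul" (the '*' keeps the assembler name free of a
   leading underscore) instead of the generic "__mulsi3" libcall, and a
   "long double" addition becomes a call to "_Q_add" as mandated by the
   32-bit ABI.  */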
8129 #define def_builtin(NAME, CODE, TYPE) \
8130 add_builtin_function((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \
8131 NULL_TREE)
8133 /* Implement the TARGET_INIT_BUILTINS target hook.
8134 Create builtin functions for special SPARC instructions. */
8136 static void
8137 sparc_init_builtins (void)
8139 if (TARGET_VIS)
8140 sparc_vis_init_builtins ();
8143 /* Create builtin functions for VIS 1.0 instructions. */
8145 static void
8146 sparc_vis_init_builtins (void)
8148 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
8149 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
8150 tree v4hi = build_vector_type (intHI_type_node, 4);
8151 tree v2hi = build_vector_type (intHI_type_node, 2);
8152 tree v2si = build_vector_type (intSI_type_node, 2);
8154 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
8155 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
8156 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
8157 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
8158 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
8159 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
8160 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
8161 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
8162 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
8163 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
8164 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
8165 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
8166 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
8167 v8qi, v8qi,
8168 intDI_type_node, 0);
8169 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
8170 intDI_type_node,
8171 intDI_type_node, 0);
8172 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
8173 ptr_type_node,
8174 intSI_type_node, 0);
8175 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
8176 ptr_type_node,
8177 intDI_type_node, 0);
8179 /* Packing and expanding vectors. */
8180 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi);
8181 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
8182 v8qi_ftype_v2si_v8qi);
8183 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
8184 v2hi_ftype_v2si);
8185 def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi);
8186 def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
8187 v8qi_ftype_v4qi_v4qi);
8189 /* Multiplications. */
8190 def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
8191 v4hi_ftype_v4qi_v4hi);
8192 def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
8193 v4hi_ftype_v4qi_v2hi);
8194 def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
8195 v4hi_ftype_v4qi_v2hi);
8196 def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
8197 v4hi_ftype_v8qi_v4hi);
8198 def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
8199 v4hi_ftype_v8qi_v4hi);
8200 def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
8201 v2si_ftype_v4qi_v2hi);
8202 def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
8203 v2si_ftype_v4qi_v2hi);
8205 /* Data aligning. */
8206 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
8207 v4hi_ftype_v4hi_v4hi);
8208 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
8209 v8qi_ftype_v8qi_v8qi);
8210 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
8211 v2si_ftype_v2si_v2si);
8212 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
8213 di_ftype_di_di);
8214 if (TARGET_ARCH64)
8215 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
8216 ptr_ftype_ptr_di);
8217 else
8218 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
8219 ptr_ftype_ptr_si);
8221 /* Pixel distance. */
8222 def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
8223 di_ftype_v8qi_v8qi_di);
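/* A hedged usage sketch from user code, compiled with -mvis (the
   vector typedefs are the user's, shown here only for illustration):

     typedef short v4hi __attribute__ ((vector_size (8)));
     typedef unsigned char v4qi __attribute__ ((vector_size (4)));

     v4hi expand (v4qi pixels)
     {
       return __builtin_vis_fexpand (pixels);
     }

   fexpand widens each 8-bit element into a 16-bit lane, shifted left
   by 4, matching the v4hi_ftype_v4qi signature registered above.  */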
8226 /* Implement the TARGET_EXPAND_BUILTIN target hook.
8227 Expand builtin functions for SPARC intrinsics. */
8229 static rtx
8230 sparc_expand_builtin (tree exp, rtx target,
8231 rtx subtarget ATTRIBUTE_UNUSED,
8232 enum machine_mode tmode ATTRIBUTE_UNUSED,
8233 int ignore ATTRIBUTE_UNUSED)
8235 tree arg;
8236 call_expr_arg_iterator iter;
8237 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
8238 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
8239 rtx pat, op[4];
8240 enum machine_mode mode[4];
8241 int arg_count = 0;
8243 mode[0] = insn_data[icode].operand[0].mode;
8244 if (!target
8245 || GET_MODE (target) != mode[0]
8246 || ! (*insn_data[icode].operand[0].predicate) (target, mode[0]))
8247 op[0] = gen_reg_rtx (mode[0]);
8248 else
8249 op[0] = target;
8251 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
8253 arg_count++;
8254 mode[arg_count] = insn_data[icode].operand[arg_count].mode;
8255 op[arg_count] = expand_normal (arg);
8257 if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
8258 mode[arg_count]))
8259 op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
8262 switch (arg_count)
8264 case 1:
8265 pat = GEN_FCN (icode) (op[0], op[1]);
8266 break;
8267 case 2:
8268 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
8269 break;
8270 case 3:
8271 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
8272 break;
8273 default:
8274 gcc_unreachable ();
8277 if (!pat)
8278 return NULL_RTX;
8280 emit_insn (pat);
8282 return op[0];
8285 static int
8286 sparc_vis_mul8x16 (int e8, int e16)
8288 return (e8 * e16 + 128) / 256;
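/* Worked example of the rounding above: e8 = 200, e16 = 300 gives
   (200 * 300 + 128) / 256 = 60128 / 256 = 234, i.e. the 8x16 product
   scaled down by 256 and rounded to nearest.  */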
8291 /* Multiply the vector elements in ELTS0 by the corresponding elements in
8292 ELTS1, as specified by FNCODE. All of the elements in the ELTS0 and
8293 ELTS1 lists must be integer constants. A tree list with the results
8294 of the multiplications is returned, and each element in the list is of INNER_TYPE. */
8296 static tree
8297 sparc_handle_vis_mul8x16 (int fncode, tree inner_type, tree elts0, tree elts1)
8299 tree n_elts = NULL_TREE;
8300 int scale;
8302 switch (fncode)
8304 case CODE_FOR_fmul8x16_vis:
8305 for (; elts0 && elts1;
8306 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8308 int val
8309 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8310 TREE_INT_CST_LOW (TREE_VALUE (elts1)));
8311 n_elts = tree_cons (NULL_TREE,
8312 build_int_cst (inner_type, val),
8313 n_elts);
8315 break;
8317 case CODE_FOR_fmul8x16au_vis:
8318 scale = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8320 for (; elts0; elts0 = TREE_CHAIN (elts0))
8322 int val
8323 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8324 scale);
8325 n_elts = tree_cons (NULL_TREE,
8326 build_int_cst (inner_type, val),
8327 n_elts);
8329 break;
8331 case CODE_FOR_fmul8x16al_vis:
8332 scale = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (elts1)));
8334 for (; elts0; elts0 = TREE_CHAIN (elts0))
8336 int val
8337 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8338 scale);
8339 n_elts = tree_cons (NULL_TREE,
8340 build_int_cst (inner_type, val),
8341 n_elts);
8343 break;
8345 default:
8346 gcc_unreachable ();
8349 return nreverse (n_elts);
8352 /* Implement the TARGET_FOLD_BUILTIN target hook.
8353 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
8354 result of the function call is ignored. NULL_TREE is returned if the
8355 function could not be folded. */
8357 static tree
8358 sparc_fold_builtin (tree fndecl, tree arglist, bool ignore)
8360 tree arg0, arg1, arg2;
8361 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
8362 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
8364 if (ignore
8365 && icode != CODE_FOR_alignaddrsi_vis
8366 && icode != CODE_FOR_alignaddrdi_vis)
8367 return fold_convert (rtype, integer_zero_node);
8369 switch (icode)
8371 case CODE_FOR_fexpand_vis:
8372 arg0 = TREE_VALUE (arglist);
8373 STRIP_NOPS (arg0);
8375 if (TREE_CODE (arg0) == VECTOR_CST)
8377 tree inner_type = TREE_TYPE (rtype);
8378 tree elts = TREE_VECTOR_CST_ELTS (arg0);
8379 tree n_elts = NULL_TREE;
8381 for (; elts; elts = TREE_CHAIN (elts))
8383 unsigned int val = TREE_INT_CST_LOW (TREE_VALUE (elts)) << 4;
8384 n_elts = tree_cons (NULL_TREE,
8385 build_int_cst (inner_type, val),
8386 n_elts);
8388 return build_vector (rtype, nreverse (n_elts));
8390 break;
8392 case CODE_FOR_fmul8x16_vis:
8393 case CODE_FOR_fmul8x16au_vis:
8394 case CODE_FOR_fmul8x16al_vis:
8395 arg0 = TREE_VALUE (arglist);
8396 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8397 STRIP_NOPS (arg0);
8398 STRIP_NOPS (arg1);
8400 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8402 tree inner_type = TREE_TYPE (rtype);
8403 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8404 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8405 tree n_elts = sparc_handle_vis_mul8x16 (icode, inner_type, elts0,
8406 elts1);
8408 return build_vector (rtype, n_elts);
8410 break;
8412 case CODE_FOR_fpmerge_vis:
8413 arg0 = TREE_VALUE (arglist);
8414 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8415 STRIP_NOPS (arg0);
8416 STRIP_NOPS (arg1);
8418 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8420 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8421 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8422 tree n_elts = NULL_TREE;
8424 for (; elts0 && elts1;
8425 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8427 n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts0), n_elts);
8428 n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts1), n_elts);
8431 return build_vector (rtype, nreverse (n_elts));
8433 break;
8435 case CODE_FOR_pdist_vis:
8436 arg0 = TREE_VALUE (arglist);
8437 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8438 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8439 STRIP_NOPS (arg0);
8440 STRIP_NOPS (arg1);
8441 STRIP_NOPS (arg2);
8443 if (TREE_CODE (arg0) == VECTOR_CST
8444 && TREE_CODE (arg1) == VECTOR_CST
8445 && TREE_CODE (arg2) == INTEGER_CST)
8447 int overflow = 0;
8448 unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2);
8449 HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2);
8450 tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8451 tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8453 for (; elts0 && elts1;
8454 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8456 unsigned HOST_WIDE_INT
8457 low0 = TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8458 low1 = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8459 HOST_WIDE_INT high0 = TREE_INT_CST_HIGH (TREE_VALUE (elts0));
8460 HOST_WIDE_INT high1 = TREE_INT_CST_HIGH (TREE_VALUE (elts1));
8462 unsigned HOST_WIDE_INT l;
8463 HOST_WIDE_INT h;
8465 overflow |= neg_double (low1, high1, &l, &h);
8466 overflow |= add_double (low0, high0, l, h, &l, &h);
8467 if (h < 0)
8468 overflow |= neg_double (l, h, &l, &h);
8470 overflow |= add_double (low, high, l, h, &low, &high);
8473 gcc_assert (overflow == 0);
8475 return build_int_cst_wide (rtype, low, high);
8478 default:
8479 break;
8482 return NULL_TREE;
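/* For instance, __builtin_vis_fexpand applied to the constant vector
   {1, 2, 3, 4} folds to {16, 32, 48, 64}: the VECTOR_CST loop above
   shifts each 8-bit element left by 4 into its 16-bit lane, so the
   call disappears at compile time.  */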
8485 /* ??? This duplicates information provided to the compiler by the
8486 ??? scheduler description. Some day, teach genautomata to output
8487 ??? the latencies and then CSE will just use that. */
8489 static bool
8490 sparc_rtx_costs (rtx x, int code, int outer_code, int *total,
8491 bool speed ATTRIBUTE_UNUSED)
8493 enum machine_mode mode = GET_MODE (x);
8494 bool float_mode_p = FLOAT_MODE_P (mode);
8496 switch (code)
8498 case CONST_INT:
8499 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
8501 *total = 0;
8502 return true;
8504 /* FALLTHRU */
8506 case HIGH:
8507 *total = 2;
8508 return true;
8510 case CONST:
8511 case LABEL_REF:
8512 case SYMBOL_REF:
8513 *total = 4;
8514 return true;
8516 case CONST_DOUBLE:
8517 if (GET_MODE (x) == VOIDmode
8518 && ((CONST_DOUBLE_HIGH (x) == 0
8519 && CONST_DOUBLE_LOW (x) < 0x1000)
8520 || (CONST_DOUBLE_HIGH (x) == -1
8521 && CONST_DOUBLE_LOW (x) < 0
8522 && CONST_DOUBLE_LOW (x) >= -0x1000)))
8523 *total = 0;
8524 else
8525 *total = 8;
8526 return true;
8528 case MEM:
8529 /* If outer-code was a sign or zero extension, a cost
8530 of COSTS_N_INSNS (1) was already added in. This is
8531 why we are subtracting it back out. */
8532 if (outer_code == ZERO_EXTEND)
8534 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
8536 else if (outer_code == SIGN_EXTEND)
8538 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
8540 else if (float_mode_p)
8542 *total = sparc_costs->float_load;
8544 else
8546 *total = sparc_costs->int_load;
8549 return true;
8551 case PLUS:
8552 case MINUS:
8553 if (float_mode_p)
8554 *total = sparc_costs->float_plusminus;
8555 else
8556 *total = COSTS_N_INSNS (1);
8557 return false;
8559 case MULT:
8560 if (float_mode_p)
8561 *total = sparc_costs->float_mul;
8562 else if (! TARGET_HARD_MUL)
8563 *total = COSTS_N_INSNS (25);
8564 else
8566 int bit_cost;
8568 bit_cost = 0;
8569 if (sparc_costs->int_mul_bit_factor)
8571 int nbits;
8573 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8575 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
8576 for (nbits = 0; value != 0; value &= value - 1)
8577 nbits++;
8579 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
8580 && GET_MODE (XEXP (x, 1)) == VOIDmode)
8582 rtx x1 = XEXP (x, 1);
8583 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
8584 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
8586 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
8587 nbits++;
8588 for (; value2 != 0; value2 &= value2 - 1)
8589 nbits++;
8591 else
8592 nbits = 7;
8594 if (nbits < 3)
8595 nbits = 3;
8596 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
8597 bit_cost = COSTS_N_INSNS (bit_cost);
8600 if (mode == DImode)
8601 *total = sparc_costs->int_mulX + bit_cost;
8602 else
8603 *total = sparc_costs->int_mul + bit_cost;
8605 return false;
8607 case ASHIFT:
8608 case ASHIFTRT:
8609 case LSHIFTRT:
8610 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
8611 return false;
8613 case DIV:
8614 case UDIV:
8615 case MOD:
8616 case UMOD:
8617 if (float_mode_p)
8619 if (mode == DFmode)
8620 *total = sparc_costs->float_div_df;
8621 else
8622 *total = sparc_costs->float_div_sf;
8624 else
8626 if (mode == DImode)
8627 *total = sparc_costs->int_divX;
8628 else
8629 *total = sparc_costs->int_div;
8631 return false;
8633 case NEG:
8634 if (! float_mode_p)
8636 *total = COSTS_N_INSNS (1);
8637 return false;
8639 /* FALLTHRU */
8641 case ABS:
8642 case FLOAT:
8643 case UNSIGNED_FLOAT:
8644 case FIX:
8645 case UNSIGNED_FIX:
8646 case FLOAT_EXTEND:
8647 case FLOAT_TRUNCATE:
8648 *total = sparc_costs->float_move;
8649 return false;
8651 case SQRT:
8652 if (mode == DFmode)
8653 *total = sparc_costs->float_sqrt_df;
8654 else
8655 *total = sparc_costs->float_sqrt_sf;
8656 return false;
8658 case COMPARE:
8659 if (float_mode_p)
8660 *total = sparc_costs->float_cmp;
8661 else
8662 *total = COSTS_N_INSNS (1);
8663 return false;
8665 case IF_THEN_ELSE:
8666 if (float_mode_p)
8667 *total = sparc_costs->float_cmove;
8668 else
8669 *total = sparc_costs->int_cmove;
8670 return false;
8672 case IOR:
8673 /* Handle the NAND vector patterns. */
8674 if (sparc_vector_mode_supported_p (GET_MODE (x))
8675 && GET_CODE (XEXP (x, 0)) == NOT
8676 && GET_CODE (XEXP (x, 1)) == NOT)
8678 *total = COSTS_N_INSNS (1);
8679 return true;
8681 else
8682 return false;
8684 default:
8685 return false;
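/* Worked example of the MULT costing above: multiplying by 10
   (binary 1010, two bits set) on a CPU whose int_mul_bit_factor is 2
   first clamps nbits up to 3, giving bit_cost = (3 - 3) / 2 = 0, so
   the cost is plain int_mul; a constant with 20 bits set would add
   COSTS_N_INSNS ((20 - 3) / 2), i.e. 8 insns worth of extra cost.  */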
8689 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
8690 This is achieved by means of a manual dynamic stack space allocation in
8691 the current frame. We make the assumption that SEQ doesn't contain any
8692 function calls, with the possible exception of calls to the PIC helper. */
8694 static void
8695 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
8697 /* We must preserve the lowest 16 words for the register save area. */
8698 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
8699 /* We really need only 2 words of fresh stack space. */
8700 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
8702 rtx slot
8703 = gen_rtx_MEM (word_mode, plus_constant (stack_pointer_rtx,
8704 SPARC_STACK_BIAS + offset));
8706 emit_insn (gen_stack_pointer_dec (GEN_INT (size)));
8707 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
8708 if (reg2)
8709 emit_insn (gen_rtx_SET (VOIDmode,
8710 adjust_address (slot, word_mode, UNITS_PER_WORD),
8711 reg2));
8712 emit_insn (seq);
8713 if (reg2)
8714 emit_insn (gen_rtx_SET (VOIDmode,
8715 reg2,
8716 adjust_address (slot, word_mode, UNITS_PER_WORD)));
8717 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
8718 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
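/* Schematically, the sequence built above is:

	sub	%sp, size, %sp		! gen_stack_pointer_dec
	st	reg, [slot]		! spill REG (REG2 one word above)
	... SEQ ...
	ld	[slot], reg		! restore both registers
	add	%sp, size, %sp		! gen_stack_pointer_inc

   with SLOT sitting just above the 16-word register save area.  */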
8721 /* Output the assembler code for a thunk function. THUNK_DECL is the
8722 declaration for the thunk function itself, FUNCTION is the decl for
8723 the target function. DELTA is an immediate constant offset to be
8724 added to THIS. If VCALL_OFFSET is nonzero, the word at address
8725 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
8727 static void
8728 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8729 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8730 tree function)
8732 rtx this_rtx, insn, funexp;
8733 unsigned int int_arg_first;
8735 reload_completed = 1;
8736 epilogue_completed = 1;
8738 emit_note (NOTE_INSN_PROLOGUE_END);
8740 if (flag_delayed_branch)
8742 /* We will emit a regular sibcall below, so we need to instruct
8743 output_sibcall that we are in a leaf function. */
8744 sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1;
8746 /* This will cause final.c to invoke leaf_renumber_regs so we
8747 must behave as if we were in a not-yet-leafified function. */
8748 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
8750 else
8752 /* We will emit the sibcall manually below, so we will need to
8753 manually spill non-leaf registers. */
8754 sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0;
8756 /* We really are in a leaf function. */
8757 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
8760 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
8761 returns a structure, the structure return pointer is there instead. */
8762 if (TARGET_ARCH64 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8763 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
8764 else
8765 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
8767 /* Add DELTA. When possible use a plain add, otherwise load it into
8768 a register first. */
8769 if (delta)
8771 rtx delta_rtx = GEN_INT (delta);
8773 if (! SPARC_SIMM13_P (delta))
8775 rtx scratch = gen_rtx_REG (Pmode, 1);
8776 emit_move_insn (scratch, delta_rtx);
8777 delta_rtx = scratch;
8780 /* THIS_RTX += DELTA. */
8781 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
8784 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
8785 if (vcall_offset)
8787 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8788 rtx scratch = gen_rtx_REG (Pmode, 1);
8790 gcc_assert (vcall_offset < 0);
8792 /* SCRATCH = *THIS_RTX. */
8793 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
8795 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
8796 may not have any available scratch register at this point. */
8797 if (SPARC_SIMM13_P (vcall_offset))
8798 ;
8799 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
8800 else if (! fixed_regs[5]
8801 /* The below sequence is made up of at least 2 insns,
8802 while the default method may need only one. */
8803 && vcall_offset < -8192)
8805 rtx scratch2 = gen_rtx_REG (Pmode, 5);
8806 emit_move_insn (scratch2, vcall_offset_rtx);
8807 vcall_offset_rtx = scratch2;
8809 else
8811 rtx increment = GEN_INT (-4096);
8813 /* VCALL_OFFSET is a negative number whose typical range can be
8814 estimated as -32768..0 in 32-bit mode. In almost all cases
8815 it is therefore cheaper to emit multiple add insns than
8816 spilling and loading the constant into a register (at least
8817 6 insns). */
8818 while (! SPARC_SIMM13_P (vcall_offset))
8820 emit_insn (gen_add2_insn (scratch, increment));
8821 vcall_offset += 4096;
8823 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
8826 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
8827 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
8828 gen_rtx_PLUS (Pmode,
8829 scratch,
8830 vcall_offset_rtx)));
8832 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
8833 emit_insn (gen_add2_insn (this_rtx, scratch));
8836 /* Generate a tail call to the target function. */
8837 if (! TREE_USED (function))
8839 assemble_external (function);
8840 TREE_USED (function) = 1;
8842 funexp = XEXP (DECL_RTL (function), 0);
8844 if (flag_delayed_branch)
8846 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8847 insn = emit_call_insn (gen_sibcall (funexp));
8848 SIBLING_CALL_P (insn) = 1;
8850 else
8852 /* The hoops we have to jump through in order to generate a sibcall
8853 without using delay slots... */
8854 rtx spill_reg, spill_reg2, seq, scratch = gen_rtx_REG (Pmode, 1);
8856 if (flag_pic)
8858 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
8859 spill_reg2 = gen_rtx_REG (word_mode, PIC_OFFSET_TABLE_REGNUM);
8860 start_sequence ();
8861 /* Delay emitting the PIC helper function because it needs to
8862 change the section and we are emitting assembly code. */
8863 load_pic_register (true); /* clobbers %o7 */
8864 scratch = legitimize_pic_address (funexp, Pmode, scratch);
8865 seq = get_insns ();
8866 end_sequence ();
8867 emit_and_preserve (seq, spill_reg, spill_reg2);
8869 else if (TARGET_ARCH32)
8871 emit_insn (gen_rtx_SET (VOIDmode,
8872 scratch,
8873 gen_rtx_HIGH (SImode, funexp)));
8874 emit_insn (gen_rtx_SET (VOIDmode,
8875 scratch,
8876 gen_rtx_LO_SUM (SImode, scratch, funexp)));
8878 else /* TARGET_ARCH64 */
8880 switch (sparc_cmodel)
8882 case CM_MEDLOW:
8883 case CM_MEDMID:
8884 /* The destination can serve as a temporary. */
8885 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
8886 break;
8888 case CM_MEDANY:
8889 case CM_EMBMEDANY:
8890 /* The destination cannot serve as a temporary. */
8891 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
8892 start_sequence ();
8893 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
8894 seq = get_insns ();
8895 end_sequence ();
8896 emit_and_preserve (seq, spill_reg, 0);
8897 break;
8899 default:
8900 gcc_unreachable ();
8904 emit_jump_insn (gen_indirect_jump (scratch));
8907 emit_barrier ();
8909 /* Run just enough of rest_of_compilation to get the insns emitted.
8910 There's not really enough bulk here to make other passes such as
8911 instruction scheduling worthwhile. Note that use_thunk calls
8912 assemble_start_function and assemble_end_function. */
8913 insn = get_insns ();
8914 insn_locators_alloc ();
8915 shorten_branches (insn);
8916 final_start_function (insn, file, 1);
8917 final (insn, file, 1);
8918 final_end_function ();
8919 free_after_compilation (cfun);
8921 reload_completed = 0;
8922 epilogue_completed = 0;
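/* Worked example of the add loop in the vcall_offset handling above:
   for vcall_offset = -20000 we emit four "add %g1, -4096, %g1" insns,
   leaving a residual offset of -3616 which fits in a simm13, so the
   final load is roughly "ld [%g1-3616], %g1"; the total displacement
   is 4 * -4096 - 3616 = -20000 as required.  */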
8925 /* Return true if sparc_output_mi_thunk would be able to output the
8926 assembler code for the thunk function specified by the arguments
8927 it is passed, and false otherwise. */
8928 static bool
8929 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
8930 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
8931 HOST_WIDE_INT vcall_offset,
8932 const_tree function ATTRIBUTE_UNUSED)
8934 /* Bound the loop used in the default method above. */
8935 return (vcall_offset >= -32768 || ! fixed_regs[5]);
8938 /* How to allocate a 'struct machine_function'. */
8940 static struct machine_function *
8941 sparc_init_machine_status (void)
8943 return GGC_CNEW (struct machine_function);
8946 /* Locate some local-dynamic symbol still in use by this function
8947 so that we can print its name in local-dynamic base patterns. */
8949 static const char *
8950 get_some_local_dynamic_name (void)
8952 rtx insn;
8954 if (cfun->machine->some_ld_name)
8955 return cfun->machine->some_ld_name;
8957 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8958 if (INSN_P (insn)
8959 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8960 return cfun->machine->some_ld_name;
8962 gcc_unreachable ();
8965 static int
8966 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8968 rtx x = *px;
8970 if (x
8971 && GET_CODE (x) == SYMBOL_REF
8972 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8974 cfun->machine->some_ld_name = XSTR (x, 0);
8975 return 1;
8978 return 0;
8981 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8982 This is called from dwarf2out.c to emit call frame instructions
8983 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8984 static void
8985 sparc_dwarf_handle_frame_unspec (const char *label,
8986 rtx pattern ATTRIBUTE_UNUSED,
8987 int index ATTRIBUTE_UNUSED)
8989 gcc_assert (index == UNSPECV_SAVEW);
8990 dwarf2out_window_save (label);
8993 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8994 We need to emit DTP-relative relocations. */
8996 static void
8997 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
8999 switch (size)
9001 case 4:
9002 fputs ("\t.word\t%r_tls_dtpoff32(", file);
9003 break;
9004 case 8:
9005 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
9006 break;
9007 default:
9008 gcc_unreachable ();
9010 output_addr_const (file, x);
9011 fputs (")", file);
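/* E.g. a 4-byte DTP-relative reference to a TLS variable x comes out
   as:

	.word	%r_tls_dtpoff32(x)

   and the 8-byte variant uses .xword with %r_tls_dtpoff64.  */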
9014 /* Do whatever processing is required at the end of a file. */
9016 static void
9017 sparc_file_end (void)
9019 /* If we haven't emitted the special PIC helper function, do so now. */
9020 if (pic_helper_symbol_name[0] && !pic_helper_emitted_p)
9021 emit_pic_helper ();
9023 if (NEED_INDICATE_EXEC_STACK)
9024 file_end_indicate_exec_stack ();
9027 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
9028 /* Implement TARGET_MANGLE_TYPE. */
9030 static const char *
9031 sparc_mangle_type (const_tree type)
9033 if (!TARGET_64BIT
9034 && TYPE_MAIN_VARIANT (type) == long_double_type_node
9035 && TARGET_LONG_DOUBLE_128)
9036 return "g";
9038 /* For all other types, use normal C++ mangling. */
9039 return NULL;
9041 #endif
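/* Example: with -m32 and 128-bit long double, "void f (long double)"
   mangles as _Z1fg ('g' is the code otherwise used for __float128 in
   the IA-64 C++ ABI) rather than the default _Z1fe.  */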
9043 /* Expand code to perform an 8-bit or 16-bit compare-and-swap by doing
9044 a 32-bit compare-and-swap on the word containing the byte or half-word. */
9046 void
9047 sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
9049 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
9050 rtx addr = gen_reg_rtx (Pmode);
9051 rtx off = gen_reg_rtx (SImode);
9052 rtx oldv = gen_reg_rtx (SImode);
9053 rtx newv = gen_reg_rtx (SImode);
9054 rtx oldvalue = gen_reg_rtx (SImode);
9055 rtx newvalue = gen_reg_rtx (SImode);
9056 rtx res = gen_reg_rtx (SImode);
9057 rtx resv = gen_reg_rtx (SImode);
9058 rtx memsi, val, mask, end_label, loop_label, cc;
9060 emit_insn (gen_rtx_SET (VOIDmode, addr,
9061 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
9063 if (Pmode != SImode)
9064 addr1 = gen_lowpart (SImode, addr1);
9065 emit_insn (gen_rtx_SET (VOIDmode, off,
9066 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
9068 memsi = gen_rtx_MEM (SImode, addr);
9069 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
9070 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
9072 val = force_reg (SImode, memsi);
9074 emit_insn (gen_rtx_SET (VOIDmode, off,
9075 gen_rtx_XOR (SImode, off,
9076 GEN_INT (GET_MODE (mem) == QImode
9077 ? 3 : 2))));
9079 emit_insn (gen_rtx_SET (VOIDmode, off,
9080 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
9082 if (GET_MODE (mem) == QImode)
9083 mask = force_reg (SImode, GEN_INT (0xff));
9084 else
9085 mask = force_reg (SImode, GEN_INT (0xffff));
9087 emit_insn (gen_rtx_SET (VOIDmode, mask,
9088 gen_rtx_ASHIFT (SImode, mask, off)));
9090 emit_insn (gen_rtx_SET (VOIDmode, val,
9091 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9092 val)));
9094 oldval = gen_lowpart (SImode, oldval);
9095 emit_insn (gen_rtx_SET (VOIDmode, oldv,
9096 gen_rtx_ASHIFT (SImode, oldval, off)));
9098 newval = gen_lowpart_common (SImode, newval);
9099 emit_insn (gen_rtx_SET (VOIDmode, newv,
9100 gen_rtx_ASHIFT (SImode, newval, off)));
9102 emit_insn (gen_rtx_SET (VOIDmode, oldv,
9103 gen_rtx_AND (SImode, oldv, mask)));
9105 emit_insn (gen_rtx_SET (VOIDmode, newv,
9106 gen_rtx_AND (SImode, newv, mask)));
9108 end_label = gen_label_rtx ();
9109 loop_label = gen_label_rtx ();
9110 emit_label (loop_label);
9112 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
9113 gen_rtx_IOR (SImode, oldv, val)));
9115 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
9116 gen_rtx_IOR (SImode, newv, val)));
9118 emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue));
9120 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
9122 emit_insn (gen_rtx_SET (VOIDmode, resv,
9123 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9124 res)));
9126 cc = gen_compare_reg_1 (NE, resv, val);
9127 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
9129 /* Use cbranchcc4 to separate the compare and branch! */
9130 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
9131 cc, const0_rtx, loop_label));
9133 emit_label (end_label);
9135 emit_insn (gen_rtx_SET (VOIDmode, res,
9136 gen_rtx_AND (SImode, res, mask)));
9138 emit_insn (gen_rtx_SET (VOIDmode, res,
9139 gen_rtx_LSHIFTRT (SImode, res, off)));
9141 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
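/* Worked example of the address arithmetic above for a QImode access
   at byte address 0x1002: addr = 0x1002 & -4 = 0x1000, off = 2, which
   XORed with 3 gives 1 and shifted left by 3 gives a bit offset of 8.
   On this big-endian target byte 2 of the word occupies bits 15..8,
   which is exactly where the 0xff mask lands before the CAS loop.  */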
9144 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
9146 bool
9147 sparc_frame_pointer_required (void)
9149 return !(leaf_function_p () && only_leaf_regs_used ());
9152 /* The way this is structured, we can't eliminate SFP in favor of SP
9153 if the frame pointer is required: we want to use the SFP->HFP elimination
9154 in that case. But the test in update_eliminables doesn't know we are
9155 assuming below that we only do the former elimination. */
9157 bool
9158 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
9160 return (to == HARD_FRAME_POINTER_REGNUM
9161 || !targetm.frame_pointer_required ());
9164 #include "gt-sparc.h"