/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2013 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "gimple.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "df.h"
#include "opts.h"
#include "tree-pass.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;
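
  /* Editorial note, a worked instance of the formula above: with
     int_mul_bit_factor = 2 (as in ultrasparc_costs below), a multiply
     whose operand has its highest set bit at position 11 adds
     (11 - 3) / 2 = 4 to the base int_mul (or int_mulX) cost.  */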

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   somebody branches between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
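
/* Editorial illustration of the table above: the incoming registers
   %i0-%i5 (hard regs 24-29) are remapped onto the outgoing registers
   %o0-%o5 (hard regs 8-13), and %i7 (31) onto %o7 (15), which is what
   lets a leaf function run in its caller's register window.  */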

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_vis_init_builtins (void);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static int sparc_vis_mul8x16 (int, int);
static void sparc_handle_vis_mul8x16 (tree *, int, tree, tree, tree);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static int sparc_register_move_cost (enum machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     enum machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 enum machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       enum machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (enum machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    enum machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   enum machine_mode,
					   secondary_reload_info *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static bool
sparc_gate_work_around_errata (void)
{
  /* The only errata we handle are those of the AT697F and UT699.  */
  return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
}

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx insn, next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		       ld [address], %fx+1
		       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		       ld [address], %fx+1
		       FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		   ld [address], %fx+1
		   fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
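	  /* Editorial note: single FP registers pair up within a double
	     register, so flipping the low bit yields the sibling, e.g.
	     a load into %f1 (hard reg 33) has sibling %f0 (32).  */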
	  rtx after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL_RTX;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (GET_CODE (PATTERN (after)) == SEQUENCE)
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = XVECEXP (PATTERN (after), 0, 1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

struct rtl_opt_pass pass_work_around_errata =
{
 {
  RTL_PASS,
  "errata",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  sparc_gate_work_around_errata,	/* gate */
  sparc_do_work_around_errata,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_MACH_DEP,				/* tv_id */
  0,					/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_verify_rtl_sharing,		/* todo_flags_finish */
 }
};

struct register_pass_info insert_pass_work_around_errata =
{
  &pass_work_around_errata.pass,	/* pass */
  "dbr",				/* reference_pass_name */
  1,					/* ref_pass_instance_number */
  PASS_POS_INSERT_AFTER			/* po_op */
};

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    }

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  register_pass (&insert_pass_work_around_errata);
}

/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
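
/* Editorial illustration of the three predicates above, assuming the
   usual definitions of SPARC_SIMM13_P (fits in a signed 13-bit
   immediate) and SPARC_SETHI_P (low 10 bits clear):

     0.0f -> 0x00000000: simm13 value          -> fp_mov_p
     1.0f -> 0x3f800000: low 10 bits are clear -> fp_sethi_p
     1.1f -> 0x3f8ccccd: needs sethi + lo_sum  -> fp_high_losum_p  */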

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; now mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partition into hot
     and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}
1690 /* Expand a move instruction. Return true if all work is done. */
1692 bool
1693 sparc_expand_move (enum machine_mode mode, rtx *operands)
1695 /* Handle sets of MEM first. */
1696 if (GET_CODE (operands[0]) == MEM)
1698 /* 0 is a register (or a pair of registers) on SPARC. */
1699 if (register_or_zero_operand (operands[1], mode))
1700 return false;
1702 if (!reload_in_progress)
1704 operands[0] = validize_mem (operands[0]);
1705 operands[1] = force_reg (mode, operands[1]);
1709 /* Fixup TLS cases. */
1710 if (TARGET_HAVE_TLS
1711 && CONSTANT_P (operands[1])
1712 && sparc_tls_referenced_p (operands [1]))
1714 operands[1] = sparc_legitimize_tls_address (operands[1]);
1715 return false;
1718 /* Fixup PIC cases. */
1719 if (flag_pic && CONSTANT_P (operands[1]))
1721 if (pic_address_needs_scratch (operands[1]))
1722 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1724 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1725 if (GET_CODE (operands[1]) == LABEL_REF
1726 && can_use_mov_pic_label_ref (operands[1]))
1728 if (mode == SImode)
1730 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1731 return true;
1734 if (mode == DImode)
1736 gcc_assert (TARGET_ARCH64);
1737 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1738 return true;
1742 if (symbolic_operand (operands[1], mode))
1744 operands[1]
1745 = sparc_legitimize_pic_address (operands[1],
1746 reload_in_progress
1747 ? operands[0] : NULL_RTX);
1748 return false;
1752 /* If we are trying to toss an integer constant into FP registers,
1753 or loading a FP or vector constant, force it into memory. */
1754 if (CONSTANT_P (operands[1])
1755 && REG_P (operands[0])
1756 && (SPARC_FP_REG_P (REGNO (operands[0]))
1757 || SCALAR_FLOAT_MODE_P (mode)
1758 || VECTOR_MODE_P (mode)))
1760 /* emit_group_store will send such bogosity to us when it is
1761 not storing directly into memory. So fix this up to avoid
1762 crashes in output_constant_pool. */
1763 if (operands [1] == const0_rtx)
1764 operands[1] = CONST0_RTX (mode);
1766 /* We can clear FP registers, or set them to all-ones, if TARGET_VIS;
1767 we can always do so for the other registers. */
1768 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1769 && (const_zero_operand (operands[1], mode)
1770 || const_all_ones_operand (operands[1], mode)))
1771 return false;
1773 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1774 /* We are able to build any SF constant in integer registers
1775 with at most 2 instructions. */
1776 && (mode == SFmode
1777 /* And any DF constant in integer registers. */
1778 || (mode == DFmode
1779 && ! can_create_pseudo_p ())))
1780 return false;
1782 operands[1] = force_const_mem (mode, operands[1]);
1783 if (!reload_in_progress)
1784 operands[1] = validize_mem (operands[1]);
1785 return false;
1788 /* Accept non-constants and valid constants unmodified. */
1789 if (!CONSTANT_P (operands[1])
1790 || GET_CODE (operands[1]) == HIGH
1791 || input_operand (operands[1], mode))
1792 return false;
1794 switch (mode)
1796 case QImode:
1797 /* All QImode constants require only one insn, so proceed. */
1798 break;
1800 case HImode:
1801 case SImode:
1802 sparc_emit_set_const32 (operands[0], operands[1]);
1803 return true;
1805 case DImode:
1806 /* input_operand should have filtered out 32-bit mode. */
1807 sparc_emit_set_const64 (operands[0], operands[1]);
1808 return true;
1810 case TImode:
1812 rtx high, low;
1813 /* TImode isn't available in 32-bit mode. */
1814 split_double (operands[1], &high, &low);
1815 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1816 high));
1817 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1818 low));
1820 return true;
1822 default:
1823 gcc_unreachable ();
1826 return false;
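/* As a usage note (a sketch of the contract, inferred from the code
   above): the movMM expanders call this routine first; a "true" return
   means the insns were already emitted here (e.g. the TImode constant
   case, which splits into two movdi's), while "false" means the
   operands were merely legitimized and the caller should emit the
   move itself.  */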
1829 /* Load OP1, a 32-bit constant, into OP0, a register.
1830 We know it can't be done in one insn when we get
1831 here; the move expander guarantees this. */
1833 static void
1834 sparc_emit_set_const32 (rtx op0, rtx op1)
1836 enum machine_mode mode = GET_MODE (op0);
1837 rtx temp = op0;
1839 if (can_create_pseudo_p ())
1840 temp = gen_reg_rtx (mode);
1842 if (GET_CODE (op1) == CONST_INT)
1844 gcc_assert (!small_int_operand (op1, mode)
1845 && !const_high_operand (op1, mode));
1847 /* Emit them as real moves instead of a HIGH/LO_SUM,
1848 this way CSE can see everything and reuse intermediate
1849 values if it wants. */
1850 emit_insn (gen_rtx_SET (VOIDmode, temp,
1851 GEN_INT (INTVAL (op1)
1852 & ~(HOST_WIDE_INT)0x3ff)));
1854 emit_insn (gen_rtx_SET (VOIDmode,
1855 op0,
1856 gen_rtx_IOR (mode, temp,
1857 GEN_INT (INTVAL (op1) & 0x3ff))));
1859 else
1861 /* A symbol, emit in the traditional way. */
1862 emit_insn (gen_rtx_SET (VOIDmode, temp,
1863 gen_rtx_HIGH (mode, op1)));
1864 emit_insn (gen_rtx_SET (VOIDmode,
1865 op0, gen_rtx_LO_SUM (mode, temp, op1)));
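/* As an illustrative sketch (the register choice is arbitrary, not
   taken from this file): loading 0x12345678 through the CONST_INT
   path above amounts to

	sethi	%hi(0x12345678), %o0	! %o0 = 0x12345400
	or	%o0, 0x278, %o0		! %o0 = 0x12345678

   since %hi() covers bits 31:10 and the low 10 bits (0x12345678
   & 0x3ff == 0x278) are OR'd in by the second insn.  */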
1869 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1870 If TEMP is nonzero, we are forbidden to use any other scratch
1871 registers. Otherwise, we are allowed to generate them as needed.
1873 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1874 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1876 void
1877 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1879 rtx temp1, temp2, temp3, temp4, temp5;
1880 rtx ti_temp = 0;
1882 if (temp && GET_MODE (temp) == TImode)
1884 ti_temp = temp;
1885 temp = gen_rtx_REG (DImode, REGNO (temp));
1888 /* SPARC-V9 code-model support. */
1889 switch (sparc_cmodel)
1891 case CM_MEDLOW:
1892 /* The range spanned by all instructions in the object is less
1893 than 2^31 bytes (2GB) and the distance from any instruction
1894 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1895 than 2^31 bytes (2GB).
1897 The executable must be in the low 4TB of the virtual address
1898 space.
1900 sethi %hi(symbol), %temp1
1901 or %temp1, %lo(symbol), %reg */
1902 if (temp)
1903 temp1 = temp; /* op0 is allowed. */
1904 else
1905 temp1 = gen_reg_rtx (DImode);
1907 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1908 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1909 break;
1911 case CM_MEDMID:
1912 /* The range spanned by all instructions in the object is less
1913 than 2^31 bytes (2GB) and the distance from any instruction
1914 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1915 than 2^31 bytes (2GB).
1917 The executable must be in the low 16TB of the virtual address
1918 space.
1920 sethi %h44(symbol), %temp1
1921 or %temp1, %m44(symbol), %temp2
1922 sllx %temp2, 12, %temp3
1923 or %temp3, %l44(symbol), %reg */
1924 if (temp)
1926 temp1 = op0;
1927 temp2 = op0;
1928 temp3 = temp; /* op0 is allowed. */
1930 else
1932 temp1 = gen_reg_rtx (DImode);
1933 temp2 = gen_reg_rtx (DImode);
1934 temp3 = gen_reg_rtx (DImode);
1937 emit_insn (gen_seth44 (temp1, op1));
1938 emit_insn (gen_setm44 (temp2, temp1, op1));
1939 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1940 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1941 emit_insn (gen_setl44 (op0, temp3, op1));
1942 break;
1944 case CM_MEDANY:
1945 /* The range spanned by all instructions in the object is less
1946 than 2^31 bytes (2GB) and the distance from any instruction
1947 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1948 than 2^31 bytes (2GB).
1950 The executable can be placed anywhere in the virtual address
1951 space.
1953 sethi %hh(symbol), %temp1
1954 sethi %lm(symbol), %temp2
1955 or %temp1, %hm(symbol), %temp3
1956 sllx %temp3, 32, %temp4
1957 or %temp4, %temp2, %temp5
1958 or %temp5, %lo(symbol), %reg */
1959 if (temp)
1961 /* It is possible that one of the registers we got for operands[2]
1962 might coincide with that of operands[0] (which is why we made
1963 it TImode). Pick the other one to use as our scratch. */
1964 if (rtx_equal_p (temp, op0))
1966 gcc_assert (ti_temp);
1967 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1969 temp1 = op0;
1970 temp2 = temp; /* op0 is _not_ allowed, see above. */
1971 temp3 = op0;
1972 temp4 = op0;
1973 temp5 = op0;
1975 else
1977 temp1 = gen_reg_rtx (DImode);
1978 temp2 = gen_reg_rtx (DImode);
1979 temp3 = gen_reg_rtx (DImode);
1980 temp4 = gen_reg_rtx (DImode);
1981 temp5 = gen_reg_rtx (DImode);
1984 emit_insn (gen_sethh (temp1, op1));
1985 emit_insn (gen_setlm (temp2, op1));
1986 emit_insn (gen_sethm (temp3, temp1, op1));
1987 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1988 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1989 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1990 gen_rtx_PLUS (DImode, temp4, temp2)));
1991 emit_insn (gen_setlo (op0, temp5, op1));
1992 break;
1994 case CM_EMBMEDANY:
1995 /* Old old old backwards-compatibility cruft here.
1996 Essentially it is MEDLOW with a fixed 64-bit
1997 virtual base added to all data segment addresses.
1998 Text-segment stuff is computed like MEDANY, we can't
1999 reuse the code above because the relocation knobs
2000 look different.
2002 Data segment: sethi %hi(symbol), %temp1
2003 add %temp1, EMBMEDANY_BASE_REG, %temp2
2004 or %temp2, %lo(symbol), %reg */
2005 if (data_segment_operand (op1, GET_MODE (op1)))
2007 if (temp)
2009 temp1 = temp; /* op0 is allowed. */
2010 temp2 = op0;
2012 else
2014 temp1 = gen_reg_rtx (DImode);
2015 temp2 = gen_reg_rtx (DImode);
2018 emit_insn (gen_embmedany_sethi (temp1, op1));
2019 emit_insn (gen_embmedany_brsum (temp2, temp1));
2020 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2023 /* Text segment: sethi %uhi(symbol), %temp1
2024 sethi %hi(symbol), %temp2
2025 or %temp1, %ulo(symbol), %temp3
2026 sllx %temp3, 32, %temp4
2027 or %temp4, %temp2, %temp5
2028 or %temp5, %lo(symbol), %reg */
2029 else
2031 if (temp)
2033 /* It is possible that one of the registers we got for operands[2]
2034 might coincide with that of operands[0] (which is why we made
2035 it TImode). Pick the other one to use as our scratch. */
2036 if (rtx_equal_p (temp, op0))
2038 gcc_assert (ti_temp);
2039 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2041 temp1 = op0;
2042 temp2 = temp; /* op0 is _not_ allowed, see above. */
2043 temp3 = op0;
2044 temp4 = op0;
2045 temp5 = op0;
2047 else
2049 temp1 = gen_reg_rtx (DImode);
2050 temp2 = gen_reg_rtx (DImode);
2051 temp3 = gen_reg_rtx (DImode);
2052 temp4 = gen_reg_rtx (DImode);
2053 temp5 = gen_reg_rtx (DImode);
2056 emit_insn (gen_embmedany_textuhi (temp1, op1));
2057 emit_insn (gen_embmedany_texthi (temp2, op1));
2058 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2059 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2060 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2061 emit_insn (gen_rtx_SET (VOIDmode, temp5,
2062 gen_rtx_PLUS (DImode, temp4, temp2)));
2063 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2065 break;
2067 default:
2068 gcc_unreachable ();
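/* A hedged summary of the relocation operators used above, per the
   standard SPARC V9 semantics rather than anything restated in this
   file: %hh() selects bits 63:42 of the value, %hm() bits 41:32,
   %lm() bits 31:10 and %lo() bits 9:0, so the sethi/or/sllx/or
   sequences reassemble a 64-bit address 22 and 10 bits at a time.  */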
2072 #if HOST_BITS_PER_WIDE_INT == 32
2073 static void
2074 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
2076 gcc_unreachable ();
2078 #else
2079 /* These avoid problems when cross compiling. If we do not
2080 go through all this hair then the optimizer will see
2081 invalid REG_EQUAL notes or in some cases none at all. */
2082 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2083 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2084 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2085 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2087 /* The optimizer is not allowed to assume anything about exactly
2088 which bits are set for a HIGH; they are unspecified.
2089 Unfortunately this leads to many missed optimizations
2090 during CSE. We mask out the non-HIGH bits so that the result
2091 matches a plain movdi, to alleviate this problem. */
2092 static rtx
2093 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2095 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
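/* For instance (an illustrative sketch): gen_safe_HIGH64 (temp,
   0x12345678) emits a plain DImode move of 0x12345400, which still
   assembles to a single sethi but, unlike a HIGH rtx, tells CSE
   exactly which bits end up set in TEMP.  */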
2098 static rtx
2099 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2101 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
2104 static rtx
2105 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2107 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2110 static rtx
2111 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2113 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2116 /* Worker routines for 64-bit constant formation on arch64.
2117 One of the key things to be doing in these emissions is
2118 to create as many temp REGs as possible. This makes it
2119 possible for half-built constants to be used later when
2120 such values are similar to something required later on.
2121 Without doing this, the optimizer cannot see such
2122 opportunities. */
2124 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2125 unsigned HOST_WIDE_INT, int);
2127 static void
2128 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2129 unsigned HOST_WIDE_INT low_bits, int is_neg)
2131 unsigned HOST_WIDE_INT high_bits;
2133 if (is_neg)
2134 high_bits = (~low_bits) & 0xffffffff;
2135 else
2136 high_bits = low_bits;
2138 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2139 if (!is_neg)
2141 emit_insn (gen_rtx_SET (VOIDmode, op0,
2142 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2144 else
2146 /* If we are XOR'ing with -1, then we should emit a one's complement
2147 instead. This way the combiner will notice logical operations
2148 such as ANDN later on and substitute. */
2149 if ((low_bits & 0x3ff) == 0x3ff)
2151 emit_insn (gen_rtx_SET (VOIDmode, op0,
2152 gen_rtx_NOT (DImode, temp)));
2154 else
2156 emit_insn (gen_rtx_SET (VOIDmode, op0,
2157 gen_safe_XOR64 (temp,
2158 (-(HOST_WIDE_INT)0x400
2159 | (low_bits & 0x3ff)))));
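/* An illustrative sketch of the negated case (register names are
   arbitrary): loading 0xffffffff12345678 arrives here with is_neg
   set, so we build from ~low_bits == 0xedcba987:

	sethi	%hi(0xedcba987), %g1	! %g1 = 0x00000000edcba800
	xor	%g1, -392, %o0		! %o0 = 0xffffffff12345678

   where -392 == -0x400 | 0x278, i.e. the sign-extension bits plus
   the low 10 bits of the original constant.  */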
2164 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2165 unsigned HOST_WIDE_INT, int);
2167 static void
2168 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2169 unsigned HOST_WIDE_INT high_bits,
2170 unsigned HOST_WIDE_INT low_immediate,
2171 int shift_count)
2173 rtx temp2 = op0;
2175 if ((high_bits & 0xfffffc00) != 0)
2177 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2178 if ((high_bits & ~0xfffffc00) != 0)
2179 emit_insn (gen_rtx_SET (VOIDmode, op0,
2180 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2181 else
2182 temp2 = temp;
2184 else
2186 emit_insn (gen_safe_SET64 (temp, high_bits));
2187 temp2 = temp;
2190 /* Now shift it up into place. */
2191 emit_insn (gen_rtx_SET (VOIDmode, op0,
2192 gen_rtx_ASHIFT (DImode, temp2,
2193 GEN_INT (shift_count))));
2195 /* If there is a low immediate part piece, finish up by
2196 putting that in as well. */
2197 if (low_immediate != 0)
2198 emit_insn (gen_rtx_SET (VOIDmode, op0,
2199 gen_safe_OR64 (op0, low_immediate)));
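/* As a sketch: loading 0x1234567800000000 with high_bits ==
   0x12345678, low_immediate == 0 and shift_count == 32 should give
   (register names are arbitrary)

	sethi	%hi(0x12345678), %g1	! %g1 = 0x12345400
	or	%g1, 0x278, %o0		! %o0 = 0x12345678
	sllx	%o0, 32, %o0		! %o0 = 0x1234567800000000  */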
2202 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2203 unsigned HOST_WIDE_INT);
2205 /* Full 64-bit constant decomposition. Even though this is the
2206 'worst' case, we still optimize a few things away. */
2207 static void
2208 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2209 unsigned HOST_WIDE_INT high_bits,
2210 unsigned HOST_WIDE_INT low_bits)
2212 rtx sub_temp = op0;
2214 if (can_create_pseudo_p ())
2215 sub_temp = gen_reg_rtx (DImode);
2217 if ((high_bits & 0xfffffc00) != 0)
2219 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2220 if ((high_bits & ~0xfffffc00) != 0)
2221 emit_insn (gen_rtx_SET (VOIDmode,
2222 sub_temp,
2223 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2224 else
2225 sub_temp = temp;
2227 else
2229 emit_insn (gen_safe_SET64 (temp, high_bits));
2230 sub_temp = temp;
2233 if (can_create_pseudo_p ())
2235 rtx temp2 = gen_reg_rtx (DImode);
2236 rtx temp3 = gen_reg_rtx (DImode);
2237 rtx temp4 = gen_reg_rtx (DImode);
2239 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2240 gen_rtx_ASHIFT (DImode, sub_temp,
2241 GEN_INT (32))));
2243 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2244 if ((low_bits & ~0xfffffc00) != 0)
2246 emit_insn (gen_rtx_SET (VOIDmode, temp3,
2247 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2248 emit_insn (gen_rtx_SET (VOIDmode, op0,
2249 gen_rtx_PLUS (DImode, temp4, temp3)));
2251 else
2253 emit_insn (gen_rtx_SET (VOIDmode, op0,
2254 gen_rtx_PLUS (DImode, temp4, temp2)));
2257 else
2259 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2260 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2261 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2262 int to_shift = 12;
2264 /* We are in the middle of reload, so this is really
2265 painful. However, we still make an attempt to
2266 avoid emitting truly stupid code. */
2267 if (low1 != const0_rtx)
2269 emit_insn (gen_rtx_SET (VOIDmode, op0,
2270 gen_rtx_ASHIFT (DImode, sub_temp,
2271 GEN_INT (to_shift))));
2272 emit_insn (gen_rtx_SET (VOIDmode, op0,
2273 gen_rtx_IOR (DImode, op0, low1)));
2274 sub_temp = op0;
2275 to_shift = 12;
2277 else
2279 to_shift += 12;
2281 if (low2 != const0_rtx)
2283 emit_insn (gen_rtx_SET (VOIDmode, op0,
2284 gen_rtx_ASHIFT (DImode, sub_temp,
2285 GEN_INT (to_shift))));
2286 emit_insn (gen_rtx_SET (VOIDmode, op0,
2287 gen_rtx_IOR (DImode, op0, low2)));
2288 sub_temp = op0;
2289 to_shift = 8;
2291 else
2293 to_shift += 8;
2295 emit_insn (gen_rtx_SET (VOIDmode, op0,
2296 gen_rtx_ASHIFT (DImode, sub_temp,
2297 GEN_INT (to_shift))));
2298 if (low3 != const0_rtx)
2299 emit_insn (gen_rtx_SET (VOIDmode, op0,
2300 gen_rtx_IOR (DImode, op0, low3)));
2301 /* phew... */
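/* So the full decomposition above costs at most six insns when
   pseudos are available: sethi/or for the high half, an sllx by 32,
   sethi (and possibly or) for the low half, and a final add.  */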
2305 /* Analyze a 64-bit constant for certain properties. */
2306 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2307 unsigned HOST_WIDE_INT,
2308 int *, int *, int *);
2310 static void
2311 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2312 unsigned HOST_WIDE_INT low_bits,
2313 int *hbsp, int *lbsp, int *abbasp)
2315 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2316 int i;
2318 lowest_bit_set = highest_bit_set = -1;
2319 i = 0;
2322 if ((lowest_bit_set == -1)
2323 && ((low_bits >> i) & 1))
2324 lowest_bit_set = i;
2325 if ((highest_bit_set == -1)
2326 && ((high_bits >> (32 - i - 1)) & 1))
2327 highest_bit_set = (64 - i - 1);
2329 while (++i < 32
2330 && ((highest_bit_set == -1)
2331 || (lowest_bit_set == -1)));
2332 if (i == 32)
2334 i = 0;
2337 if ((lowest_bit_set == -1)
2338 && ((high_bits >> i) & 1))
2339 lowest_bit_set = i + 32;
2340 if ((highest_bit_set == -1)
2341 && ((low_bits >> (32 - i - 1)) & 1))
2342 highest_bit_set = 32 - i - 1;
2344 while (++i < 32
2345 && ((highest_bit_set == -1)
2346 || (lowest_bit_set == -1)));
2348 /* If there are no bits set this should have gone out
2349 as one instruction! */
2350 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2351 all_bits_between_are_set = 1;
2352 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2354 if (i < 32)
2356 if ((low_bits & (1 << i)) != 0)
2357 continue;
2359 else
2361 if ((high_bits & (1 << (i - 32))) != 0)
2362 continue;
2364 all_bits_between_are_set = 0;
2365 break;
2367 *hbsp = highest_bit_set;
2368 *lbsp = lowest_bit_set;
2369 *abbasp = all_bits_between_are_set;
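/* For example: analyzing 0x000000000003fc00 (bits 10-17 set) yields
   *lbsp == 10, *hbsp == 17 and *abbasp == 1, since every bit between
   the lowest and the highest set bit is itself set.  */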
2372 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2374 static int
2375 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2376 unsigned HOST_WIDE_INT low_bits)
2378 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2380 if (high_bits == 0
2381 || high_bits == 0xffffffff)
2382 return 1;
2384 analyze_64bit_constant (high_bits, low_bits,
2385 &highest_bit_set, &lowest_bit_set,
2386 &all_bits_between_are_set);
2388 if ((highest_bit_set == 63
2389 || lowest_bit_set == 0)
2390 && all_bits_between_are_set != 0)
2391 return 1;
2393 if ((highest_bit_set - lowest_bit_set) < 21)
2394 return 1;
2396 return 0;
2399 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2400 unsigned HOST_WIDE_INT,
2401 int, int);
2403 static unsigned HOST_WIDE_INT
2404 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2405 unsigned HOST_WIDE_INT low_bits,
2406 int lowest_bit_set, int shift)
2408 HOST_WIDE_INT hi, lo;
2410 if (lowest_bit_set < 32)
2412 lo = (low_bits >> lowest_bit_set) << shift;
2413 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2415 else
2417 lo = 0;
2418 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2420 gcc_assert (! (hi & lo));
2421 return (hi | lo);
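/* A worked sketch: for the constant 0x000000ffff000000 (bits 24-39
   set), create_simple_focus_bits (0xff, 0xff000000, 24, 10) returns
   0x03fffc00, i.e. the same 16-bit window moved down so that it
   starts at bit 10, which is exactly the shape a single sethi can
   materialize before the caller shifts it back into place.  */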
2424 /* Here we are sure to be arch64 and this is an integer constant
2425 being loaded into a register. Emit the most efficient
2426 insn sequence possible. Detection of all the 1-insn cases
2427 has been done already. */
2428 static void
2429 sparc_emit_set_const64 (rtx op0, rtx op1)
2431 unsigned HOST_WIDE_INT high_bits, low_bits;
2432 int lowest_bit_set, highest_bit_set;
2433 int all_bits_between_are_set;
2434 rtx temp = 0;
2436 /* Sanity check that we know what we are working with. */
2437 gcc_assert (TARGET_ARCH64
2438 && (GET_CODE (op0) == SUBREG
2439 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2441 if (! can_create_pseudo_p ())
2442 temp = op0;
2444 if (GET_CODE (op1) != CONST_INT)
2446 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2447 return;
2450 if (! temp)
2451 temp = gen_reg_rtx (DImode);
2453 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2454 low_bits = (INTVAL (op1) & 0xffffffff);
2456 /* low_bits bits 0 --> 31
2457 high_bits bits 32 --> 63 */
2459 analyze_64bit_constant (high_bits, low_bits,
2460 &highest_bit_set, &lowest_bit_set,
2461 &all_bits_between_are_set);
2463 /* First try for a 2-insn sequence. */
2465 /* These situations are preferred because the optimizer can
2466 * do more things with them:
2467 * 1) mov -1, %reg
2468 * sllx %reg, shift, %reg
2469 * 2) mov -1, %reg
2470 * srlx %reg, shift, %reg
2471 * 3) mov some_small_const, %reg
2472 * sllx %reg, shift, %reg
2474 if (((highest_bit_set == 63
2475 || lowest_bit_set == 0)
2476 && all_bits_between_are_set != 0)
2477 || ((highest_bit_set - lowest_bit_set) < 12))
2479 HOST_WIDE_INT the_const = -1;
2480 int shift = lowest_bit_set;
2482 if ((highest_bit_set != 63
2483 && lowest_bit_set != 0)
2484 || all_bits_between_are_set == 0)
2486 the_const =
2487 create_simple_focus_bits (high_bits, low_bits,
2488 lowest_bit_set, 0);
2490 else if (lowest_bit_set == 0)
2491 shift = -(63 - highest_bit_set);
2493 gcc_assert (SPARC_SIMM13_P (the_const));
2494 gcc_assert (shift != 0);
2496 emit_insn (gen_safe_SET64 (temp, the_const));
2497 if (shift > 0)
2498 emit_insn (gen_rtx_SET (VOIDmode,
2499 op0,
2500 gen_rtx_ASHIFT (DImode,
2501 temp,
2502 GEN_INT (shift))));
2503 else if (shift < 0)
2504 emit_insn (gen_rtx_SET (VOIDmode,
2505 op0,
2506 gen_rtx_LSHIFTRT (DImode,
2507 temp,
2508 GEN_INT (-shift))));
2509 return;
2512 /* Now a range of 22 or fewer bits set somewhere.
2513 * 1) sethi %hi(focus_bits), %reg
2514 * sllx %reg, shift, %reg
2515 * 2) sethi %hi(focus_bits), %reg
2516 * srlx %reg, shift, %reg
2518 if ((highest_bit_set - lowest_bit_set) < 21)
2520 unsigned HOST_WIDE_INT focus_bits =
2521 create_simple_focus_bits (high_bits, low_bits,
2522 lowest_bit_set, 10);
2524 gcc_assert (SPARC_SETHI_P (focus_bits));
2525 gcc_assert (lowest_bit_set != 10);
2527 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2529 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2530 if (lowest_bit_set < 10)
2531 emit_insn (gen_rtx_SET (VOIDmode,
2532 op0,
2533 gen_rtx_LSHIFTRT (DImode, temp,
2534 GEN_INT (10 - lowest_bit_set))));
2535 else if (lowest_bit_set > 10)
2536 emit_insn (gen_rtx_SET (VOIDmode,
2537 op0,
2538 gen_rtx_ASHIFT (DImode, temp,
2539 GEN_INT (lowest_bit_set - 10))));
2540 return;
2543 /* 1) sethi %hi(low_bits), %reg
2544 * or %reg, %lo(low_bits), %reg
2545 * 2) sethi %hi(~low_bits), %reg
2546 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2548 if (high_bits == 0
2549 || high_bits == 0xffffffff)
2551 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2552 (high_bits == 0xffffffff));
2553 return;
2556 /* Now, try 3-insn sequences. */
2558 /* 1) sethi %hi(high_bits), %reg
2559 * or %reg, %lo(high_bits), %reg
2560 * sllx %reg, 32, %reg
2562 if (low_bits == 0)
2564 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2565 return;
2568 /* We may be able to do something quick
2569 when the constant is negated, so try that. */
2570 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2571 (~low_bits) & 0xfffffc00))
2573 /* NOTE: The trailing bits get XOR'd so we need the
2574 non-negated bits, not the negated ones. */
2575 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2577 if ((((~high_bits) & 0xffffffff) == 0
2578 && ((~low_bits) & 0x80000000) == 0)
2579 || (((~high_bits) & 0xffffffff) == 0xffffffff
2580 && ((~low_bits) & 0x80000000) != 0))
2582 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2584 if ((SPARC_SETHI_P (fast_int)
2585 && (~high_bits & 0xffffffff) == 0)
2586 || SPARC_SIMM13_P (fast_int))
2587 emit_insn (gen_safe_SET64 (temp, fast_int));
2588 else
2589 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2591 else
2593 rtx negated_const;
2594 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2595 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2596 sparc_emit_set_const64 (temp, negated_const);
2599 /* If we are XOR'ing with -1, then we should emit a one's complement
2600 instead. This way the combiner will notice logical operations
2601 such as ANDN later on and substitute. */
2602 if (trailing_bits == 0x3ff)
2604 emit_insn (gen_rtx_SET (VOIDmode, op0,
2605 gen_rtx_NOT (DImode, temp)));
2607 else
2609 emit_insn (gen_rtx_SET (VOIDmode,
2610 op0,
2611 gen_safe_XOR64 (temp,
2612 (-0x400 | trailing_bits))));
2614 return;
2617 /* 1) sethi %hi(xxx), %reg
2618 * or %reg, %lo(xxx), %reg
2619 * sllx %reg, yyy, %reg
2621 * ??? This is just a generalized version of the low_bits==0
2622 * thing above, FIXME...
2624 if ((highest_bit_set - lowest_bit_set) < 32)
2626 unsigned HOST_WIDE_INT focus_bits =
2627 create_simple_focus_bits (high_bits, low_bits,
2628 lowest_bit_set, 0);
2630 /* We can't get here in this state. */
2631 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2633 /* So what we know is that the set bits straddle the
2634 middle of the 64-bit word. */
2635 sparc_emit_set_const64_quick2 (op0, temp,
2636 focus_bits, 0,
2637 lowest_bit_set);
2638 return;
2641 /* 1) sethi %hi(high_bits), %reg
2642 * or %reg, %lo(high_bits), %reg
2643 * sllx %reg, 32, %reg
2644 * or %reg, low_bits, %reg
2646 if (SPARC_SIMM13_P(low_bits)
2647 && ((int)low_bits > 0))
2649 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2650 return;
2653 /* The easiest way when all else fails, is full decomposition. */
2654 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2656 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2658 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2659 return the mode to be used for the comparison. For floating-point,
2660 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2661 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2662 processing is needed. */
2664 enum machine_mode
2665 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2667 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2669 switch (op)
2671 case EQ:
2672 case NE:
2673 case UNORDERED:
2674 case ORDERED:
2675 case UNLT:
2676 case UNLE:
2677 case UNGT:
2678 case UNGE:
2679 case UNEQ:
2680 case LTGT:
2681 return CCFPmode;
2683 case LT:
2684 case LE:
2685 case GT:
2686 case GE:
2687 return CCFPEmode;
2689 default:
2690 gcc_unreachable ();
2693 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2694 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2696 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2697 return CCX_NOOVmode;
2698 else
2699 return CC_NOOVmode;
2701 else
2703 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2704 return CCXmode;
2705 else
2706 return CCmode;
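/* For instance, (compare (plus:SI r1 r2) (const_int 0)) selects
   CC_NOOVmode: the condition codes will come from the arithmetic
   insn itself (e.g. an addcc), whose overflow bit differs from that
   of a plain compare, so only overflow-insensitive conditions may be
   tested in that mode.  */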
2710 /* Emit the compare insn and return the CC reg for a CODE comparison
2711 with operands X and Y. */
2713 static rtx
2714 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2716 enum machine_mode mode;
2717 rtx cc_reg;
2719 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2720 return x;
2722 mode = SELECT_CC_MODE (code, x, y);
2724 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2725 fcc regs (cse can't tell they're really call clobbered regs and will
2726 remove a duplicate comparison even if there is an intervening function
2727 call - it will then try to reload the cc reg via an int reg which is why
2728 we need the movcc patterns). It is possible to provide the movcc
2729 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2730 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2731 to tell cse that CCFPE mode registers (even pseudos) are call
2732 clobbered. */
2734 /* ??? This is an experiment. Rather than making changes to cse which may
2735 or may not be easy/clean, we do our own cse. This is possible because
2736 we will generate hard registers. Cse knows they're call clobbered (it
2737 doesn't know the same thing about pseudos). If we guess wrong, no big
2738 deal, but if we win, great! */
2740 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2741 #if 1 /* experiment */
2743 int reg;
2744 /* We cycle through the registers to ensure they're all exercised. */
2745 static int next_fcc_reg = 0;
2746 /* Previous x,y for each fcc reg. */
2747 static rtx prev_args[4][2];
2749 /* Scan prev_args for x,y. */
2750 for (reg = 0; reg < 4; reg++)
2751 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2752 break;
2753 if (reg == 4)
2755 reg = next_fcc_reg;
2756 prev_args[reg][0] = x;
2757 prev_args[reg][1] = y;
2758 next_fcc_reg = (next_fcc_reg + 1) & 3;
2760 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2762 #else
2763 cc_reg = gen_reg_rtx (mode);
2764 #endif /* ! experiment */
2765 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2766 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2767 else
2768 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2770 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2771 will only result in an unrecognizable insn, so there is no point in asserting. */
2772 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2774 return cc_reg;
2778 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2780 rtx
2781 gen_compare_reg (rtx cmp)
2783 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2786 /* This function is used for v9 only.
2787 DEST is the target of the Scc insn.
2788 CODE is the code for an Scc's comparison.
2789 X and Y are the values we compare.
2791 This function is needed to turn
2793 (set (reg:SI 110)
2794 (gt (reg:CCX 100 %icc)
2795 (const_int 0)))
2796 into
2797 (set (reg:SI 110)
2798 (gt:DI (reg:CCX 100 %icc)
2799 (const_int 0)))
2801 I.e., the instruction recognizer needs to see the mode of the comparison to
2802 find the right instruction. We could use "gt:DI" right in the
2803 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2805 static int
2806 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2808 if (! TARGET_ARCH64
2809 && (GET_MODE (x) == DImode
2810 || GET_MODE (dest) == DImode))
2811 return 0;
2813 /* Try to use the movrCC insns. */
2814 if (TARGET_ARCH64
2815 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2816 && y == const0_rtx
2817 && v9_regcmp_p (compare_code))
2819 rtx op0 = x;
2820 rtx temp;
2822 /* Special case for op0 != 0. This can be done with one instruction if
2823 dest == x. */
2825 if (compare_code == NE
2826 && GET_MODE (dest) == DImode
2827 && rtx_equal_p (op0, dest))
2829 emit_insn (gen_rtx_SET (VOIDmode, dest,
2830 gen_rtx_IF_THEN_ELSE (DImode,
2831 gen_rtx_fmt_ee (compare_code, DImode,
2832 op0, const0_rtx),
2833 const1_rtx,
2834 dest)));
2835 return 1;
2838 if (reg_overlap_mentioned_p (dest, op0))
2840 /* Handle the case where dest == x.
2841 We "early clobber" the result. */
2842 op0 = gen_reg_rtx (GET_MODE (x));
2843 emit_move_insn (op0, x);
2846 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2847 if (GET_MODE (op0) != DImode)
2849 temp = gen_reg_rtx (DImode);
2850 convert_move (temp, op0, 0);
2852 else
2853 temp = op0;
2854 emit_insn (gen_rtx_SET (VOIDmode, dest,
2855 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2856 gen_rtx_fmt_ee (compare_code, DImode,
2857 temp, const0_rtx),
2858 const1_rtx,
2859 dest)));
2860 return 1;
2862 else
2864 x = gen_compare_reg_1 (compare_code, x, y);
2865 y = const0_rtx;
2867 gcc_assert (GET_MODE (x) != CC_NOOVmode
2868 && GET_MODE (x) != CCX_NOOVmode);
2870 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2871 emit_insn (gen_rtx_SET (VOIDmode, dest,
2872 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2873 gen_rtx_fmt_ee (compare_code,
2874 GET_MODE (x), x, y),
2875 const1_rtx, dest)));
2876 return 1;
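/* As an illustration of the special case above (register names are
   arbitrary): "dest = (dest != 0)" in DImode becomes a single
   conditional move on the register value, roughly

	movrnz	%o0, 1, %o0

   instead of a compare followed by a conditional move on %xcc.  */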
2881 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2882 without jumps using the addx/subx instructions. */
2884 bool
2885 emit_scc_insn (rtx operands[])
2887 rtx tem;
2888 rtx x;
2889 rtx y;
2890 enum rtx_code code;
2892 /* The quad-word fp compare library routines all return nonzero to indicate
2893 true, which is different from the equivalent libgcc routines, so we must
2894 handle them specially here. */
2895 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2897 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2898 GET_CODE (operands[1]));
2899 operands[2] = XEXP (operands[1], 0);
2900 operands[3] = XEXP (operands[1], 1);
2903 code = GET_CODE (operands[1]);
2904 x = operands[2];
2905 y = operands[3];
2907 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2908 more applications). The exception to this is "reg != 0" which can
2909 be done in one instruction on v9 (so we do it). */
2910 if (code == EQ)
2912 if (GET_MODE (x) == SImode)
2914 rtx pat = gen_seqsi_special (operands[0], x, y);
2915 emit_insn (pat);
2916 return true;
2918 else if (GET_MODE (x) == DImode)
2920 rtx pat = gen_seqdi_special (operands[0], x, y);
2921 emit_insn (pat);
2922 return true;
2926 if (code == NE)
2928 if (GET_MODE (x) == SImode)
2930 rtx pat = gen_snesi_special (operands[0], x, y);
2931 emit_insn (pat);
2932 return true;
2934 else if (GET_MODE (x) == DImode)
2936 rtx pat;
2937 if (TARGET_VIS3)
2938 pat = gen_snedi_special_vis3 (operands[0], x, y);
2939 else
2940 pat = gen_snedi_special (operands[0], x, y);
2941 emit_insn (pat);
2942 return true;
2946 if (TARGET_V9
2947 && TARGET_ARCH64
2948 && GET_MODE (x) == DImode
2949 && !(TARGET_VIS3
2950 && (code == GTU || code == LTU))
2951 && gen_v9_scc (operands[0], code, x, y))
2952 return true;
2954 /* We can do LTU and GEU using the addx/subx instructions too. And
2955 for GTU/LEU, if both operands are registers, swap them and fall
2956 back to the easy case. */
2957 if (code == GTU || code == LEU)
2959 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2960 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2962 tem = x;
2963 x = y;
2964 y = tem;
2965 code = swap_condition (code);
2969 if (code == LTU
2970 || (!TARGET_VIS3 && code == GEU))
2972 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2973 gen_rtx_fmt_ee (code, SImode,
2974 gen_compare_reg_1 (code, x, y),
2975 const0_rtx)));
2976 return true;
2979 /* All the possibilities to use addx/subx-based sequences have been
2980 exhausted; try for a 3-instruction sequence using v9 conditional
2981 moves. */
2982 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
2983 return true;
2985 /* Nope, do branches. */
2986 return false;
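/* A hedged sketch of the branchless sequence (register names are
   arbitrary): for LTU, i.e. "dest = (x < y)" unsigned, the expansion
   above should come out as something like

	subcc	%o0, %o1, %g0	! set carry iff x < y (unsigned)
	addx	%g0, 0, %o2	! dest = carry

   so no conditional branch is ever emitted.  */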
2989 /* Emit a conditional jump insn for the v9 architecture using comparison code
2990 CODE and jump target LABEL.
2991 This function exists to take advantage of the v9 brxx insns. */
2993 static void
2994 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2996 emit_jump_insn (gen_rtx_SET (VOIDmode,
2997 pc_rtx,
2998 gen_rtx_IF_THEN_ELSE (VOIDmode,
2999 gen_rtx_fmt_ee (code, GET_MODE (op0),
3000 op0, const0_rtx),
3001 gen_rtx_LABEL_REF (VOIDmode, label),
3002 pc_rtx)));
3005 /* Emit a conditional jump insn for the UA2011 architecture using
3006 comparison code CODE and jump target LABEL. This function exists
3007 to take advantage of the UA2011 Compare and Branch insns. */
3009 static void
3010 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3012 rtx if_then_else;
3014 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3015 gen_rtx_fmt_ee(code, GET_MODE(op0),
3016 op0, op1),
3017 gen_rtx_LABEL_REF (VOIDmode, label),
3018 pc_rtx);
3020 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
3023 void
3024 emit_conditional_branch_insn (rtx operands[])
3026 /* The quad-word fp compare library routines all return nonzero to indicate
3027 true, which is different from the equivalent libgcc routines, so we must
3028 handle them specially here. */
3029 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3031 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3032 GET_CODE (operands[0]));
3033 operands[1] = XEXP (operands[0], 0);
3034 operands[2] = XEXP (operands[0], 1);
3037 /* If we can tell early on that the comparison is against a constant
3038 that won't fit in the 5-bit signed immediate field of a cbcond,
3039 use one of the other v9 conditional branch sequences. */
3040 if (TARGET_CBCOND
3041 && GET_CODE (operands[1]) == REG
3042 && (GET_MODE (operands[1]) == SImode
3043 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3044 && (GET_CODE (operands[2]) != CONST_INT
3045 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3047 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3048 return;
3051 if (TARGET_ARCH64 && operands[2] == const0_rtx
3052 && GET_CODE (operands[1]) == REG
3053 && GET_MODE (operands[1]) == DImode)
3055 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3056 return;
3059 operands[1] = gen_compare_reg (operands[0]);
3060 operands[2] = const0_rtx;
3061 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3062 operands[1], operands[2]);
3063 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3064 operands[3]));
3068 /* Generate a DFmode part of a hard TFmode register.
3069 REG is the TFmode hard register, LOW is 1 for the
3070 low 64 bits of the register and 0 otherwise. */
3072 rtx
3073 gen_df_reg (rtx reg, int low)
3075 int regno = REGNO (reg);
3077 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3078 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3079 return gen_rtx_REG (DFmode, regno);
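/* For example, with a TFmode value living in %f0-%f3 on this
   big-endian target: gen_df_reg (reg, 0) returns the %f0 DFmode half
   (the most significant 64 bits) and gen_df_reg (reg, 1) returns the
   %f2 half (the least significant 64 bits).  */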
3082 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3083 Unlike normal calls, TFmode operands are passed by reference. It is
3084 assumed that no more than 3 operands are required. */
3086 static void
3087 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3089 rtx ret_slot = NULL, arg[3], func_sym;
3090 int i;
3092 /* We only expect to be called for conversions, unary, and binary ops. */
3093 gcc_assert (nargs == 2 || nargs == 3);
3095 for (i = 0; i < nargs; ++i)
3097 rtx this_arg = operands[i];
3098 rtx this_slot;
3100 /* TFmode arguments and return values are passed by reference. */
3101 if (GET_MODE (this_arg) == TFmode)
3103 int force_stack_temp;
3105 force_stack_temp = 0;
3106 if (TARGET_BUGGY_QP_LIB && i == 0)
3107 force_stack_temp = 1;
3109 if (GET_CODE (this_arg) == MEM
3110 && ! force_stack_temp)
3112 tree expr = MEM_EXPR (this_arg);
3113 if (expr)
3114 mark_addressable (expr);
3115 this_arg = XEXP (this_arg, 0);
3117 else if (CONSTANT_P (this_arg)
3118 && ! force_stack_temp)
3120 this_slot = force_const_mem (TFmode, this_arg);
3121 this_arg = XEXP (this_slot, 0);
3123 else
3125 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3127 /* Operand 0 is the return value. We'll copy it out later. */
3128 if (i > 0)
3129 emit_move_insn (this_slot, this_arg);
3130 else
3131 ret_slot = this_slot;
3133 this_arg = XEXP (this_slot, 0);
3137 arg[i] = this_arg;
3140 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3142 if (GET_MODE (operands[0]) == TFmode)
3144 if (nargs == 2)
3145 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3146 arg[0], GET_MODE (arg[0]),
3147 arg[1], GET_MODE (arg[1]));
3148 else
3149 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3150 arg[0], GET_MODE (arg[0]),
3151 arg[1], GET_MODE (arg[1]),
3152 arg[2], GET_MODE (arg[2]));
3154 if (ret_slot)
3155 emit_move_insn (operands[0], ret_slot);
3157 else
3159 rtx ret;
3161 gcc_assert (nargs == 2);
3163 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3164 GET_MODE (operands[0]), 1,
3165 arg[1], GET_MODE (arg[1]));
3167 if (ret != operands[0])
3168 emit_move_insn (operands[0], ret);
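/* As an illustration, per the SPARC 64-bit ABI convention for
   software quad floats: a TFmode addition reaches this routine as a
   call essentially equivalent to _Qp_add (&dest, &op1, &op2), with
   all three TFmode values passed by reference and the result copied
   back out of RET_SLOT afterwards.  */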
3172 /* Expand soft-float TFmode calls to sparc abi routines. */
3174 static void
3175 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3177 const char *func;
3179 switch (code)
3181 case PLUS:
3182 func = "_Qp_add";
3183 break;
3184 case MINUS:
3185 func = "_Qp_sub";
3186 break;
3187 case MULT:
3188 func = "_Qp_mul";
3189 break;
3190 case DIV:
3191 func = "_Qp_div";
3192 break;
3193 default:
3194 gcc_unreachable ();
3197 emit_soft_tfmode_libcall (func, 3, operands);
3200 static void
3201 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3203 const char *func;
3205 gcc_assert (code == SQRT);
3206 func = "_Qp_sqrt";
3208 emit_soft_tfmode_libcall (func, 2, operands);
3211 static void
3212 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3214 const char *func;
3216 switch (code)
3218 case FLOAT_EXTEND:
3219 switch (GET_MODE (operands[1]))
3221 case SFmode:
3222 func = "_Qp_stoq";
3223 break;
3224 case DFmode:
3225 func = "_Qp_dtoq";
3226 break;
3227 default:
3228 gcc_unreachable ();
3230 break;
3232 case FLOAT_TRUNCATE:
3233 switch (GET_MODE (operands[0]))
3235 case SFmode:
3236 func = "_Qp_qtos";
3237 break;
3238 case DFmode:
3239 func = "_Qp_qtod";
3240 break;
3241 default:
3242 gcc_unreachable ();
3244 break;
3246 case FLOAT:
3247 switch (GET_MODE (operands[1]))
3249 case SImode:
3250 func = "_Qp_itoq";
3251 if (TARGET_ARCH64)
3252 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3253 break;
3254 case DImode:
3255 func = "_Qp_xtoq";
3256 break;
3257 default:
3258 gcc_unreachable ();
3260 break;
3262 case UNSIGNED_FLOAT:
3263 switch (GET_MODE (operands[1]))
3265 case SImode:
3266 func = "_Qp_uitoq";
3267 if (TARGET_ARCH64)
3268 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3269 break;
3270 case DImode:
3271 func = "_Qp_uxtoq";
3272 break;
3273 default:
3274 gcc_unreachable ();
3276 break;
3278 case FIX:
3279 switch (GET_MODE (operands[0]))
3281 case SImode:
3282 func = "_Qp_qtoi";
3283 break;
3284 case DImode:
3285 func = "_Qp_qtox";
3286 break;
3287 default:
3288 gcc_unreachable ();
3290 break;
3292 case UNSIGNED_FIX:
3293 switch (GET_MODE (operands[0]))
3295 case SImode:
3296 func = "_Qp_qtoui";
3297 break;
3298 case DImode:
3299 func = "_Qp_qtoux";
3300 break;
3301 default:
3302 gcc_unreachable ();
3304 break;
3306 default:
3307 gcc_unreachable ();
3310 emit_soft_tfmode_libcall (func, 2, operands);
3313 /* Expand a hard-float tfmode operation. All arguments must be in
3314 registers. */
3316 static void
3317 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3319 rtx op, dest;
3321 if (GET_RTX_CLASS (code) == RTX_UNARY)
3323 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3324 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3326 else
3328 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3329 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3330 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3331 operands[1], operands[2]);
3334 if (register_operand (operands[0], VOIDmode))
3335 dest = operands[0];
3336 else
3337 dest = gen_reg_rtx (GET_MODE (operands[0]));
3339 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3341 if (dest != operands[0])
3342 emit_move_insn (operands[0], dest);
3345 void
3346 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3348 if (TARGET_HARD_QUAD)
3349 emit_hard_tfmode_operation (code, operands);
3350 else
3351 emit_soft_tfmode_binop (code, operands);
3354 void
3355 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3357 if (TARGET_HARD_QUAD)
3358 emit_hard_tfmode_operation (code, operands);
3359 else
3360 emit_soft_tfmode_unop (code, operands);
3363 void
3364 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3366 if (TARGET_HARD_QUAD)
3367 emit_hard_tfmode_operation (code, operands);
3368 else
3369 emit_soft_tfmode_cvt (code, operands);
3372 /* Return nonzero if a branch/jump/call instruction will be emitting
3373 a nop into its delay slot. */
3375 int
3376 empty_delay_slot (rtx insn)
3378 rtx seq;
3380 /* If no previous instruction (should not happen), return true. */
3381 if (PREV_INSN (insn) == NULL)
3382 return 1;
3384 seq = NEXT_INSN (PREV_INSN (insn));
3385 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3386 return 0;
3388 return 1;
3391 /* Return nonzero if we should emit a nop after a cbcond instruction.
3392 The cbcond instruction does not have a delay slot; however, there is
3393 a severe performance penalty if a control transfer appears right
3394 after a cbcond. Therefore we emit a nop when we detect this
3395 situation. */
3397 int
3398 emit_cbcond_nop (rtx insn)
3400 rtx next = next_active_insn (insn);
3402 if (!next)
3403 return 1;
3405 if (GET_CODE (next) == INSN
3406 && GET_CODE (PATTERN (next)) == SEQUENCE)
3407 next = XVECEXP (PATTERN (next), 0, 0);
3408 else if (GET_CODE (next) == CALL_INSN
3409 && GET_CODE (PATTERN (next)) == PARALLEL)
3411 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3413 if (GET_CODE (delay) == RETURN)
3415 /* It's a sibling call. Do not emit the nop if we're going
3416 to emit something other than the jump itself as the first
3417 instruction of the sibcall sequence. */
3418 if (sparc_leaf_function_p || TARGET_FLAT)
3419 return 0;
3423 if (NONJUMP_INSN_P (next))
3424 return 0;
3426 return 1;
3429 /* Return nonzero if TRIAL can go into the call delay slot. */
3431 int
3432 eligible_for_call_delay (rtx trial)
3434 rtx pat;
3436 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3437 return 0;
3439 /* Binutils allows
3440 call __tls_get_addr, %tgd_call (foo)
3441 add %l7, %o0, %o0, %tgd_add (foo)
3442 while Sun as/ld does not. */
3443 if (TARGET_GNU_TLS || !TARGET_TLS)
3444 return 1;
3446 pat = PATTERN (trial);
3448 /* We must reject tgd_add{32|64}, i.e.
3449 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3450 and tldm_add{32|64}, i.e.
3451 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3452 for Sun as/ld. */
3453 if (GET_CODE (pat) == SET
3454 && GET_CODE (SET_SRC (pat)) == PLUS)
3456 rtx unspec = XEXP (SET_SRC (pat), 1);
3458 if (GET_CODE (unspec) == UNSPEC
3459 && (XINT (unspec, 1) == UNSPEC_TLSGD
3460 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3461 return 0;
3464 return 1;
3467 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3468 instruction. RETURN_P is true if the v9 variant 'return' is to be
3469 considered in the test too.
3471 TRIAL must be a SET whose destination is a REG appropriate for the
3472 'restore' instruction or, if RETURN_P is true, for the 'return'
3473 instruction. */
3475 static int
3476 eligible_for_restore_insn (rtx trial, bool return_p)
3478 rtx pat = PATTERN (trial);
3479 rtx src = SET_SRC (pat);
3480 bool src_is_freg = false;
3481 rtx src_reg;
3483 /* Since we now can do moves between float and integer registers when
3484 VIS3 is enabled, we have to catch this case. We can allow such
3485 moves when doing a 'return' however. */
3486 src_reg = src;
3487 if (GET_CODE (src_reg) == SUBREG)
3488 src_reg = SUBREG_REG (src_reg);
3489 if (GET_CODE (src_reg) == REG
3490 && SPARC_FP_REG_P (REGNO (src_reg)))
3491 src_is_freg = true;
3493 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3494 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3495 && arith_operand (src, GET_MODE (src))
3496 && ! src_is_freg)
3498 if (TARGET_ARCH64)
3499 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3500 else
3501 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3504 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3505 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3506 && arith_double_operand (src, GET_MODE (src))
3507 && ! src_is_freg)
3508 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3510 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3511 else if (! TARGET_FPU && register_operand (src, SFmode))
3512 return 1;
3514 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3515 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3516 return 1;
3518 /* If we have the 'return' instruction, anything that does not use
3519 local or output registers and can go into a delay slot wins. */
3520 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3521 return 1;
3523 /* The 'restore src1,src2,dest' pattern for SImode. */
3524 else if (GET_CODE (src) == PLUS
3525 && register_operand (XEXP (src, 0), SImode)
3526 && arith_operand (XEXP (src, 1), SImode))
3527 return 1;
3529 /* The 'restore src1,src2,dest' pattern for DImode. */
3530 else if (GET_CODE (src) == PLUS
3531 && register_operand (XEXP (src, 0), DImode)
3532 && arith_double_operand (XEXP (src, 1), DImode))
3533 return 1;
3535 /* The 'restore src1,%lo(src2),dest' pattern. */
3536 else if (GET_CODE (src) == LO_SUM
3537 && ! TARGET_CM_MEDMID
3538 && ((register_operand (XEXP (src, 0), SImode)
3539 && immediate_operand (XEXP (src, 1), SImode))
3540 || (TARGET_ARCH64
3541 && register_operand (XEXP (src, 0), DImode)
3542 && immediate_operand (XEXP (src, 1), DImode))))
3543 return 1;
3545 /* The 'restore src,src,dest' pattern. */
3546 else if (GET_CODE (src) == ASHIFT
3547 && (register_operand (XEXP (src, 0), SImode)
3548 || register_operand (XEXP (src, 0), DImode))
3549 && XEXP (src, 1) == const1_rtx)
3550 return 1;
3552 return 0;
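/* A sketch of why this matters (registers are illustrative): a
   delay-slot insn computing "%i0 = %i0 + %i1" can be folded with the
   epilogue into

	ret
	restore	%i0, %i1, %o0

   where the restore reads the callee's window and deposits the sum
   in the caller's %o0, the return-value register.  */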
3555 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3557 int
3558 eligible_for_return_delay (rtx trial)
3560 int regno;
3561 rtx pat;
3563 /* If the function uses __builtin_eh_return, the eh_return machinery
3564 occupies the delay slot. */
3565 if (crtl->calls_eh_return)
3566 return 0;
3568 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3569 return 0;
3571 /* In the case of a leaf or flat function, anything can go into the slot. */
3572 if (sparc_leaf_function_p || TARGET_FLAT)
3573 return 1;
3575 if (!NONJUMP_INSN_P (trial))
3576 return 0;
3578 pat = PATTERN (trial);
3579 if (GET_CODE (pat) == PARALLEL)
3581 int i;
3583 if (! TARGET_V9)
3584 return 0;
3585 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3587 rtx expr = XVECEXP (pat, 0, i);
3588 if (GET_CODE (expr) != SET)
3589 return 0;
3590 if (GET_CODE (SET_DEST (expr)) != REG)
3591 return 0;
3592 regno = REGNO (SET_DEST (expr));
3593 if (regno >= 8 && regno < 24)
3594 return 0;
3596 return !epilogue_renumber (&pat, 1);
3599 if (GET_CODE (pat) != SET)
3600 return 0;
3602 if (GET_CODE (SET_DEST (pat)) != REG)
3603 return 0;
3605 regno = REGNO (SET_DEST (pat));
3607 /* Otherwise, only operations which can be done in tandem with
3608 a `restore' or `return' insn can go into the delay slot. */
3609 if (regno >= 8 && regno < 24)
3610 return 0;
3612 /* If this instruction sets up a floating-point register and we have a return
3613 instruction, it can probably go in. But restore will not work
3614 with FP_REGS. */
3615 if (! SPARC_INT_REG_P (regno))
3616 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3618 return eligible_for_restore_insn (trial, true);
3621 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3623 int
3624 eligible_for_sibcall_delay (rtx trial)
3626 rtx pat;
3628 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3629 return 0;
3631 if (!NONJUMP_INSN_P (trial))
3632 return 0;
3634 pat = PATTERN (trial);
3636 if (sparc_leaf_function_p || TARGET_FLAT)
3638 /* If the tail call is done using the call instruction,
3639 we have to restore %o7 in the delay slot. */
3640 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3641 return 0;
3643 /* %g1 is used to build the function address. */
3644 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3645 return 0;
3647 return 1;
3650 if (GET_CODE (pat) != SET)
3651 return 0;
3653 /* Otherwise, only operations which can be done in tandem with
3654 a `restore' insn can go into the delay slot. */
3655 if (GET_CODE (SET_DEST (pat)) != REG
3656 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3657 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3658 return 0;
3660 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3661 in most cases. */
3662 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3663 return 0;
3665 return eligible_for_restore_insn (trial, false);
3668 /* Determine if it's legal to put X into the constant pool. This
3669 is not possible if X contains the address of a symbol that is
3670 not constant (TLS) or not known at final link time (PIC). */
3672 static bool
3673 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3675 switch (GET_CODE (x))
3677 case CONST_INT:
3678 case CONST_DOUBLE:
3679 case CONST_VECTOR:
3680 /* Accept all non-symbolic constants. */
3681 return false;
3683 case LABEL_REF:
3684 /* Labels are OK iff we are non-PIC. */
3685 return flag_pic != 0;
3687 case SYMBOL_REF:
3688 /* 'Naked' TLS symbol references are never OK,
3689 non-TLS symbols are OK iff we are non-PIC. */
3690 if (SYMBOL_REF_TLS_MODEL (x))
3691 return true;
3692 else
3693 return flag_pic != 0;
3695 case CONST:
3696 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3697 case PLUS:
3698 case MINUS:
3699 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3700 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3701 case UNSPEC:
3702 return true;
3703 default:
3704 gcc_unreachable ();
3708 /* Global Offset Table support. */
3709 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3710 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3712 /* Return the SYMBOL_REF for the Global Offset Table. */
3714 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3716 static rtx
3717 sparc_got (void)
3719 if (!sparc_got_symbol)
3720 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3722 return sparc_got_symbol;
3725 /* Ensure that we are not using patterns that are not OK with PIC. */
3727 int
3728 check_pic (int i)
3730 rtx op;
3732 switch (flag_pic)
3734 case 1:
3735 op = recog_data.operand[i];
3736 gcc_assert (GET_CODE (op) != SYMBOL_REF
3737 && (GET_CODE (op) != CONST
3738 || (GET_CODE (XEXP (op, 0)) == MINUS
3739 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3740 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3741 case 2:
3742 default:
3743 return 1;
3747 /* Return true if X is an address which needs a temporary register when
3748 reloaded while generating PIC code. */
3750 int
3751 pic_address_needs_scratch (rtx x)
3753 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3754 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3755 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3756 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3757 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3758 return 1;
3760 return 0;
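/* For example, (const (plus (symbol_ref "x") (const_int 8192)))
   needs a scratch register, because 8192 does not fit in a signed
   13-bit immediate; the same symbol plus a small offset such as 64
   does not need one.  */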
3763 /* Determine if a given RTX is a valid constant. We already know this
3764 satisfies CONSTANT_P. */
3766 static bool
3767 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3769 switch (GET_CODE (x))
3771 case CONST:
3772 case SYMBOL_REF:
3773 if (sparc_tls_referenced_p (x))
3774 return false;
3775 break;
3777 case CONST_DOUBLE:
3778 if (GET_MODE (x) == VOIDmode)
3779 return true;
3781 /* Floating point constants are generally not ok.
3782 The only exceptions are 0.0 and all-ones in VIS. */
3783 if (TARGET_VIS
3784 && SCALAR_FLOAT_MODE_P (mode)
3785 && (const_zero_operand (x, mode)
3786 || const_all_ones_operand (x, mode)))
3787 return true;
3789 return false;
3791 case CONST_VECTOR:
3792 /* Vector constants are generally not ok.
3793 The only exceptions are 0 and -1 in VIS. */
3794 if (TARGET_VIS
3795 && (const_zero_operand (x, mode)
3796 || const_all_ones_operand (x, mode)))
3797 return true;
3799 return false;
3801 default:
3802 break;
3805 return true;
3808 /* Determine if a given RTX is a valid constant address. */
3810 bool
3811 constant_address_p (rtx x)
3813 switch (GET_CODE (x))
3815 case LABEL_REF:
3816 case CONST_INT:
3817 case HIGH:
3818 return true;
3820 case CONST:
3821 if (flag_pic && pic_address_needs_scratch (x))
3822 return false;
3823 return sparc_legitimate_constant_p (Pmode, x);
3825 case SYMBOL_REF:
3826 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3828 default:
3829 return false;
3833 /* Nonzero if the constant value X is a legitimate general operand
3834 when generating PIC code. It is given that flag_pic is on and
3835 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3837 bool
3838 legitimate_pic_operand_p (rtx x)
3840 if (pic_address_needs_scratch (x))
3841 return false;
3842 if (sparc_tls_referenced_p (x))
3843 return false;
3844 return true;
3847 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3848 (CONST_INT_P (X) \
3849 && INTVAL (X) >= -0x1000 \
3850 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3852 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3853 (CONST_INT_P (X) \
3854 && INTVAL (X) >= -0x1000 \
3855 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
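/* Concretely: RTX_OK_FOR_OFFSET_P accepts offsets in [-4096, 4091]
   for SImode and [-4096, 4087] for DImode, so the offset of the last
   word of a multi-word access still fits in a signed 13-bit
   immediate.  RTX_OK_FOR_OLO10_P is tighter (0xc00 instead of
   0x1000), presumably because the %lo() part of the address can
   itself contribute up to 0x3ff.  */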
3857 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3859 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3860 ordinarily. This changes a bit when generating PIC. */
3862 static bool
3863 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3865 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3867 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3868 rs1 = addr;
3869 else if (GET_CODE (addr) == PLUS)
3871 rs1 = XEXP (addr, 0);
3872 rs2 = XEXP (addr, 1);
3874 /* Canonicalize. REG comes first; if there are no regs,
3875 LO_SUM comes first. */
3876 if (!REG_P (rs1)
3877 && GET_CODE (rs1) != SUBREG
3878 && (REG_P (rs2)
3879 || GET_CODE (rs2) == SUBREG
3880 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3882 rs1 = XEXP (addr, 1);
3883 rs2 = XEXP (addr, 0);
3886 if ((flag_pic == 1
3887 && rs1 == pic_offset_table_rtx
3888 && !REG_P (rs2)
3889 && GET_CODE (rs2) != SUBREG
3890 && GET_CODE (rs2) != LO_SUM
3891 && GET_CODE (rs2) != MEM
3892 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3893 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3894 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3895 || ((REG_P (rs1)
3896 || GET_CODE (rs1) == SUBREG)
3897 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3899 imm1 = rs2;
3900 rs2 = NULL;
3902 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3903 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3905 /* We prohibit REG + REG for TFmode when there are no quad move insns
3906 and we consequently need to split. We do this because REG+REG
3907 is not an offsettable address. If we get the situation in reload
3908 where source and destination of a movtf pattern are both MEMs with
3909 REG+REG address, then only one of them gets converted to an
3910 offsettable address. */
3911 if (mode == TFmode
3912 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3913 return 0;
3915 /* Likewise for TImode, but in all cases. */
3916 if (mode == TImode)
3917 return 0;
3919 /* We prohibit REG + REG on ARCH32 if not optimizing for
3920 DFmode/DImode because then mem_min_alignment is likely to be zero
3921 after reload and the forced split would lack a matching splitter
3922 pattern. */
3923 if (TARGET_ARCH32 && !optimize
3924 && (mode == DFmode || mode == DImode))
3925 return 0;
3927 else if (USE_AS_OFFSETABLE_LO10
3928 && GET_CODE (rs1) == LO_SUM
3929 && TARGET_ARCH64
3930 && ! TARGET_CM_MEDMID
3931 && RTX_OK_FOR_OLO10_P (rs2, mode))
3933 rs2 = NULL;
3934 imm1 = XEXP (rs1, 1);
3935 rs1 = XEXP (rs1, 0);
3936 if (!CONSTANT_P (imm1)
3937 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3938 return 0;
3941 else if (GET_CODE (addr) == LO_SUM)
3943 rs1 = XEXP (addr, 0);
3944 imm1 = XEXP (addr, 1);
3946 if (!CONSTANT_P (imm1)
3947 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3948 return 0;
3950 /* We can't allow TFmode in 32-bit mode, because an offset greater
3951 than the alignment (8) may cause the LO_SUM to overflow. */
3952 if (mode == TFmode && TARGET_ARCH32)
3953 return 0;
3955 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3956 return 1;
3957 else
3958 return 0;
3960 if (GET_CODE (rs1) == SUBREG)
3961 rs1 = SUBREG_REG (rs1);
3962 if (!REG_P (rs1))
3963 return 0;
3965 if (rs2)
3967 if (GET_CODE (rs2) == SUBREG)
3968 rs2 = SUBREG_REG (rs2);
3969 if (!REG_P (rs2))
3970 return 0;
3973 if (strict)
3975 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3976 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3977 return 0;
3979 else
3981 if ((! SPARC_INT_REG_P (REGNO (rs1))
3982 && REGNO (rs1) != FRAME_POINTER_REGNUM
3983 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3984 || (rs2
3985 && (! SPARC_INT_REG_P (REGNO (rs2))
3986 && REGNO (rs2) != FRAME_POINTER_REGNUM
3987 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3988 return 0;
3990 return 1;
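/* Worked examples for the predicate above (illustrative only):

     (plus (reg %i0) (reg %i1))         accepted: REG+REG
     (plus (reg %fp) (const_int -8))    accepted: REG+SMALLINT
     (plus (reg %i0) (const_int 5000))  rejected: offset exceeds simm13
     REG+REG in TFmode                  rejected unless TARGET_ARCH64 and
                                        TARGET_HARD_QUAD, since the forced
                                        split needs an offsettable address.  */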
3993 /* Return the SYMBOL_REF for the tls_get_addr function. */
3995 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3997 static rtx
3998 sparc_tls_get_addr (void)
4000 if (!sparc_tls_symbol)
4001 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4003 return sparc_tls_symbol;
4006 /* Return the Global Offset Table to be used in TLS mode. */
4008 static rtx
4009 sparc_tls_got (void)
4011 /* In PIC mode, this is just the PIC offset table. */
4012 if (flag_pic)
4014 crtl->uses_pic_offset_table = 1;
4015 return pic_offset_table_rtx;
4018 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4019 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4020 if (TARGET_SUN_TLS && TARGET_ARCH32)
4022 load_got_register ();
4023 return global_offset_table_rtx;
4026 /* In all other cases, we load a new pseudo with the GOT symbol. */
4027 return copy_to_reg (sparc_got ());
4030 /* Return true if X contains a thread-local symbol. */
4032 static bool
4033 sparc_tls_referenced_p (rtx x)
4035 if (!TARGET_HAVE_TLS)
4036 return false;
4038 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4039 x = XEXP (XEXP (x, 0), 0);
4041 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4042 return true;
4044 /* That's all we handle in sparc_legitimize_tls_address for now. */
4045 return false;
4048 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4049 this (thread-local) address. */
4051 static rtx
4052 sparc_legitimize_tls_address (rtx addr)
4054 rtx temp1, temp2, temp3, ret, o0, got, insn;
4056 gcc_assert (can_create_pseudo_p ());
4058 if (GET_CODE (addr) == SYMBOL_REF)
4059 switch (SYMBOL_REF_TLS_MODEL (addr))
4061 case TLS_MODEL_GLOBAL_DYNAMIC:
4062 start_sequence ();
4063 temp1 = gen_reg_rtx (SImode);
4064 temp2 = gen_reg_rtx (SImode);
4065 ret = gen_reg_rtx (Pmode);
4066 o0 = gen_rtx_REG (Pmode, 8);
4067 got = sparc_tls_got ();
4068 emit_insn (gen_tgd_hi22 (temp1, addr));
4069 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4070 if (TARGET_ARCH32)
4072 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4073 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4074 addr, const1_rtx));
4076 else
4078 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4079 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4080 addr, const1_rtx));
4082 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4083 insn = get_insns ();
4084 end_sequence ();
4085 emit_libcall_block (insn, ret, o0, addr);
4086 break;
4088 case TLS_MODEL_LOCAL_DYNAMIC:
4089 start_sequence ();
4090 temp1 = gen_reg_rtx (SImode);
4091 temp2 = gen_reg_rtx (SImode);
4092 temp3 = gen_reg_rtx (Pmode);
4093 ret = gen_reg_rtx (Pmode);
4094 o0 = gen_rtx_REG (Pmode, 8);
4095 got = sparc_tls_got ();
4096 emit_insn (gen_tldm_hi22 (temp1));
4097 emit_insn (gen_tldm_lo10 (temp2, temp1));
4098 if (TARGET_ARCH32)
4100 emit_insn (gen_tldm_add32 (o0, got, temp2));
4101 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4102 const1_rtx));
4104 else
4106 emit_insn (gen_tldm_add64 (o0, got, temp2));
4107 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4108 const1_rtx));
4110 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4111 insn = get_insns ();
4112 end_sequence ();
4113 emit_libcall_block (insn, temp3, o0,
4114 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4115 UNSPEC_TLSLD_BASE));
4116 temp1 = gen_reg_rtx (SImode);
4117 temp2 = gen_reg_rtx (SImode);
4118 emit_insn (gen_tldo_hix22 (temp1, addr));
4119 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4120 if (TARGET_ARCH32)
4121 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4122 else
4123 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4124 break;
4126 case TLS_MODEL_INITIAL_EXEC:
4127 temp1 = gen_reg_rtx (SImode);
4128 temp2 = gen_reg_rtx (SImode);
4129 temp3 = gen_reg_rtx (Pmode);
4130 got = sparc_tls_got ();
4131 emit_insn (gen_tie_hi22 (temp1, addr));
4132 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4133 if (TARGET_ARCH32)
4134 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4135 else
4136 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4137 if (TARGET_SUN_TLS)
4139 ret = gen_reg_rtx (Pmode);
4140 if (TARGET_ARCH32)
4141 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4142 temp3, addr));
4143 else
4144 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4145 temp3, addr));
4147 else
4148 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4149 break;
4151 case TLS_MODEL_LOCAL_EXEC:
4152 temp1 = gen_reg_rtx (Pmode);
4153 temp2 = gen_reg_rtx (Pmode);
4154 if (TARGET_ARCH32)
4156 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4157 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4159 else
4161 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4162 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4164 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4165 break;
4167 default:
4168 gcc_unreachable ();
4171 else if (GET_CODE (addr) == CONST)
4173 rtx base, offset;
4175 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4177 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4178 offset = XEXP (XEXP (addr, 0), 1);
4180 base = force_operand (base, NULL_RTX);
4181 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4182 offset = force_reg (Pmode, offset);
4183 ret = gen_rtx_PLUS (Pmode, base, offset);
4186 else
4187 gcc_unreachable (); /* for now ... */
4189 return ret;
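/* For reference, the global-dynamic case above expands to roughly the
   following sequence (a sketch; register choices vary and the relocation
   syntax follows the SPARC TLS ABI):

	sethi	%tgd_hi22(sym), %t1
	add	%t1, %tgd_lo10(sym), %t2
	add	%l7, %t2, %o0, %tgd_add(sym)
	call	__tls_get_addr, %tgd_call(sym)
	 nop

   with the address of the thread-local variable returned in %o0.  */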
4192 /* Legitimize PIC addresses. If the address is already position-independent,
4193 we return ORIG. Newly generated position-independent addresses go into a
4194 reg. This is REG if nonzero, otherwise we allocate register(s) as
4195 necessary. */
4197 static rtx
4198 sparc_legitimize_pic_address (rtx orig, rtx reg)
4200 bool gotdata_op = false;
4202 if (GET_CODE (orig) == SYMBOL_REF
4203 /* See the comment in sparc_expand_move. */
4204 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4206 rtx pic_ref, address;
4207 rtx insn;
4209 if (reg == 0)
4211 gcc_assert (can_create_pseudo_p ());
4212 reg = gen_reg_rtx (Pmode);
4215 if (flag_pic == 2)
4217 /* If not during reload, allocate another temp reg here for loading
4218 in the address, so that these instructions can be optimized
4219 properly. */
4220 rtx temp_reg = (! can_create_pseudo_p ()
4221 ? reg : gen_reg_rtx (Pmode));
4223 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4224 won't get confused into thinking that these two instructions
4225 are loading in the true address of the symbol. If in the
4226 future a PIC rtx exists, that should be used instead. */
4227 if (TARGET_ARCH64)
4229 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4230 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4232 else
4234 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4235 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4237 address = temp_reg;
4238 gotdata_op = true;
4240 else
4241 address = orig;
4243 crtl->uses_pic_offset_table = 1;
4244 if (gotdata_op)
4246 if (TARGET_ARCH64)
4247 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4248 pic_offset_table_rtx,
4249 address, orig));
4250 else
4251 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4252 pic_offset_table_rtx,
4253 address, orig));
4255 else
4257 pic_ref
4258 = gen_const_mem (Pmode,
4259 gen_rtx_PLUS (Pmode,
4260 pic_offset_table_rtx, address));
4261 insn = emit_move_insn (reg, pic_ref);
4264 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4265 by loop. */
4266 set_unique_reg_note (insn, REG_EQUAL, orig);
4267 return reg;
4269 else if (GET_CODE (orig) == CONST)
4271 rtx base, offset;
4273 if (GET_CODE (XEXP (orig, 0)) == PLUS
4274 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4275 return orig;
4277 if (reg == 0)
4279 gcc_assert (can_create_pseudo_p ());
4280 reg = gen_reg_rtx (Pmode);
4283 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4284 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4285 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4286 base == reg ? NULL_RTX : reg);
4288 if (GET_CODE (offset) == CONST_INT)
4290 if (SMALL_INT (offset))
4291 return plus_constant (Pmode, base, INTVAL (offset));
4292 else if (can_create_pseudo_p ())
4293 offset = force_reg (Pmode, offset);
4294 else
4295 /* If we reach here, then something is seriously wrong. */
4296 gcc_unreachable ();
4298 return gen_rtx_PLUS (Pmode, base, offset);
4300 else if (GET_CODE (orig) == LABEL_REF)
4301 /* ??? We ought to be checking that the register is live instead, in case
4302 it is eliminated. */
4303 crtl->uses_pic_offset_table = 1;
4305 return orig;
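/* Sketch of the medium PIC case above (flag_pic == 2, illustrative): a
   sethi/or pair builds the symbol's GOT slot offset into a temporary and
   the address is then fetched from the GOT:

	sethi	%hi(sym), %tmp
	or	%tmp, %lo(sym), %tmp
	ld	[%l7 + %tmp], %reg

   where the GOT-relative relocations are supplied by the assembler.  For
   flag_pic == 1 the offset fits in 13 bits and a single load from
   [%l7 + sym] suffices.  */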
4308 /* Try machine-dependent ways of modifying an illegitimate address X
4309 to be legitimate. If we find one, return the new, valid address.
4311 OLDX is the address as it was before break_out_memory_refs was called.
4312 In some cases it is useful to look at this to decide what needs to be done.
4314 MODE is the mode of the operand pointed to by X.
4316 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4318 static rtx
4319 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4320 enum machine_mode mode)
4322 rtx orig_x = x;
4324 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4325 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4326 force_operand (XEXP (x, 0), NULL_RTX));
4327 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4328 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4329 force_operand (XEXP (x, 1), NULL_RTX));
4330 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4331 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4332 XEXP (x, 1));
4333 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4334 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4335 force_operand (XEXP (x, 1), NULL_RTX));
4337 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4338 return x;
4340 if (sparc_tls_referenced_p (x))
4341 x = sparc_legitimize_tls_address (x);
4342 else if (flag_pic)
4343 x = sparc_legitimize_pic_address (x, NULL_RTX);
4344 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4345 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4346 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4347 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4348 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4349 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4350 else if (GET_CODE (x) == SYMBOL_REF
4351 || GET_CODE (x) == CONST
4352 || GET_CODE (x) == LABEL_REF)
4353 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4355 return x;
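/* Worked example (illustrative): for x = (plus (reg %i0) (const_int 5000))
   the constant does not fit in the 13-bit signed immediate field, so it is
   copied into a scratch register and the function returns the REG+REG
   address (plus (reg %i0) (reg tmp)).  */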
4358 /* Delegitimize an address that was legitimized by the above function. */
4360 static rtx
4361 sparc_delegitimize_address (rtx x)
4363 x = delegitimize_mem_from_attrs (x);
4365 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4366 switch (XINT (XEXP (x, 1), 1))
4368 case UNSPEC_MOVE_PIC:
4369 case UNSPEC_TLSLE:
4370 x = XVECEXP (XEXP (x, 1), 0, 0);
4371 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4372 break;
4373 default:
4374 break;
4377 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4378 if (GET_CODE (x) == MINUS
4379 && REG_P (XEXP (x, 0))
4380 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4381 && GET_CODE (XEXP (x, 1)) == LO_SUM
4382 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4383 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4385 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4386 gcc_assert (GET_CODE (x) == LABEL_REF);
4389 return x;
4392 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4393 replace the input X, or the original X if no replacement is called for.
4394 The output parameter *WIN is 1 if the calling macro should goto WIN,
4395 0 if it should not.
4397 For SPARC, we wish to handle addresses by splitting them into
4398 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4399 This cuts the number of extra insns by one.
4401 Do nothing when generating PIC code and the address is a symbolic
4402 operand or requires a scratch register. */
4404 rtx
4405 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4406 int opnum, int type,
4407 int ind_levels ATTRIBUTE_UNUSED, int *win)
4409 /* Decompose SImode constants into HIGH+LO_SUM. */
4410 if (CONSTANT_P (x)
4411 && (mode != TFmode || TARGET_ARCH64)
4412 && GET_MODE (x) == SImode
4413 && GET_CODE (x) != LO_SUM
4414 && GET_CODE (x) != HIGH
4415 && sparc_cmodel <= CM_MEDLOW
4416 && !(flag_pic
4417 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4419 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4420 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4421 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4422 opnum, (enum reload_type)type);
4423 *win = 1;
4424 return x;
4427 /* We have to recognize what we have already generated above. */
4428 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4430 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4431 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4432 opnum, (enum reload_type)type);
4433 *win = 1;
4434 return x;
4437 *win = 0;
4438 return x;
4441 /* Return true if ADDR (a legitimate address expression)
4442 has an effect that depends on the machine mode it is used for.
4444 In PIC mode,
4446 (mem:HI [%l7+a])
4448 is not equivalent to
4450 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4452 because [%l7+a+1] is interpreted as the address of (a+1). */
4455 static bool
4456 sparc_mode_dependent_address_p (const_rtx addr,
4457 addr_space_t as ATTRIBUTE_UNUSED)
4459 if (flag_pic && GET_CODE (addr) == PLUS)
4461 rtx op0 = XEXP (addr, 0);
4462 rtx op1 = XEXP (addr, 1);
4463 if (op0 == pic_offset_table_rtx
4464 && symbolic_operand (op1, VOIDmode))
4465 return true;
4468 return false;
4471 #ifdef HAVE_GAS_HIDDEN
4472 # define USE_HIDDEN_LINKONCE 1
4473 #else
4474 # define USE_HIDDEN_LINKONCE 0
4475 #endif
4477 static void
4478 get_pc_thunk_name (char name[32], unsigned int regno)
4480 const char *reg_name = reg_names[regno];
4482 /* Skip the leading '%' as that cannot be used in a
4483 symbol name. */
4484 reg_name += 1;
4486 if (USE_HIDDEN_LINKONCE)
4487 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4488 else
4489 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
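/* Example: for the PIC register %l7 this produces the hidden linkonce
   symbol "__sparc_get_pc_thunk.l7", or an internal LADDPC label when the
   assembler does not support hidden symbols.  */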
4492 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4494 static rtx
4495 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4497 int orig_flag_pic = flag_pic;
4498 rtx insn;
4500 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4501 flag_pic = 0;
4502 if (TARGET_ARCH64)
4503 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4504 else
4505 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4506 flag_pic = orig_flag_pic;
4508 return insn;
4511 /* Emit code to load the GOT register. */
4513 void
4514 load_got_register (void)
4516 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4517 if (!global_offset_table_rtx)
4518 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4520 if (TARGET_VXWORKS_RTP)
4521 emit_insn (gen_vxworks_load_got ());
4522 else
4524 /* The GOT symbol is subject to a PC-relative relocation so we need a
4525 helper function to add the PC value and thus get the final value. */
4526 if (!got_helper_rtx)
4528 char name[32];
4529 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4530 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4533 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4534 got_helper_rtx,
4535 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4538 /* Need to emit this whether or not we obey regdecls,
4539 since setjmp/longjmp can cause life info to screw up.
4540 ??? In the case where we don't obey regdecls, this is not sufficient
4541 since we may not fall out the bottom. */
4542 emit_use (global_offset_table_rtx);
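/* The 32-bit PIC sequence emitted through gen_load_pcrel_sym looks
   roughly like this (a sketch, assuming the PIC register %l7):

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk returns via jmp %o7+8 with "add %o7, %l7, %l7" in its
   delay slot, leaving the GOT address in %l7.  */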
4545 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4546 address of the call target. */
4548 void
4549 sparc_emit_call_insn (rtx pat, rtx addr)
4551 rtx insn;
4553 insn = emit_call_insn (pat);
4555 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4556 if (TARGET_VXWORKS_RTP
4557 && flag_pic
4558 && GET_CODE (addr) == SYMBOL_REF
4559 && (SYMBOL_REF_DECL (addr)
4560 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4561 : !SYMBOL_REF_LOCAL_P (addr)))
4563 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4564 crtl->uses_pic_offset_table = 1;
4568 /* Return 1 if RTX is a MEM which is known to be aligned to at
4569 least a DESIRED byte boundary. */
4571 int
4572 mem_min_alignment (rtx mem, int desired)
4574 rtx addr, base, offset;
4576 /* If it's not a MEM we can't accept it. */
4577 if (GET_CODE (mem) != MEM)
4578 return 0;
4580 /* Obviously... */
4581 if (!TARGET_UNALIGNED_DOUBLES
4582 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4583 return 1;
4585 /* ??? The rest of the function predates MEM_ALIGN so
4586 there is probably a bit of redundancy. */
4587 addr = XEXP (mem, 0);
4588 base = offset = NULL_RTX;
4589 if (GET_CODE (addr) == PLUS)
4591 if (GET_CODE (XEXP (addr, 0)) == REG)
4593 base = XEXP (addr, 0);
4595 /* What we are saying here is that if the base
4596 REG is aligned properly, the compiler will make
4597 sure any REG based index upon it will be so
4598 as well. */
4599 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4600 offset = XEXP (addr, 1);
4601 else
4602 offset = const0_rtx;
4605 else if (GET_CODE (addr) == REG)
4607 base = addr;
4608 offset = const0_rtx;
4611 if (base != NULL_RTX)
4613 int regno = REGNO (base);
4615 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4617 /* Check if the compiler has recorded some information
4618 about the alignment of the base REG. If reload has
4619 completed, we already matched with proper alignments.
4620 If not running global_alloc, reload might give us
4621 unaligned pointer to local stack though. */
4622 if (((cfun != 0
4623 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4624 || (optimize && reload_completed))
4625 && (INTVAL (offset) & (desired - 1)) == 0)
4626 return 1;
4628 else
4630 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4631 return 1;
4634 else if (! TARGET_UNALIGNED_DOUBLES
4635 || CONSTANT_P (addr)
4636 || GET_CODE (addr) == LO_SUM)
4638 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4639 is true, in which case we can only assume that an access is aligned if
4640 it is to a constant address, or the address involves a LO_SUM. */
4641 return 1;
4644 /* An obviously unaligned address. */
4645 return 0;
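/* Examples (illustrative), with desired = 8: the address
   (plus (reg %fp) (const_int -16)) is considered aligned when the stack
   bias is 0 (32-bit mode) since -16 & 7 == 0, whereas an offset of -12
   is not.  In 64-bit mode the bias of 2047 is subtracted first, so the
   same test applies to offset - 2047.  */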
4649 /* Vectors to keep interesting information about registers where it can easily
4650 be got. We used to use the actual mode value as the bit number, but there
4651 are more than 32 modes now. Instead we use two tables: one indexed by
4652 hard register number, and one indexed by mode. */
4654 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4655 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4656 mapped into one sparc_mode_class mode. */
4658 enum sparc_mode_class {
4659 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4660 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4661 CC_MODE, CCFP_MODE
4664 /* Modes for single-word and smaller quantities. */
4665 #define S_MODES \
4666 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4668 /* Modes for double-word and smaller quantities. */
4669 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4671 /* Modes for quad-word and smaller quantities. */
4672 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4674 /* Modes for 8-word and smaller quantities. */
4675 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4677 /* Modes for single-float quantities. */
4678 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4680 /* Modes for double-float and smaller quantities. */
4681 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4683 /* Modes for quad-float and smaller quantities. */
4684 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4686 /* Modes for quad-float pairs and smaller quantities. */
4687 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4689 /* Modes for double-float only quantities. */
4690 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4692 /* Modes for quad-float and double-float only quantities. */
4693 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4695 /* Modes for quad-float pairs and double-float only quantities. */
4696 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4698 /* Modes for condition codes. */
4699 #define CC_MODES (1 << (int) CC_MODE)
4700 #define CCFP_MODES (1 << (int) CCFP_MODE)
4702 /* Value is 1 if register/mode pair is acceptable on sparc.
4703 The funny mixture of D and T modes is because integer operations
4704 do not specially operate on tetra quantities, so non-quad-aligned
4705 registers can hold quadword quantities (except %o4 and %i4 because
4706 they cross fixed registers). */
4708 /* This points to either the 32 bit or the 64 bit version. */
4709 const int *hard_regno_mode_classes;
4711 static const int hard_32bit_mode_classes[] = {
4712 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4713 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4714 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4715 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4717 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4718 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4719 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4720 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4722 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4723 and none can hold SFmode/SImode values. */
4724 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4725 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4726 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4727 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4729 /* %fcc[0123] */
4730 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4732 /* %icc, %sfp, %gsr */
4733 CC_MODES, 0, D_MODES
4736 static const int hard_64bit_mode_classes[] = {
4737 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4738 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4739 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4740 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4742 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4743 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4744 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4745 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4747 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4748 and none can hold SFmode/SImode values. */
4749 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4750 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4751 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4752 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4754 /* %fcc[0123] */
4755 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4757 /* %icc, %sfp, %gsr */
4758 CC_MODES, 0, D_MODES
4761 int sparc_mode_class [NUM_MACHINE_MODES];
4763 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4765 static void
4766 sparc_init_modes (void)
4768 int i;
4770 for (i = 0; i < NUM_MACHINE_MODES; i++)
4772 switch (GET_MODE_CLASS (i))
4774 case MODE_INT:
4775 case MODE_PARTIAL_INT:
4776 case MODE_COMPLEX_INT:
4777 if (GET_MODE_SIZE (i) < 4)
4778 sparc_mode_class[i] = 1 << (int) H_MODE;
4779 else if (GET_MODE_SIZE (i) == 4)
4780 sparc_mode_class[i] = 1 << (int) S_MODE;
4781 else if (GET_MODE_SIZE (i) == 8)
4782 sparc_mode_class[i] = 1 << (int) D_MODE;
4783 else if (GET_MODE_SIZE (i) == 16)
4784 sparc_mode_class[i] = 1 << (int) T_MODE;
4785 else if (GET_MODE_SIZE (i) == 32)
4786 sparc_mode_class[i] = 1 << (int) O_MODE;
4787 else
4788 sparc_mode_class[i] = 0;
4789 break;
4790 case MODE_VECTOR_INT:
4791 if (GET_MODE_SIZE (i) == 4)
4792 sparc_mode_class[i] = 1 << (int) SF_MODE;
4793 else if (GET_MODE_SIZE (i) == 8)
4794 sparc_mode_class[i] = 1 << (int) DF_MODE;
4795 else
4796 sparc_mode_class[i] = 0;
4797 break;
4798 case MODE_FLOAT:
4799 case MODE_COMPLEX_FLOAT:
4800 if (GET_MODE_SIZE (i) == 4)
4801 sparc_mode_class[i] = 1 << (int) SF_MODE;
4802 else if (GET_MODE_SIZE (i) == 8)
4803 sparc_mode_class[i] = 1 << (int) DF_MODE;
4804 else if (GET_MODE_SIZE (i) == 16)
4805 sparc_mode_class[i] = 1 << (int) TF_MODE;
4806 else if (GET_MODE_SIZE (i) == 32)
4807 sparc_mode_class[i] = 1 << (int) OF_MODE;
4808 else
4809 sparc_mode_class[i] = 0;
4810 break;
4811 case MODE_CC:
4812 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4813 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4814 else
4815 sparc_mode_class[i] = 1 << (int) CC_MODE;
4816 break;
4817 default:
4818 sparc_mode_class[i] = 0;
4819 break;
4823 if (TARGET_ARCH64)
4824 hard_regno_mode_classes = hard_64bit_mode_classes;
4825 else
4826 hard_regno_mode_classes = hard_32bit_mode_classes;
4828 /* Initialize the array used by REGNO_REG_CLASS. */
4829 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4831 if (i < 16 && TARGET_V8PLUS)
4832 sparc_regno_reg_class[i] = I64_REGS;
4833 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4834 sparc_regno_reg_class[i] = GENERAL_REGS;
4835 else if (i < 64)
4836 sparc_regno_reg_class[i] = FP_REGS;
4837 else if (i < 96)
4838 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4839 else if (i < 100)
4840 sparc_regno_reg_class[i] = FPCC_REGS;
4841 else
4842 sparc_regno_reg_class[i] = NO_REGS;
4846 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4848 static inline bool
4849 save_global_or_fp_reg_p (unsigned int regno,
4850 int leaf_function ATTRIBUTE_UNUSED)
4852 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4855 /* Return whether the return address register (%i7) is needed. */
4857 static inline bool
4858 return_addr_reg_needed_p (int leaf_function)
4860 /* If it is live, for example because of __builtin_return_address (0). */
4861 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4862 return true;
4864 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4865 if (!leaf_function
4866 /* Loading the GOT register clobbers %o7. */
4867 || crtl->uses_pic_offset_table
4868 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4869 return true;
4871 return false;
4874 /* Return whether REGNO, a local or in register, must be saved/restored. */
4876 static bool
4877 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4879 /* General case: call-saved registers live at some point. */
4880 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4881 return true;
4883 /* Frame pointer register (%fp) if needed. */
4884 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4885 return true;
4887 /* Return address register (%i7) if needed. */
4888 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4889 return true;
4891 /* GOT register (%l7) if needed. */
4892 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4893 return true;
4895 /* If the function accesses prior frames, the frame pointer and the return
4896 address of the previous frame must be saved on the stack. */
4897 if (crtl->accesses_prior_frames
4898 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4899 return true;
4901 return false;
4904 /* Compute the frame size required by the function. This function is called
4905 during the reload pass and also by sparc_expand_prologue. */
4907 HOST_WIDE_INT
4908 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4910 HOST_WIDE_INT frame_size, apparent_frame_size;
4911 int args_size, n_global_fp_regs = 0;
4912 bool save_local_in_regs_p = false;
4913 unsigned int i;
4915 /* If the function allocates dynamic stack space, the dynamic offset is
4916 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4917 if (leaf_function && !cfun->calls_alloca)
4918 args_size = 0;
4919 else
4920 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4922 /* Calculate space needed for global registers. */
4923 if (TARGET_ARCH64)
4924 for (i = 0; i < 8; i++)
4925 if (save_global_or_fp_reg_p (i, 0))
4926 n_global_fp_regs += 2;
4927 else
4928 for (i = 0; i < 8; i += 2)
4929 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4930 n_global_fp_regs += 2;
4932 /* In the flat window model, find out which local and in registers need to
4933 be saved. We don't reserve space in the current frame for them as they
4934 will be spilled into the register window save area of the caller's frame.
4935 However, as soon as we use this register window save area, we must create
4936 that of the current frame to make it the live one. */
4937 if (TARGET_FLAT)
4938 for (i = 16; i < 32; i++)
4939 if (save_local_or_in_reg_p (i, leaf_function))
4941 save_local_in_regs_p = true;
4942 break;
4945 /* Calculate space needed for FP registers. */
4946 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4947 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4948 n_global_fp_regs += 2;
4950 if (size == 0
4951 && n_global_fp_regs == 0
4952 && args_size == 0
4953 && !save_local_in_regs_p)
4954 frame_size = apparent_frame_size = 0;
4955 else
4957 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4958 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
4959 apparent_frame_size += n_global_fp_regs * 4;
4961 /* We need to add the size of the outgoing argument area. */
4962 frame_size = apparent_frame_size + ((args_size + 7) & -8);
4964 /* And that of the register window save area. */
4965 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4967 /* Finally, bump to the appropriate alignment. */
4968 frame_size = SPARC_STACK_ALIGN (frame_size);
4971 /* Set up values for use in prologue and epilogue. */
4972 sparc_frame_size = frame_size;
4973 sparc_apparent_frame_size = apparent_frame_size;
4974 sparc_n_global_fp_regs = n_global_fp_regs;
4975 sparc_save_local_in_regs_p = save_local_in_regs_p;
4977 return frame_size;
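/* Summary of the computation above (illustrative): the local variable
   area is rounded up to 8 bytes, each saved global/FP register counts
   4 bytes (always in even-sized pairs), the outgoing argument area is
   rounded up to 8 bytes, the ABI register window save area (16 words,
   i.e. 64 bytes in 32-bit mode and 128 bytes in 64-bit mode) comes in
   via FIRST_PARM_OFFSET, and the total is then aligned with
   SPARC_STACK_ALIGN.  */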
4980 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
4982 HOST_WIDE_INT
4983 sparc_initial_elimination_offset (int to)
4985 int offset;
4987 if (to == STACK_POINTER_REGNUM)
4988 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
4989 else
4990 offset = 0;
4992 offset += SPARC_STACK_BIAS;
4993 return offset;
4996 /* Output any necessary .register pseudo-ops. */
4998 void
4999 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5001 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5002 int i;
5004 if (TARGET_ARCH32)
5005 return;
5007 /* Check if %g[2367] were used without
5008 .register being printed for them already. */
5009 for (i = 2; i < 8; i++)
5011 if (df_regs_ever_live_p (i)
5012 && ! sparc_hard_reg_printed [i])
5014 sparc_hard_reg_printed [i] = 1;
5015 /* %g7 is used as TLS base register, use #ignore
5016 for it instead of #scratch. */
5017 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5018 i == 7 ? "ignore" : "scratch");
5020 if (i == 3) i = 5;
5022 #endif
5025 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5027 #if PROBE_INTERVAL > 4096
5028 #error Cannot use indexed addressing mode for stack probing
5029 #endif
5031 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5032 inclusive. These are offsets from the current stack pointer.
5034 Note that we don't use the REG+REG addressing mode for the probes because
5035 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5036 so the advantage of having a single code path wins here. */
5038 static void
5039 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5041 rtx g1 = gen_rtx_REG (Pmode, 1);
5043 /* See if we have a constant small number of probes to generate. If so,
5044 that's the easy case. */
5045 if (size <= PROBE_INTERVAL)
5047 emit_move_insn (g1, GEN_INT (first));
5048 emit_insn (gen_rtx_SET (VOIDmode, g1,
5049 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5050 emit_stack_probe (plus_constant (Pmode, g1, -size));
5053 /* The run-time loop is made up of 10 insns in the generic case while the
5054 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5055 else if (size <= 5 * PROBE_INTERVAL)
5057 HOST_WIDE_INT i;
5059 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5060 emit_insn (gen_rtx_SET (VOIDmode, g1,
5061 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5062 emit_stack_probe (g1);
5064 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5065 it exceeds SIZE. If only two probes are needed, this will not
5066 generate any code. Then probe at FIRST + SIZE. */
5067 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5069 emit_insn (gen_rtx_SET (VOIDmode, g1,
5070 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5071 emit_stack_probe (g1);
5074 emit_stack_probe (plus_constant (Pmode, g1,
5075 (i - PROBE_INTERVAL) - size));
5078 /* Otherwise, do the same as above, but in a loop. Note that we must be
5079 extra careful with variables wrapping around because we might be at
5080 the very top (or the very bottom) of the address space and we have
5081 to be able to handle this case properly; in particular, we use an
5082 equality test for the loop condition. */
5083 else
5085 HOST_WIDE_INT rounded_size;
5086 rtx g4 = gen_rtx_REG (Pmode, 4);
5088 emit_move_insn (g1, GEN_INT (first));
5091 /* Step 1: round SIZE to the previous multiple of the interval. */
5093 rounded_size = size & -PROBE_INTERVAL;
5094 emit_move_insn (g4, GEN_INT (rounded_size));
5097 /* Step 2: compute initial and final value of the loop counter. */
5099 /* TEST_ADDR = SP + FIRST. */
5100 emit_insn (gen_rtx_SET (VOIDmode, g1,
5101 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5103 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5104 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
5107 /* Step 3: the loop
5109 while (TEST_ADDR != LAST_ADDR)
5111 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5112 probe at TEST_ADDR
5115 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5116 until it is equal to ROUNDED_SIZE. */
5118 if (TARGET_ARCH64)
5119 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5120 else
5121 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5124 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5125 that SIZE is equal to ROUNDED_SIZE. */
5127 if (size != rounded_size)
5128 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5131 /* Make sure nothing is scheduled before we are done. */
5132 emit_insn (gen_blockage ());
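/* Worked example (illustrative): with PROBE_INTERVAL = 4096 and
   size = 12416 (3 * 4096 + 128), the middle branch above emits probes
   at sp - (first + 4096), sp - (first + 8192), sp - (first + 12288)
   and finally at sp - (first + 12416).  */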
5135 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5136 absolute addresses. */
5138 const char *
5139 output_probe_stack_range (rtx reg1, rtx reg2)
5141 static int labelno = 0;
5142 char loop_lab[32], end_lab[32];
5143 rtx xops[2];
5145 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
5146 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
5148 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5150 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
5151 xops[0] = reg1;
5152 xops[1] = reg2;
5153 output_asm_insn ("cmp\t%0, %1", xops);
5154 if (TARGET_ARCH64)
5155 fputs ("\tbe,pn\t%xcc,", asm_out_file);
5156 else
5157 fputs ("\tbe\t", asm_out_file);
5158 assemble_name_raw (asm_out_file, end_lab);
5159 fputc ('\n', asm_out_file);
5161 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5162 xops[1] = GEN_INT (-PROBE_INTERVAL);
5163 output_asm_insn (" add\t%0, %1, %0", xops);
5165 /* Probe at TEST_ADDR and branch. */
5166 if (TARGET_ARCH64)
5167 fputs ("\tba,pt\t%xcc,", asm_out_file);
5168 else
5169 fputs ("\tba\t", asm_out_file);
5170 assemble_name_raw (asm_out_file, loop_lab);
5171 fputc ('\n', asm_out_file);
5172 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5173 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5175 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
5177 return "";
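/* On 32-bit targets the emitted loop looks like this (a sketch; the
   label names are illustrative, and 64-bit targets use be,pn/ba,pt
   with %xcc and a stack bias of 2047 in the store):

   .LPSRL0:
	cmp	%g1, %g4
	be	.LPSRE0
	 add	%g1, -4096, %g1
	ba	.LPSRL0
	 st	%g0, [%g1+0]
   .LPSRE0:
*/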
5180 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5181 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5182 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5183 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5184 the action to be performed if it returns false. Return the new offset. */
5186 typedef bool (*sorr_pred_t) (unsigned int, int);
5187 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5189 static int
5190 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5191 int offset, int leaf_function, sorr_pred_t save_p,
5192 sorr_act_t action_true, sorr_act_t action_false)
5194 unsigned int i;
5195 rtx mem, insn;
5197 if (TARGET_ARCH64 && high <= 32)
5199 int fp_offset = -1;
5201 for (i = low; i < high; i++)
5203 if (save_p (i, leaf_function))
5205 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5206 base, offset));
5207 if (action_true == SORR_SAVE)
5209 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5210 RTX_FRAME_RELATED_P (insn) = 1;
5212 else /* action_true == SORR_RESTORE */
5214 /* The frame pointer must be restored last since its old
5215 value may be used as base address for the frame. This
5216 is problematic in 64-bit mode only because of the lack
5217 of double-word load instruction. */
5218 if (i == HARD_FRAME_POINTER_REGNUM)
5219 fp_offset = offset;
5220 else
5221 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5223 offset += 8;
5225 else if (action_false == SORR_ADVANCE)
5226 offset += 8;
5229 if (fp_offset >= 0)
5231 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5232 emit_move_insn (hard_frame_pointer_rtx, mem);
5235 else
5237 for (i = low; i < high; i += 2)
5239 bool reg0 = save_p (i, leaf_function);
5240 bool reg1 = save_p (i + 1, leaf_function);
5241 enum machine_mode mode;
5242 int regno;
5244 if (reg0 && reg1)
5246 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5247 regno = i;
5249 else if (reg0)
5251 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5252 regno = i;
5254 else if (reg1)
5256 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5257 regno = i + 1;
5258 offset += 4;
5260 else
5262 if (action_false == SORR_ADVANCE)
5263 offset += 8;
5264 continue;
5267 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5268 if (action_true == SORR_SAVE)
5270 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5271 RTX_FRAME_RELATED_P (insn) = 1;
5272 if (mode == DImode)
5274 rtx set1, set2;
5275 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5276 offset));
5277 set1 = gen_rtx_SET (VOIDmode, mem,
5278 gen_rtx_REG (SImode, regno));
5279 RTX_FRAME_RELATED_P (set1) = 1;
5280 mem
5281 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5282 offset + 4));
5283 set2 = gen_rtx_SET (VOIDmode, mem,
5284 gen_rtx_REG (SImode, regno + 1));
5285 RTX_FRAME_RELATED_P (set2) = 1;
5286 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5287 gen_rtx_PARALLEL (VOIDmode,
5288 gen_rtvec (2, set1, set2)));
5291 else /* action_true == SORR_RESTORE */
5292 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5294 /* Always preserve double-word alignment. */
5295 offset = (offset + 8) & -8;
5299 return offset;
5302 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5304 static rtx
5305 emit_adjust_base_to_offset (rtx base, int offset)
5307 /* ??? This might be optimized a little as %g1 might already have a
5308 value close enough that a single add insn will do. */
5309 /* ??? Although, all of this is probably only a temporary fix because
5310 if %g1 can hold a function result, then sparc_expand_epilogue will
5311 lose (the result will be clobbered). */
5312 rtx new_base = gen_rtx_REG (Pmode, 1);
5313 emit_move_insn (new_base, GEN_INT (offset));
5314 emit_insn (gen_rtx_SET (VOIDmode,
5315 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5316 return new_base;
5319 /* Emit code to save/restore call-saved global and FP registers. */
5321 static void
5322 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5324 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5326 base = emit_adjust_base_to_offset (base, offset);
5327 offset = 0;
5330 offset
5331 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5332 save_global_or_fp_reg_p, action, SORR_NONE);
5333 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5334 save_global_or_fp_reg_p, action, SORR_NONE);
5337 /* Emit code to save/restore call-saved local and in registers. */
5339 static void
5340 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5342 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5344 base = emit_adjust_base_to_offset (base, offset);
5345 offset = 0;
5348 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5349 save_local_or_in_reg_p, action, SORR_ADVANCE);
5352 /* Emit a window_save insn. */
5354 static rtx
5355 emit_window_save (rtx increment)
5357 rtx insn = emit_insn (gen_window_save (increment));
5358 RTX_FRAME_RELATED_P (insn) = 1;
5360 /* The incoming return address (%o7) is saved in %i7. */
5361 add_reg_note (insn, REG_CFA_REGISTER,
5362 gen_rtx_SET (VOIDmode,
5363 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5364 gen_rtx_REG (Pmode,
5365 INCOMING_RETURN_ADDR_REGNUM)));
5367 /* The window save event. */
5368 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5370 /* The CFA is %fp, the hard frame pointer. */
5371 add_reg_note (insn, REG_CFA_DEF_CFA,
5372 plus_constant (Pmode, hard_frame_pointer_rtx,
5373 INCOMING_FRAME_SP_OFFSET));
5375 return insn;
5378 /* Generate an increment for the stack pointer. */
5380 static rtx
5381 gen_stack_pointer_inc (rtx increment)
5383 return gen_rtx_SET (VOIDmode,
5384 stack_pointer_rtx,
5385 gen_rtx_PLUS (Pmode,
5386 stack_pointer_rtx,
5387 increment));
5390 /* Expand the function prologue. The prologue is responsible for reserving
5391 storage for the frame, saving the call-saved registers and loading the
5392 GOT register if needed. */
5394 void
5395 sparc_expand_prologue (void)
5397 HOST_WIDE_INT size;
5398 rtx insn;
5400 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5401 on the final value of the flag means deferring the prologue/epilogue
5402 expansion until just before the second scheduling pass, which is too
5403 late to emit multiple epilogues or return insns.
5405 Of course we are making the assumption that the value of the flag
5406 will not change between now and its final value. Of the three parts
5407 of the formula, only the last one can reasonably vary. Let's take a
5408 closer look, after assuming that the first two are set to true
5409 (otherwise the last value is effectively silenced).
5411 If only_leaf_regs_used returns false, the global predicate will also
5412 be false so the actual frame size calculated below will be positive.
5413 As a consequence, the save_register_window insn will be emitted in
5414 the instruction stream; now this insn explicitly references %fp
5415 which is not a leaf register so only_leaf_regs_used will always
5416 return false subsequently.
5418 If only_leaf_regs_used returns true, we hope that the subsequent
5419 optimization passes won't cause non-leaf registers to pop up. For
5420 example, the regrename pass has special provisions to not rename to
5421 non-leaf registers in a leaf function. */
5422 sparc_leaf_function_p
5423 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5425 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5427 if (flag_stack_usage_info)
5428 current_function_static_stack_size = size;
5430 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5431 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5433 if (size == 0)
5434 ; /* do nothing. */
5435 else if (sparc_leaf_function_p)
5437 rtx size_int_rtx = GEN_INT (-size);
5439 if (size <= 4096)
5440 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5441 else if (size <= 8192)
5443 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5444 RTX_FRAME_RELATED_P (insn) = 1;
5446 /* %sp is still the CFA register. */
5447 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5449 else
5451 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5452 emit_move_insn (size_rtx, size_int_rtx);
5453 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5454 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5455 gen_stack_pointer_inc (size_int_rtx));
5458 RTX_FRAME_RELATED_P (insn) = 1;
5460 else
5462 rtx size_int_rtx = GEN_INT (-size);
5464 if (size <= 4096)
5465 emit_window_save (size_int_rtx);
5466 else if (size <= 8192)
5468 emit_window_save (GEN_INT (-4096));
5470 /* %sp is not the CFA register anymore. */
5471 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5473 /* Make sure no %fp-based store is issued until after the frame is
5474 established. The offset between the frame pointer and the stack
5475 pointer is calculated relative to the value of the stack pointer
5476 at the end of the function prologue, and moving instructions that
5477 access the stack via the frame pointer between the instructions
5478 that decrement the stack pointer could result in accessing the
5479 register window save area, which is volatile. */
5480 emit_insn (gen_frame_blockage ());
5482 else
5484 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5485 emit_move_insn (size_rtx, size_int_rtx);
5486 emit_window_save (size_rtx);
5490 if (sparc_leaf_function_p)
5492 sparc_frame_base_reg = stack_pointer_rtx;
5493 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5495 else
5497 sparc_frame_base_reg = hard_frame_pointer_rtx;
5498 sparc_frame_base_offset = SPARC_STACK_BIAS;
5501 if (sparc_n_global_fp_regs > 0)
5502 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5503 sparc_frame_base_offset
5504 - sparc_apparent_frame_size,
5505 SORR_SAVE);
5507 /* Load the GOT register if needed. */
5508 if (crtl->uses_pic_offset_table)
5509 load_got_register ();
5511 /* Advertise that the data calculated just above are now valid. */
5512 sparc_prologue_data_valid_p = true;
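/* Illustrative expansion of the size handling above: a leaf function
   with size = 6000 gets

	add	%sp, -4096, %sp
	add	%sp, -1904, %sp

   a regular function uses "save %sp, -4096, %sp" followed by the second
   adjustment, and sizes beyond 8192 are first loaded into %g1.  */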
5515 /* Expand the function prologue. The prologue is responsible for reserving
5516 storage for the frame, saving the call-saved registers and loading the
5517 GOT register if needed. */
5519 void
5520 sparc_flat_expand_prologue (void)
5522 HOST_WIDE_INT size;
5523 rtx insn;
5525 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5527 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5529 if (flag_stack_usage_info)
5530 current_function_static_stack_size = size;
5532 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5533 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5535 if (sparc_save_local_in_regs_p)
5536 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5537 SORR_SAVE);
5539 if (size == 0)
5540 ; /* do nothing. */
5541 else
5543 rtx size_int_rtx, size_rtx;
5545 size_rtx = size_int_rtx = GEN_INT (-size);
5547 /* We establish the frame (i.e. decrement the stack pointer) first, even
5548 if we use a frame pointer, because we cannot clobber any call-saved
5549 registers, including the frame pointer, if we haven't created a new
5550 register save area, for the sake of compatibility with the ABI. */
5551 if (size <= 4096)
5552 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5553 else if (size <= 8192 && !frame_pointer_needed)
5555 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5556 RTX_FRAME_RELATED_P (insn) = 1;
5557 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5559 else
5561 size_rtx = gen_rtx_REG (Pmode, 1);
5562 emit_move_insn (size_rtx, size_int_rtx);
5563 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5564 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5565 gen_stack_pointer_inc (size_int_rtx));
5567 RTX_FRAME_RELATED_P (insn) = 1;
5569 /* Ensure nothing is scheduled until after the frame is established. */
5570 emit_insn (gen_blockage ());
5572 if (frame_pointer_needed)
5574 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5575 gen_rtx_MINUS (Pmode,
5576 stack_pointer_rtx,
5577 size_rtx)));
5578 RTX_FRAME_RELATED_P (insn) = 1;
5580 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5581 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5582 plus_constant (Pmode, stack_pointer_rtx,
5583 size)));
5586 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5588 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5589 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5591 insn = emit_move_insn (i7, o7);
5592 RTX_FRAME_RELATED_P (insn) = 1;
5594 add_reg_note (insn, REG_CFA_REGISTER,
5595 gen_rtx_SET (VOIDmode, i7, o7));
5597 /* Prevent this instruction from ever being considered dead,
5598 even if this function has no epilogue. */
5599 emit_use (i7);
5603 if (frame_pointer_needed)
5605 sparc_frame_base_reg = hard_frame_pointer_rtx;
5606 sparc_frame_base_offset = SPARC_STACK_BIAS;
5608 else
5610 sparc_frame_base_reg = stack_pointer_rtx;
5611 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5614 if (sparc_n_global_fp_regs > 0)
5615 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5616 sparc_frame_base_offset
5617 - sparc_apparent_frame_size,
5618 SORR_SAVE);
5620 /* Load the GOT register if needed. */
5621 if (crtl->uses_pic_offset_table)
5622 load_got_register ();
5624 /* Advertise that the data calculated just above are now valid. */
5625 sparc_prologue_data_valid_p = true;
5628 /* This function generates the assembly code for function entry, which boils
5629 down to emitting the necessary .register directives. */
5631 static void
5632 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5634 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5635 if (!TARGET_FLAT)
5636 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5638 sparc_output_scratch_registers (file);
5641 /* Expand the function epilogue, either normal or part of a sibcall.
5642 We emit all the instructions except the return or the call. */
5644 void
5645 sparc_expand_epilogue (bool for_eh)
5647 HOST_WIDE_INT size = sparc_frame_size;
5649 if (sparc_n_global_fp_regs > 0)
5650 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5651 sparc_frame_base_offset
5652 - sparc_apparent_frame_size,
5653 SORR_RESTORE);
5655 if (size == 0 || for_eh)
5656 ; /* do nothing. */
5657 else if (sparc_leaf_function_p)
5659 if (size <= 4096)
5660 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5661 else if (size <= 8192)
5663 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5664 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5666 else
5668 rtx reg = gen_rtx_REG (Pmode, 1);
5669 emit_move_insn (reg, GEN_INT (size));
5670 emit_insn (gen_stack_pointer_inc (reg));
5675 /* Expand the function epilogue, either normal or part of a sibcall.
5676 We emit all the instructions except the return or the call. */
5678 void
5679 sparc_flat_expand_epilogue (bool for_eh)
5681 HOST_WIDE_INT size = sparc_frame_size;
5683 if (sparc_n_global_fp_regs > 0)
5684 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5685 sparc_frame_base_offset
5686 - sparc_apparent_frame_size,
5687 SORR_RESTORE);
5689 /* If we have a frame pointer, we'll need both to restore it before the
5690 frame is destroyed and use its current value in destroying the frame.
5691 Since we don't have an atomic way to do that in the flat window model,
5692 we save the current value into a temporary register (%g1). */
5693 if (frame_pointer_needed && !for_eh)
5694 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5696 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5697 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5698 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5700 if (sparc_save_local_in_regs_p)
5701 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5702 sparc_frame_base_offset,
5703 SORR_RESTORE);
5705 if (size == 0 || for_eh)
5706 ; /* do nothing. */
5707 else if (frame_pointer_needed)
5709 /* Make sure the frame is destroyed after everything else is done. */
5710 emit_insn (gen_blockage ());
5712 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5714 else
5716 /* Likewise. */
5717 emit_insn (gen_blockage ());
5719 if (size <= 4096)
5720 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5721 else if (size <= 8192)
5723 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5724 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5726 else
5728 rtx reg = gen_rtx_REG (Pmode, 1);
5729 emit_move_insn (reg, GEN_INT (size));
5730 emit_insn (gen_stack_pointer_inc (reg));
5735 /* Return true if it is appropriate to emit `return' instructions in the
5736 body of a function. */
5738 bool
5739 sparc_can_use_return_insn_p (void)
5741 return sparc_prologue_data_valid_p
5742 && sparc_n_global_fp_regs == 0
5743 && (TARGET_FLAT
5744 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5745 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5748 /* This function generates the assembly code for function exit. */
5750 static void
5751 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5753 /* If the last two instructions of a function are "call foo; dslot;"
5754 the return address might point to the first instruction in the next
5755 function and we have to output a dummy nop for the sake of sane
5756 backtraces in such cases. This is pointless for sibling calls since
5757 the return address is explicitly adjusted. */
5759 rtx insn, last_real_insn;
5761 insn = get_last_insn ();
5763 last_real_insn = prev_real_insn (insn);
5764 if (last_real_insn
5765 && GET_CODE (last_real_insn) == INSN
5766 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5767 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5769 if (last_real_insn
5770 && CALL_P (last_real_insn)
5771 && !SIBLING_CALL_P (last_real_insn))
5772 fputs("\tnop\n", file);
5774 sparc_output_deferred_case_vectors ();
5777 /* Output a 'restore' instruction. */
5779 static void
5780 output_restore (rtx pat)
5782 rtx operands[3];
5784 if (! pat)
5786 fputs ("\t restore\n", asm_out_file);
5787 return;
5790 gcc_assert (GET_CODE (pat) == SET);
5792 operands[0] = SET_DEST (pat);
5793 pat = SET_SRC (pat);
5795 switch (GET_CODE (pat))
5797 case PLUS:
5798 operands[1] = XEXP (pat, 0);
5799 operands[2] = XEXP (pat, 1);
5800 output_asm_insn (" restore %r1, %2, %Y0", operands);
5801 break;
5802 case LO_SUM:
5803 operands[1] = XEXP (pat, 0);
5804 operands[2] = XEXP (pat, 1);
5805 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5806 break;
5807 case ASHIFT:
5808 operands[1] = XEXP (pat, 0);
5809 gcc_assert (XEXP (pat, 1) == const1_rtx);
5810 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5811 break;
5812 default:
5813 operands[1] = pat;
5814 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5815 break;
5819 /* Output a return. */
5821 const char *
5822 output_return (rtx insn)
5824 if (crtl->calls_eh_return)
5826 /* If the function uses __builtin_eh_return, the eh_return
5827 machinery occupies the delay slot. */
5828 gcc_assert (!final_sequence);
5830 if (flag_delayed_branch)
5832 if (!TARGET_FLAT && TARGET_V9)
5833 fputs ("\treturn\t%i7+8\n", asm_out_file);
5834 else
5836 if (!TARGET_FLAT)
5837 fputs ("\trestore\n", asm_out_file);
5839 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5842 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5844 else
5846 if (!TARGET_FLAT)
5847 fputs ("\trestore\n", asm_out_file);
5849 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5850 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5853 else if (sparc_leaf_function_p || TARGET_FLAT)
5855 /* This is a leaf or flat function so we don't have to bother restoring
5856 the register window, which frees us from dealing with the convoluted
5857 semantics of restore/return. We simply output the jump to the
5858 return address and the insn in the delay slot (if any). */
5860 return "jmp\t%%o7+%)%#";
5862 else
5864 /* This is a regular function so we have to restore the register window.
5865 We may have a pending insn for the delay slot, which will be either
5866 combined with the 'restore' instruction or put in the delay slot of
5867 the 'return' instruction. */
5869 if (final_sequence)
5871 rtx delay, pat;
5873 delay = NEXT_INSN (insn);
5874 gcc_assert (delay);
5876 pat = PATTERN (delay);
5878 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5880 epilogue_renumber (&pat, 0);
5881 return "return\t%%i7+%)%#";
5883 else
5885 output_asm_insn ("jmp\t%%i7+%)", NULL);
5886 output_restore (pat);
5887 PATTERN (delay) = gen_blockage ();
5888 INSN_CODE (delay) = -1;
5891 else
5893 /* The delay slot is empty. */
5894 if (TARGET_V9)
5895 return "return\t%%i7+%)\n\t nop";
5896 else if (flag_delayed_branch)
5897 return "jmp\t%%i7+%)\n\t restore";
5898 else
5899 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5903 return "";
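/* Illustrative expansions of the strings returned above, once the %) and %#
   punctuation has been printed (this sketch assumes the common case where
   %) yields 8): a V9 function with an empty delay slot produces
   "return %i7+8 / nop", a pre-V9 one with delayed branches
   "jmp %i7+8 / restore", and with delayed branches disabled
   "restore / jmp %o7+8 / nop".  */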
5906 /* Output a sibling call. */
5908 const char *
5909 output_sibcall (rtx insn, rtx call_operand)
5911 rtx operands[1];
5913 gcc_assert (flag_delayed_branch);
5915 operands[0] = call_operand;
5917 if (sparc_leaf_function_p || TARGET_FLAT)
5919 /* This is a leaf or flat function so we don't have to bother restoring
5920 the register window. We simply output the jump to the function and
5921 the insn in the delay slot (if any). */
5923 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5925 if (final_sequence)
5926 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5927 operands);
5928 else
5929 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
5930 it into branch if possible. */
5931 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
5932 operands);
5934 else
5936 /* This is a regular function so we have to restore the register window.
5937 We may have a pending insn for the delay slot, which will be combined
5938 with the 'restore' instruction. */
5940 output_asm_insn ("call\t%a0, 0", operands);
5942 if (final_sequence)
5944 rtx delay = NEXT_INSN (insn);
5945 gcc_assert (delay);
5947 output_restore (PATTERN (delay));
5949 PATTERN (delay) = gen_blockage ();
5950 INSN_CODE (delay) = -1;
5952 else
5953 output_restore (NULL_RTX);
5956 return "";
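/* For reference, the leaf/flat case above with an empty delay slot emits,
   approximately:

       or    %o7, %g0, %g1           ! stash the return address
       call  <target>, 0
        or   %g1, %g0, %o7           ! restore it in the delay slot

   so the sibling call returns directly to our own caller.  */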
5959 /* Functions for handling argument passing.
5961 For 32-bit, the first 6 args are normally in registers and the rest are
5962 pushed. Any arg that starts within the first 6 words is at least
5963 partially passed in a register unless its data type forbids.
5965 For 64-bit, the argument registers are laid out as an array of 16 elements
5966 and arguments are added sequentially. The first 6 int args and up to the
5967 first 16 fp args (depending on size) are passed in regs.
5969 Slot    Stack     Integral   Float   Float in structure   Double   Long Double
5970 ----    -----     --------   -----   ------------------   ------   -----------
5971  15   [SP+248]               %f31        %f30,%f31         %d30
5972  14   [SP+240]               %f29        %f28,%f29         %d28        %q28
5973  13   [SP+232]               %f27        %f26,%f27         %d26
5974  12   [SP+224]               %f25        %f24,%f25         %d24        %q24
5975  11   [SP+216]               %f23        %f22,%f23         %d22
5976  10   [SP+208]               %f21        %f20,%f21         %d20        %q20
5977   9   [SP+200]               %f19        %f18,%f19         %d18
5978   8   [SP+192]               %f17        %f16,%f17         %d16        %q16
5979   7   [SP+184]               %f15        %f14,%f15         %d14
5980   6   [SP+176]               %f13        %f12,%f13         %d12        %q12
5981   5   [SP+168]      %o5      %f11        %f10,%f11         %d10
5982   4   [SP+160]      %o4       %f9         %f8,%f9           %d8         %q8
5983   3   [SP+152]      %o3       %f7         %f6,%f7           %d6
5984   2   [SP+144]      %o2       %f5         %f4,%f5           %d4         %q4
5985   1   [SP+136]      %o1       %f3         %f2,%f3           %d2
5986   0   [SP+128]      %o0       %f1         %f0,%f1           %d0         %q0
5988 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
5990 Integral arguments are always passed as 64-bit quantities appropriately
5991 extended.
5993 Passing of floating point values is handled as follows.
5994 If a prototype is in scope:
5995 If the value is in a named argument (i.e. not a stdarg function or a
5996 value not part of the `...') then the value is passed in the appropriate
5997 fp reg.
5998 If the value is part of the `...' and is passed in one of the first 6
5999 slots then the value is passed in the appropriate int reg.
6000 If the value is part of the `...' and is not passed in one of the first 6
6001 slots then the value is passed in memory.
6002 If a prototype is not in scope:
6003 If the value is one of the first 6 arguments the value is passed in the
6004 appropriate integer reg and the appropriate fp reg.
6005 If the value is not one of the first 6 arguments the value is passed in
6006 the appropriate fp reg and in memory.
6009 Summary of the calling conventions implemented by GCC on the SPARC:
6011 32-bit ABI:
6012                              size     argument    return value
6014 small integer                 <4      int. reg.     int. reg.
6015 word                           4      int. reg.     int. reg.
6016 double word                    8      int. reg.     int. reg.
6018 _Complex small integer        <8      int. reg.     int. reg.
6019 _Complex word                  8      int. reg.     int. reg.
6020 _Complex double word          16       memory       int. reg.
6022 vector integer               <=8      int. reg.      FP reg.
6023 vector integer                >8       memory        memory
6025 float                          4      int. reg.      FP reg.
6026 double                         8      int. reg.      FP reg.
6027 long double                   16       memory        memory
6029 _Complex float                 8       memory        FP reg.
6030 _Complex double               16       memory        FP reg.
6031 _Complex long double          32       memory        FP reg.
6033 vector float                 any       memory        memory
6035 aggregate                    any       memory        memory
6039 64-bit ABI:
6040                              size     argument    return value
6042 small integer                 <8      int. reg.     int. reg.
6043 word                           8      int. reg.     int. reg.
6044 double word                   16      int. reg.     int. reg.
6046 _Complex small integer       <16      int. reg.     int. reg.
6047 _Complex word                 16      int. reg.     int. reg.
6048 _Complex double word          32       memory       int. reg.
6050 vector integer              <=16       FP reg.       FP reg.
6051 vector integer           16<s<=32      memory        FP reg.
6052 vector integer               >32       memory        memory
6054 float                          4       FP reg.       FP reg.
6055 double                         8       FP reg.       FP reg.
6056 long double                   16       FP reg.       FP reg.
6058 _Complex float                 8       FP reg.       FP reg.
6059 _Complex double               16       FP reg.       FP reg.
6060 _Complex long double          32       memory        FP reg.
6062 vector float                <=16       FP reg.       FP reg.
6063 vector float             16<s<=32      memory        FP reg.
6064 vector float                 >32       memory        memory
6066 aggregate                   <=16        reg.          reg.
6067 aggregate                16<s<=32      memory         reg.
6068 aggregate                    >32       memory        memory
6072 Note #1: complex floating-point types follow the extended SPARC ABIs as
6073 implemented by the Sun compiler.
6075 Note #2: integral vector types follow the scalar floating-point types
6076 conventions to match what is implemented by the Sun VIS SDK.
6078 Note #3: floating-point vector types follow the aggregate types
6079 conventions. */
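/* A worked example of the 64-bit conventions tabulated above (illustrative
   only): for a prototyped call

     void f (int a, double b, float c);

   A occupies slot 0 and is passed in %o0, B occupies slot 1 and is passed
   in %d2, and C occupies slot 2 but, being a single float, is
   right-justified in its slot's FP register pair, i.e. passed in %f5.  */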
6082 /* Maximum number of int regs for args. */
6083 #define SPARC_INT_ARG_MAX 6
6084 /* Maximum number of fp regs for args. */
6085 #define SPARC_FP_ARG_MAX 16
6087 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
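/* For instance, with UNITS_PER_WORD == 8 (64-bit), ROUND_ADVANCE (9) is
   (9 + 7) / 8 == 2: a 9-byte argument consumes two word-sized slots.  */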
6089 /* Handle the INIT_CUMULATIVE_ARGS macro.
6090 Initialize a variable CUM of type CUMULATIVE_ARGS
6091 for a call to a function whose data type is FNTYPE.
6092 For a library call, FNTYPE is 0. */
6094 void
6095 init_cumulative_args (struct sparc_args *cum, tree fntype,
6096 rtx libname ATTRIBUTE_UNUSED,
6097 tree fndecl ATTRIBUTE_UNUSED)
6099 cum->words = 0;
6100 cum->prototype_p = fntype && prototype_p (fntype);
6101 cum->libcall_p = fntype == 0;
6104 /* Handle promotion of pointer and integer arguments. */
6106 static enum machine_mode
6107 sparc_promote_function_mode (const_tree type,
6108 enum machine_mode mode,
6109 int *punsignedp,
6110 const_tree fntype ATTRIBUTE_UNUSED,
6111 int for_return ATTRIBUTE_UNUSED)
6113 if (type != NULL_TREE && POINTER_TYPE_P (type))
6115 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6116 return Pmode;
6119 /* Integral arguments are passed as full words, as per the ABI. */
6120 if (GET_MODE_CLASS (mode) == MODE_INT
6121 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6122 return word_mode;
6124 return mode;
6127 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6129 static bool
6130 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6132 return TARGET_ARCH64 ? true : false;
6135 /* Scan the record type TYPE and return the following predicates:
6136 - INTREGS_P: the record contains at least one field or sub-field
6137 that is eligible for promotion in integer registers.
6138 - FP_REGS_P: the record contains at least one field or sub-field
6139 that is eligible for promotion in floating-point registers.
6140 - PACKED_P: the record contains at least one field that is packed.
6142 Sub-fields are not taken into account for the PACKED_P predicate. */
6144 static void
6145 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6146 int *packed_p)
6148 tree field;
6150 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6152 if (TREE_CODE (field) == FIELD_DECL)
6154 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6155 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6156 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6157 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6158 && TARGET_FPU)
6159 *fpregs_p = 1;
6160 else
6161 *intregs_p = 1;
6163 if (packed_p && DECL_PACKED (field))
6164 *packed_p = 1;
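/* Illustrative example: scanning struct { float f; int i; } sets both
   *FPREGS_P (for F, provided TARGET_FPU) and *INTREGS_P (for I); marking
   a field with __attribute__ ((packed)) would set *PACKED_P as well.  */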
6169 /* Compute the slot number to pass an argument in.
6170 Return the slot number or -1 if passing on the stack.
6172 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6173 the preceding args and about the function being called.
6174 MODE is the argument's machine mode.
6175 TYPE is the data type of the argument (as a tree).
6176 This is null for libcalls where that information may
6177 not be available.
6178 NAMED is nonzero if this argument is a named parameter
6179 (otherwise it is an extra parameter matching an ellipsis).
6180 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6181 *PREGNO records the register number to use if scalar type.
6182 *PPADDING records the amount of padding needed in words. */
6184 static int
6185 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
6186 const_tree type, bool named, bool incoming_p,
6187 int *pregno, int *ppadding)
6189 int regbase = (incoming_p
6190 ? SPARC_INCOMING_INT_ARG_FIRST
6191 : SPARC_OUTGOING_INT_ARG_FIRST);
6192 int slotno = cum->words;
6193 enum mode_class mclass;
6194 int regno;
6196 *ppadding = 0;
6198 if (type && TREE_ADDRESSABLE (type))
6199 return -1;
6201 if (TARGET_ARCH32
6202 && mode == BLKmode
6203 && type
6204 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6205 return -1;
6207 /* For SPARC64, objects requiring 16-byte alignment get it. */
6208 if (TARGET_ARCH64
6209 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6210 && (slotno & 1) != 0)
6211 slotno++, *ppadding = 1;
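/* Example: a 16-byte-aligned object (e.g. a long double) arriving at an
   odd slot number is moved up to the next even slot, and *PPADDING
   reports the word skipped over.  */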
6213 mclass = GET_MODE_CLASS (mode);
6214 if (type && TREE_CODE (type) == VECTOR_TYPE)
6216 /* Vector types deserve special treatment because they are
6217 polymorphic wrt their mode, depending upon whether VIS
6218 instructions are enabled. */
6219 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6221 /* The SPARC port defines no floating-point vector modes. */
6222 gcc_assert (mode == BLKmode);
6224 else
6226 /* Integral vector types should either have a vector
6227 mode or an integral mode, because we are guaranteed
6228 by pass_by_reference that their size is not greater
6229 than 16 bytes and TImode is 16-byte wide. */
6230 gcc_assert (mode != BLKmode);
6232 /* Vector integers are handled like floats according to
6233 the Sun VIS SDK. */
6234 mclass = MODE_FLOAT;
6238 switch (mclass)
6240 case MODE_FLOAT:
6241 case MODE_COMPLEX_FLOAT:
6242 case MODE_VECTOR_INT:
6243 if (TARGET_ARCH64 && TARGET_FPU && named)
6245 if (slotno >= SPARC_FP_ARG_MAX)
6246 return -1;
6247 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6248 /* Arguments filling only one single FP register are
6249 right-justified in the outer double FP register. */
6250 if (GET_MODE_SIZE (mode) <= 4)
6251 regno++;
6252 break;
6254 /* fallthrough */
6256 case MODE_INT:
6257 case MODE_COMPLEX_INT:
6258 if (slotno >= SPARC_INT_ARG_MAX)
6259 return -1;
6260 regno = regbase + slotno;
6261 break;
6263 case MODE_RANDOM:
6264 if (mode == VOIDmode)
6265 /* MODE is VOIDmode when generating the actual call. */
6266 return -1;
6268 gcc_assert (mode == BLKmode);
6270 if (TARGET_ARCH32
6271 || !type
6272 || (TREE_CODE (type) != VECTOR_TYPE
6273 && TREE_CODE (type) != RECORD_TYPE))
6275 if (slotno >= SPARC_INT_ARG_MAX)
6276 return -1;
6277 regno = regbase + slotno;
6279 else /* TARGET_ARCH64 && type */
6281 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6283 /* First see what kinds of registers we would need. */
6284 if (TREE_CODE (type) == VECTOR_TYPE)
6285 fpregs_p = 1;
6286 else
6287 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6289 /* The ABI obviously doesn't specify how packed structures
6290 are passed. These are defined to be passed in int regs
6291 if possible, otherwise memory. */
6292 if (packed_p || !named)
6293 fpregs_p = 0, intregs_p = 1;
6295 /* If all arg slots are filled, then must pass on stack. */
6296 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6297 return -1;
6299 /* If there are only int args and all int arg slots are filled,
6300 then must pass on stack. */
6301 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6302 return -1;
6304 /* Note that even if all int arg slots are filled, fp members may
6305 still be passed in regs if such regs are available.
6306 *PREGNO isn't set because there may be more than one; it's up
6307 to the caller to compute them. */
6308 return slotno;
6310 break;
6312 default:
6313 gcc_unreachable ();
6316 *pregno = regno;
6317 return slotno;
6320 /* Handle recursive register counting for structure field layout. */
6322 struct function_arg_record_value_parms
6324 rtx ret; /* return expression being built. */
6325 int slotno; /* slot number of the argument. */
6326 int named; /* whether the argument is named. */
6327 int regbase; /* regno of the base register. */
6328 int stack; /* 1 if part of the argument is on the stack. */
6329 int intoffset; /* offset of the first pending integer field. */
6330 unsigned int nregs; /* number of words passed in registers. */
6333 static void function_arg_record_value_3
6334 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6335 static void function_arg_record_value_2
6336 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6337 static void function_arg_record_value_1
6338 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6339 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6340 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6342 /* A subroutine of function_arg_record_value. Traverse the structure
6343 recursively and determine how many registers will be required. */
6345 static void
6346 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6347 struct function_arg_record_value_parms *parms,
6348 bool packed_p)
6350 tree field;
6352 /* We need to compute how many registers are needed so we can
6353 allocate the PARALLEL but before we can do that we need to know
6354 whether there are any packed fields. The ABI obviously doesn't
6355 specify how structures are passed in this case, so they are
6356 defined to be passed in int regs if possible, otherwise memory,
6357 regardless of whether there are fp values present. */
6359 if (! packed_p)
6360 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6362 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6364 packed_p = true;
6365 break;
6369 /* Compute how many registers we need. */
6370 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6372 if (TREE_CODE (field) == FIELD_DECL)
6374 HOST_WIDE_INT bitpos = startbitpos;
6376 if (DECL_SIZE (field) != 0)
6378 if (integer_zerop (DECL_SIZE (field)))
6379 continue;
6381 if (host_integerp (bit_position (field), 1))
6382 bitpos += int_bit_position (field);
6385 /* ??? FIXME: else assume zero offset. */
6387 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6388 function_arg_record_value_1 (TREE_TYPE (field),
6389 bitpos,
6390 parms,
6391 packed_p);
6392 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6393 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6394 && TARGET_FPU
6395 && parms->named
6396 && ! packed_p)
6398 if (parms->intoffset != -1)
6400 unsigned int startbit, endbit;
6401 int intslots, this_slotno;
6403 startbit = parms->intoffset & -BITS_PER_WORD;
6404 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6406 intslots = (endbit - startbit) / BITS_PER_WORD;
6407 this_slotno = parms->slotno + parms->intoffset
6408 / BITS_PER_WORD;
6410 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6412 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6413 /* We need to pass this field on the stack. */
6414 parms->stack = 1;
6417 parms->nregs += intslots;
6418 parms->intoffset = -1;
6421 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
6422 If it weren't true, we wouldn't be here. */
6423 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6424 && DECL_MODE (field) == BLKmode)
6425 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6426 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6427 parms->nregs += 2;
6428 else
6429 parms->nregs += 1;
6431 else
6433 if (parms->intoffset == -1)
6434 parms->intoffset = bitpos;
6440 /* A subroutine of function_arg_record_value. Assign the bits of the
6441 structure between parms->intoffset and bitpos to integer registers. */
6443 static void
6444 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6445 struct function_arg_record_value_parms *parms)
6447 enum machine_mode mode;
6448 unsigned int regno;
6449 unsigned int startbit, endbit;
6450 int this_slotno, intslots, intoffset;
6451 rtx reg;
6453 if (parms->intoffset == -1)
6454 return;
6456 intoffset = parms->intoffset;
6457 parms->intoffset = -1;
6459 startbit = intoffset & -BITS_PER_WORD;
6460 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6461 intslots = (endbit - startbit) / BITS_PER_WORD;
6462 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6464 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6465 if (intslots <= 0)
6466 return;
6468 /* If this is the trailing part of a word, only load that much into
6469 the register. Otherwise load the whole register. Note that in
6470 the latter case we may pick up unwanted bits. It's not a problem
6471 at the moment, but we may wish to revisit this. */
6473 if (intoffset % BITS_PER_WORD != 0)
6474 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6475 MODE_INT);
6476 else
6477 mode = word_mode;
6479 intoffset /= BITS_PER_UNIT;
6480 do
6481 {
6482 regno = parms->regbase + this_slotno;
6483 reg = gen_rtx_REG (mode, regno);
6484 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6485 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6487 this_slotno += 1;
6488 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6489 mode = word_mode;
6490 parms->nregs += 1;
6491 intslots -= 1;
6492 }
6493 while (intslots > 0);
6496 /* A subroutine of function_arg_record_value. Traverse the structure
6497 recursively and assign bits to floating point registers. Track which
6498 bits in between need integer registers; invoke function_arg_record_value_3
6499 to make that happen. */
6501 static void
6502 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6503 struct function_arg_record_value_parms *parms,
6504 bool packed_p)
6506 tree field;
6508 if (! packed_p)
6509 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6511 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6513 packed_p = true;
6514 break;
6518 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6520 if (TREE_CODE (field) == FIELD_DECL)
6522 HOST_WIDE_INT bitpos = startbitpos;
6524 if (DECL_SIZE (field) != 0)
6526 if (integer_zerop (DECL_SIZE (field)))
6527 continue;
6529 if (host_integerp (bit_position (field), 1))
6530 bitpos += int_bit_position (field);
6533 /* ??? FIXME: else assume zero offset. */
6535 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6536 function_arg_record_value_2 (TREE_TYPE (field),
6537 bitpos,
6538 parms,
6539 packed_p);
6540 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6541 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6542 && TARGET_FPU
6543 && parms->named
6544 && ! packed_p)
6546 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6547 int regno, nregs, pos;
6548 enum machine_mode mode = DECL_MODE (field);
6549 rtx reg;
6551 function_arg_record_value_3 (bitpos, parms);
6553 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6554 && mode == BLKmode)
6556 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6557 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6559 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6561 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6562 nregs = 2;
6564 else
6565 nregs = 1;
6567 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6568 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6569 regno++;
6570 reg = gen_rtx_REG (mode, regno);
6571 pos = bitpos / BITS_PER_UNIT;
6572 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6573 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6574 parms->nregs += 1;
6575 while (--nregs > 0)
6577 regno += GET_MODE_SIZE (mode) / 4;
6578 reg = gen_rtx_REG (mode, regno);
6579 pos += GET_MODE_SIZE (mode);
6580 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6581 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6582 parms->nregs += 1;
6585 else
6587 if (parms->intoffset == -1)
6588 parms->intoffset = bitpos;
6594 /* Used by function_arg and sparc_function_value_1 to implement the complex
6595 conventions of the 64-bit ABI for passing and returning structures.
6596 Return an expression valid as a return value for the FUNCTION_ARG
6597 and TARGET_FUNCTION_VALUE.
6599 TYPE is the data type of the argument (as a tree).
6600 This is null for libcalls where that information may
6601 not be available.
6602 MODE is the argument's machine mode.
6603 SLOTNO is the index number of the argument's slot in the parameter array.
6604 NAMED is nonzero if this argument is a named parameter
6605 (otherwise it is an extra parameter matching an ellipsis).
6606 REGBASE is the regno of the base register for the parameter array. */
6608 static rtx
6609 function_arg_record_value (const_tree type, enum machine_mode mode,
6610 int slotno, int named, int regbase)
6612 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6613 struct function_arg_record_value_parms parms;
6614 unsigned int nregs;
6616 parms.ret = NULL_RTX;
6617 parms.slotno = slotno;
6618 parms.named = named;
6619 parms.regbase = regbase;
6620 parms.stack = 0;
6622 /* Compute how many registers we need. */
6623 parms.nregs = 0;
6624 parms.intoffset = 0;
6625 function_arg_record_value_1 (type, 0, &parms, false);
6627 /* Take into account pending integer fields. */
6628 if (parms.intoffset != -1)
6630 unsigned int startbit, endbit;
6631 int intslots, this_slotno;
6633 startbit = parms.intoffset & -BITS_PER_WORD;
6634 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6635 intslots = (endbit - startbit) / BITS_PER_WORD;
6636 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6638 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6640 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6641 /* We need to pass this field on the stack. */
6642 parms.stack = 1;
6645 parms.nregs += intslots;
6647 nregs = parms.nregs;
6649 /* Allocate the vector and handle some annoying special cases. */
6650 if (nregs == 0)
6652 /* ??? Empty structure has no value? Duh? */
6653 if (typesize <= 0)
6655 /* Though there's nothing really to store, return a word register
6656 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6657 leads to breakage due to the fact that there are zero bytes to
6658 load. */
6659 return gen_rtx_REG (mode, regbase);
6661 else
6663 /* ??? C++ has structures with no fields, and yet a size. Give up
6664 for now and pass everything back in integer registers. */
6665 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6667 if (nregs + slotno > SPARC_INT_ARG_MAX)
6668 nregs = SPARC_INT_ARG_MAX - slotno;
6670 gcc_assert (nregs != 0);
6672 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6674 /* If at least one field must be passed on the stack, generate
6675 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6676 also be passed on the stack. We can't do much better because the
6677 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6678 of structures for which the fields passed exclusively in registers
6679 are not at the beginning of the structure. */
6680 if (parms.stack)
6681 XVECEXP (parms.ret, 0, 0)
6682 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6684 /* Fill in the entries. */
6685 parms.nregs = 0;
6686 parms.intoffset = 0;
6687 function_arg_record_value_2 (type, 0, &parms, false);
6688 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6690 gcc_assert (parms.nregs == nregs);
6692 return parms.ret;
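/* An illustrative result (outgoing, slot 0) for struct { double d; int i; }:

     (parallel [(expr_list (reg:DF %f0) (const_int 0))
                (expr_list (reg:DI %o1) (const_int 8))])

   i.e. D rides in the FP register of slot 0 while the word containing I
   goes in the integer register of slot 1.  */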
6695 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6696 of the 64-bit ABI for passing and returning unions.
6697 Return an expression valid as a return value for the FUNCTION_ARG
6698 and TARGET_FUNCTION_VALUE.
6700 SIZE is the size in bytes of the union.
6701 MODE is the argument's machine mode.
6702 REGNO is the hard register the union will be passed in. */
6704 static rtx
6705 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6706 int regno)
6708 int nwords = ROUND_ADVANCE (size), i;
6709 rtx regs;
6711 /* See comment in previous function for empty structures. */
6712 if (nwords == 0)
6713 return gen_rtx_REG (mode, regno);
6715 if (slotno == SPARC_INT_ARG_MAX - 1)
6716 nwords = 1;
6718 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6720 for (i = 0; i < nwords; i++)
6722 /* Unions are passed left-justified. */
6723 XVECEXP (regs, 0, i)
6724 = gen_rtx_EXPR_LIST (VOIDmode,
6725 gen_rtx_REG (word_mode, regno),
6726 GEN_INT (UNITS_PER_WORD * i));
6727 regno++;
6730 return regs;
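/* Example: an 8-byte union passed in slot 2 on 64-bit comes back as
   (parallel [(expr_list (reg:DI %o2) (const_int 0))]); unions always
   travel left-justified in integer registers.  */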
6733 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6734 for passing and returning large (BLKmode) vectors.
6735 Return an expression valid as a return value for the FUNCTION_ARG
6736 and TARGET_FUNCTION_VALUE.
6738 SIZE is the size in bytes of the vector (at least 8 bytes).
6739 REGNO is the FP hard register the vector will be passed in. */
6741 static rtx
6742 function_arg_vector_value (int size, int regno)
6744 int i, nregs = size / 8;
6745 rtx regs;
6747 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6749 for (i = 0; i < nregs; i++)
6751 XVECEXP (regs, 0, i)
6752 = gen_rtx_EXPR_LIST (VOIDmode,
6753 gen_rtx_REG (DImode, regno + 2*i),
6754 GEN_INT (i*8));
6757 return regs;
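/* Example: a 16-byte vector assigned to FP reg N is split into two DImode
   pieces, (reg:DI N) at byte offset 0 and (reg:DI N+2) at byte offset 8.  */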
6760 /* Determine where to put an argument to a function.
6761 Value is zero to push the argument on the stack,
6762 or a hard register in which to store the argument.
6764 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6765 the preceding args and about the function being called.
6766 MODE is the argument's machine mode.
6767 TYPE is the data type of the argument (as a tree).
6768 This is null for libcalls where that information may
6769 not be available.
6770 NAMED is true if this argument is a named parameter
6771 (otherwise it is an extra parameter matching an ellipsis).
6772 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6773 TARGET_FUNCTION_INCOMING_ARG. */
6775 static rtx
6776 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6777 const_tree type, bool named, bool incoming_p)
6779 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6781 int regbase = (incoming_p
6782 ? SPARC_INCOMING_INT_ARG_FIRST
6783 : SPARC_OUTGOING_INT_ARG_FIRST);
6784 int slotno, regno, padding;
6785 enum mode_class mclass = GET_MODE_CLASS (mode);
6787 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6788 &regno, &padding);
6789 if (slotno == -1)
6790 return 0;
6792 /* Vector types deserve special treatment because they are polymorphic wrt
6793 their mode, depending upon whether VIS instructions are enabled. */
6794 if (type && TREE_CODE (type) == VECTOR_TYPE)
6796 HOST_WIDE_INT size = int_size_in_bytes (type);
6797 gcc_assert ((TARGET_ARCH32 && size <= 8)
6798 || (TARGET_ARCH64 && size <= 16));
6800 if (mode == BLKmode)
6801 return function_arg_vector_value (size,
6802 SPARC_FP_ARG_FIRST + 2*slotno);
6803 else
6804 mclass = MODE_FLOAT;
6807 if (TARGET_ARCH32)
6808 return gen_rtx_REG (mode, regno);
6810 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6811 and are promoted to registers if possible. */
6812 if (type && TREE_CODE (type) == RECORD_TYPE)
6814 HOST_WIDE_INT size = int_size_in_bytes (type);
6815 gcc_assert (size <= 16);
6817 return function_arg_record_value (type, mode, slotno, named, regbase);
6820 /* Unions up to 16 bytes in size are passed in integer registers. */
6821 else if (type && TREE_CODE (type) == UNION_TYPE)
6823 HOST_WIDE_INT size = int_size_in_bytes (type);
6824 gcc_assert (size <= 16);
6826 return function_arg_union_value (size, mode, slotno, regno);
6829 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6830 but also have the slot allocated for them.
6831 If no prototype is in scope fp values in register slots get passed
6832 in two places, either fp regs and int regs or fp regs and memory. */
6833 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6834 && SPARC_FP_REG_P (regno))
6836 rtx reg = gen_rtx_REG (mode, regno);
6837 if (cum->prototype_p || cum->libcall_p)
6839 /* "* 2" because fp reg numbers are recorded in 4 byte
6840 quantities. */
6841 #if 0
6842 /* ??? This will cause the value to be passed in the fp reg and
6843 in the stack. When a prototype exists we want to pass the
6844 value in the reg but reserve space on the stack. That's an
6845 optimization, and is deferred [for a bit]. */
6846 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6847 return gen_rtx_PARALLEL (mode,
6848 gen_rtvec (2,
6849 gen_rtx_EXPR_LIST (VOIDmode,
6850 NULL_RTX, const0_rtx),
6851 gen_rtx_EXPR_LIST (VOIDmode,
6852 reg, const0_rtx)));
6853 else
6854 #else
6855 /* ??? It seems that passing back a register even when past
6856 the area declared by REG_PARM_STACK_SPACE will allocate
6857 space appropriately, and will not copy the data onto the
6858 stack, exactly as we desire.
6860 This is due to locate_and_pad_parm being called in
6861 expand_call whenever reg_parm_stack_space > 0, which,
6862 while beneficial to our example here, would seem to be
6863 in error from what had been intended. Ho hum... -- r~ */
6864 #endif
6865 return reg;
6867 else
6869 rtx v0, v1;
6871 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6873 int intreg;
6875 /* On incoming, we don't need to know that the value
6876 is passed in %f0 and %i0, and it confuses other parts
6877 causing needless spillage even on the simplest cases. */
6878 if (incoming_p)
6879 return reg;
6881 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6882 + (regno - SPARC_FP_ARG_FIRST) / 2);
6884 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6885 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6886 const0_rtx);
6887 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6889 else
6891 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6892 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6893 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6898 /* All other aggregate types are passed in an integer register in a mode
6899 corresponding to the size of the type. */
6900 else if (type && AGGREGATE_TYPE_P (type))
6902 HOST_WIDE_INT size = int_size_in_bytes (type);
6903 gcc_assert (size <= 16);
6905 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6908 return gen_rtx_REG (mode, regno);
6911 /* Handle the TARGET_FUNCTION_ARG target hook. */
6913 static rtx
6914 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6915 const_tree type, bool named)
6917 return sparc_function_arg_1 (cum, mode, type, named, false);
6920 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6922 static rtx
6923 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6924 const_tree type, bool named)
6926 return sparc_function_arg_1 (cum, mode, type, named, true);
6929 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
6931 static unsigned int
6932 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
6934 return ((TARGET_ARCH64
6935 && (GET_MODE_ALIGNMENT (mode) == 128
6936 || (type && TYPE_ALIGN (type) == 128)))
6937 ? 128
6938 : PARM_BOUNDARY);
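/* Example: on 64-bit, a long double argument (128-bit alignment) gets a
   128-bit boundary; everything else falls back to PARM_BOUNDARY.  */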
6941 /* For an arg passed partly in registers and partly in memory,
6942 this is the number of bytes of registers used.
6943 For args passed entirely in registers or entirely in memory, zero.
6945 Any arg that starts in the first 6 regs but won't entirely fit in them
6946 needs partial registers on v8. On v9, structures with integer
6947 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6948 values that begin in the last fp reg [where "last fp reg" varies with the
6949 mode] will be split between that reg and memory. */
6951 static int
6952 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
6953 tree type, bool named)
6955 int slotno, regno, padding;
6957 /* We pass false for incoming_p here; it doesn't matter. */
6958 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6959 false, &regno, &padding);
6961 if (slotno == -1)
6962 return 0;
6964 if (TARGET_ARCH32)
6966 if ((slotno + (mode == BLKmode
6967 ? ROUND_ADVANCE (int_size_in_bytes (type))
6968 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
6969 > SPARC_INT_ARG_MAX)
6970 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
6972 else
6974 /* We are guaranteed by pass_by_reference that the size of the
6975 argument is not greater than 16 bytes, so we only need to return
6976 one word if the argument is partially passed in registers. */
6978 if (type && AGGREGATE_TYPE_P (type))
6980 int size = int_size_in_bytes (type);
6982 if (size > UNITS_PER_WORD
6983 && slotno == SPARC_INT_ARG_MAX - 1)
6984 return UNITS_PER_WORD;
6986 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
6987 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6988 && ! (TARGET_FPU && named)))
6990 /* The complex types are passed as packed types. */
6991 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6992 && slotno == SPARC_INT_ARG_MAX - 1)
6993 return UNITS_PER_WORD;
6995 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6997 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
6998 > SPARC_FP_ARG_MAX)
6999 return UNITS_PER_WORD;
7003 return 0;
7006 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7007 Specify whether to pass the argument by reference. */
7009 static bool
7010 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7011 enum machine_mode mode, const_tree type,
7012 bool named ATTRIBUTE_UNUSED)
7014 if (TARGET_ARCH32)
7015 /* Original SPARC 32-bit ABI says that structures and unions,
7016 and quad-precision floats are passed by reference. For Pascal,
7017 also pass arrays by reference. All other base types are passed
7018 in registers.
7020 Extended ABI (as implemented by the Sun compiler) says that all
7021 complex floats are passed by reference. Pass complex integers
7022 in registers up to 8 bytes. More generally, enforce the 2-word
7023 cap for passing arguments in registers.
7025 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7026 integers are passed like floats of the same size, that is in
7027 registers up to 8 bytes. Pass all vector floats by reference
7028 like structure and unions. */
7029 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7030 || mode == SCmode
7031 /* Catch CDImode, TFmode, DCmode and TCmode. */
7032 || GET_MODE_SIZE (mode) > 8
7033 || (type
7034 && TREE_CODE (type) == VECTOR_TYPE
7035 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7036 else
7037 /* Original SPARC 64-bit ABI says that structures and unions
7038 smaller than 16 bytes are passed in registers, as well as
7039 all other base types.
7041 Extended ABI (as implemented by the Sun compiler) says that
7042 complex floats are passed in registers up to 16 bytes. Pass
7043 all complex integers in registers up to 16 bytes. More generally,
7044 enforce the 2-word cap for passing arguments in registers.
7046 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7047 integers are passed like floats of the same size, that is in
7048 registers (up to 16 bytes). Pass all vector floats like structure
7049 and unions. */
7050 return ((type
7051 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7052 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7053 /* Catch CTImode and TCmode. */
7054 || GET_MODE_SIZE (mode) > 16);
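/* Illustrative consequences of the rules above: on 32-bit, _Complex float
   (SCmode) and any aggregate are passed by reference; on 64-bit, a 24-byte
   struct goes by reference while a 16-byte one still travels in
   registers.  */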
7057 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7058 Update the data in CUM to advance over an argument
7059 of mode MODE and data type TYPE.
7060 TYPE is null for libcalls where that information may not be available. */
7062 static void
7063 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7064 const_tree type, bool named)
7066 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7067 int regno, padding;
7069 /* We pass false for incoming_p here; it doesn't matter. */
7070 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7072 /* If argument requires leading padding, add it. */
7073 cum->words += padding;
7075 if (TARGET_ARCH32)
7077 cum->words += (mode != BLKmode
7078 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7079 : ROUND_ADVANCE (int_size_in_bytes (type)));
7081 else
7083 if (type && AGGREGATE_TYPE_P (type))
7085 int size = int_size_in_bytes (type);
7087 if (size <= 8)
7088 ++cum->words;
7089 else if (size <= 16)
7090 cum->words += 2;
7091 else /* passed by reference */
7092 ++cum->words;
7094 else
7096 cum->words += (mode != BLKmode
7097 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7098 : ROUND_ADVANCE (int_size_in_bytes (type)));
7103 /* Handle the FUNCTION_ARG_PADDING macro.
7104 For the 64-bit ABI, structs are always stored left-shifted in their
7105 argument slot. */
7107 enum direction
7108 function_arg_padding (enum machine_mode mode, const_tree type)
7110 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7111 return upward;
7113 /* Fall back to the default. */
7114 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7117 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7118 Specify whether to return the return value in memory. */
7120 static bool
7121 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7123 if (TARGET_ARCH32)
7124 /* Original SPARC 32-bit ABI says that structures and unions,
7125 and quad-precision floats are returned in memory. All other
7126 base types are returned in registers.
7128 Extended ABI (as implemented by the Sun compiler) says that
7129 all complex floats are returned in registers (8 FP registers
7130 at most for '_Complex long double'). Return all complex integers
7131 in registers (4 at most for '_Complex long long').
7133 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7134 integers are returned like floats of the same size, that is in
7135 registers up to 8 bytes and in memory otherwise. Return all
7136 vector floats in memory like structure and unions; note that
7137 they always have BLKmode like the latter. */
7138 return (TYPE_MODE (type) == BLKmode
7139 || TYPE_MODE (type) == TFmode
7140 || (TREE_CODE (type) == VECTOR_TYPE
7141 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7142 else
7143 /* Original SPARC 64-bit ABI says that structures and unions
7144 smaller than 32 bytes are returned in registers, as well as
7145 all other base types.
7147 Extended ABI (as implemented by the Sun compiler) says that all
7148 complex floats are returned in registers (8 FP registers at most
7149 for '_Complex long double'). Return all complex integers in
7150 registers (4 at most for '_Complex TItype').
7152 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7153 integers are returned like floats of the same size, that is in
7154 registers. Return all vector floats like structure and unions;
7155 note that they always have BLKmode like the latter. */
7156 return (TYPE_MODE (type) == BLKmode
7157 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
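/* Illustrative consequences: on 32-bit, every struct (BLKmode) and TFmode
   value is returned in memory; on 64-bit, a 40-byte struct is returned in
   memory while a 24-byte one comes back in registers.  */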
7160 /* Handle the TARGET_STRUCT_VALUE target hook.
7161 Return where to find the structure return value address. */
7163 static rtx
7164 sparc_struct_value_rtx (tree fndecl, int incoming)
7166 if (TARGET_ARCH64)
7167 return 0;
7168 else
7170 rtx mem;
7172 if (incoming)
7173 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7174 STRUCT_VALUE_OFFSET));
7175 else
7176 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7177 STRUCT_VALUE_OFFSET));
7179 /* Only follow the SPARC ABI for fixed-size structure returns.
7180 Variable-size structure returns are handled per the normal
7181 procedures in GCC. This is enabled by -mstd-struct-return. */
7182 if (incoming == 2
7183 && sparc_std_struct_return
7184 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7185 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7187 /* We must check and adjust the return address, as it is
7188 optional as to whether the return object is really
7189 provided. */
7190 rtx ret_reg = gen_rtx_REG (Pmode, 31);
7191 rtx scratch = gen_reg_rtx (SImode);
7192 rtx endlab = gen_label_rtx ();
7194 /* Calculate the return object size */
7195 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7196 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7197 /* Construct a temporary return value */
7198 rtx temp_val
7199 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7201 /* Implement SPARC 32-bit psABI callee return struct checking:
7203 Fetch the instruction where we will return to and see if
7204 it's an unimp instruction (the most significant 10 bits
7205 will be zero). */
7206 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7207 plus_constant (Pmode,
7208 ret_reg, 8)));
7209 /* Assume the size is valid and pre-adjust */
7210 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7211 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7212 0, endlab);
7213 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7214 /* Write the address of the memory pointed to by temp_val into
7215 the memory pointed to by mem */
7216 emit_move_insn (mem, XEXP (temp_val, 0));
7217 emit_label (endlab);
7220 return mem;
7224 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7225 For v9, function return values are subject to the same rules as arguments,
7226 except that up to 32 bytes may be returned in registers. */
7228 static rtx
7229 sparc_function_value_1 (const_tree type, enum machine_mode mode,
7230 bool outgoing)
7232 /* Beware that the two values are swapped here wrt function_arg. */
7233 int regbase = (outgoing
7234 ? SPARC_INCOMING_INT_ARG_FIRST
7235 : SPARC_OUTGOING_INT_ARG_FIRST);
7236 enum mode_class mclass = GET_MODE_CLASS (mode);
7237 int regno;
7239 /* Vector types deserve special treatment because they are polymorphic wrt
7240 their mode, depending upon whether VIS instructions are enabled. */
7241 if (type && TREE_CODE (type) == VECTOR_TYPE)
7243 HOST_WIDE_INT size = int_size_in_bytes (type);
7244 gcc_assert ((TARGET_ARCH32 && size <= 8)
7245 || (TARGET_ARCH64 && size <= 32));
7247 if (mode == BLKmode)
7248 return function_arg_vector_value (size,
7249 SPARC_FP_ARG_FIRST);
7250 else
7251 mclass = MODE_FLOAT;
7254 if (TARGET_ARCH64 && type)
7256 /* Structures up to 32 bytes in size are returned in registers. */
7257 if (TREE_CODE (type) == RECORD_TYPE)
7259 HOST_WIDE_INT size = int_size_in_bytes (type);
7260 gcc_assert (size <= 32);
7262 return function_arg_record_value (type, mode, 0, 1, regbase);
7265 /* Unions up to 32 bytes in size are returned in integer registers. */
7266 else if (TREE_CODE (type) == UNION_TYPE)
7268 HOST_WIDE_INT size = int_size_in_bytes (type);
7269 gcc_assert (size <= 32);
7271 return function_arg_union_value (size, mode, 0, regbase);
7274 /* Objects that require it are returned in FP registers. */
7275 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7276 ; /* fall through to the regno selection below */
7278 /* All other aggregate types are returned in an integer register in a
7279 mode corresponding to the size of the type. */
7280 else if (AGGREGATE_TYPE_P (type))
7282 /* All other aggregate types are passed in an integer register
7283 in a mode corresponding to the size of the type. */
7284 HOST_WIDE_INT size = int_size_in_bytes (type);
7285 gcc_assert (size <= 32);
7287 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7289 /* ??? We probably should have made the same ABI change in
7290 3.4.0 as the one we made for unions. The latter was
7291 required by the SCD though, while the former is not
7292 specified, so we favored compatibility and efficiency.
7294 Now we're stuck for aggregates larger than 16 bytes,
7295 because OImode vanished in the meantime. Let's not
7296 try to be unduly clever, and simply follow the ABI
7297 for unions in that case. */
7298 if (mode == BLKmode)
7299 return function_arg_union_value (size, mode, 0, regbase);
7300 else
7301 mclass = MODE_INT;
7304 /* We should only have pointer and integer types at this point. This
7305 must match sparc_promote_function_mode. */
7306 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7307 mode = word_mode;
7310 /* We should only have pointer and integer types at this point. This must
7311 match sparc_promote_function_mode. */
7312 else if (TARGET_ARCH32
7313 && mclass == MODE_INT
7314 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7315 mode = word_mode;
7317 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7318 regno = SPARC_FP_ARG_FIRST;
7319 else
7320 regno = regbase;
7322 return gen_rtx_REG (mode, regno);
7325 /* Handle TARGET_FUNCTION_VALUE.
7326 On the SPARC, the value is found in the first "output" register, but the
7327 called function leaves it in the first "input" register. */
7329 static rtx
7330 sparc_function_value (const_tree valtype,
7331 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7332 bool outgoing)
7334 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7337 /* Handle TARGET_LIBCALL_VALUE. */
7339 static rtx
7340 sparc_libcall_value (enum machine_mode mode,
7341 const_rtx fun ATTRIBUTE_UNUSED)
7343 return sparc_function_value_1 (NULL_TREE, mode, false);
7346 /* Handle FUNCTION_VALUE_REGNO_P.
7347 On the SPARC, the first "output" reg is used for integer values, and the
7348 first floating point register is used for floating point values. */
7350 static bool
7351 sparc_function_value_regno_p (const unsigned int regno)
7353 return (regno == 8 || regno == 32);
7356 /* Do what is necessary for `va_start'. We look at the current function
7357 to determine if stdarg or varargs is used and return the address of
7358 the first unnamed parameter. */
7360 static rtx
7361 sparc_builtin_saveregs (void)
7363 int first_reg = crtl->args.info.words;
7364 rtx address;
7365 int regno;
7367 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7368 emit_move_insn (gen_rtx_MEM (word_mode,
7369 gen_rtx_PLUS (Pmode,
7370 frame_pointer_rtx,
7371 GEN_INT (FIRST_PARM_OFFSET (0)
7372 + (UNITS_PER_WORD
7373 * regno)))),
7374 gen_rtx_REG (word_mode,
7375 SPARC_INCOMING_INT_ARG_FIRST + regno));
7377 address = gen_rtx_PLUS (Pmode,
7378 frame_pointer_rtx,
7379 GEN_INT (FIRST_PARM_OFFSET (0)
7380 + UNITS_PER_WORD * first_reg));
7382 return address;
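/* Example: in a varargs function whose named arguments consume two slots,
   the loop above dumps the remaining incoming argument registers (%i2-%i5
   in the regular window model) into their reserved stack slots and
   returns the address of the first anonymous argument's save slot.  */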
7385 /* Implement `va_start' for stdarg. */
7387 static void
7388 sparc_va_start (tree valist, rtx nextarg)
7390 nextarg = expand_builtin_saveregs ();
7391 std_expand_builtin_va_start (valist, nextarg);
7394 /* Implement `va_arg' for stdarg. */
7396 static tree
7397 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7398 gimple_seq *post_p)
7400 HOST_WIDE_INT size, rsize, align;
7401 tree addr, incr;
7402 bool indirect;
7403 tree ptrtype = build_pointer_type (type);
7405 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7407 indirect = true;
7408 size = rsize = UNITS_PER_WORD;
7409 align = 0;
7411 else
7413 indirect = false;
7414 size = int_size_in_bytes (type);
7415 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7416 align = 0;
7418 if (TARGET_ARCH64)
7420 /* For SPARC64, objects requiring 16-byte alignment get it. */
7421 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7422 align = 2 * UNITS_PER_WORD;
7424 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7425 are left-justified in their slots. */
7426 if (AGGREGATE_TYPE_P (type))
7428 if (size == 0)
7429 size = rsize = UNITS_PER_WORD;
7430 else
7431 size = rsize;
7436 incr = valist;
7437 if (align)
7439 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7440 incr = fold_convert (sizetype, incr);
7441 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7442 size_int (-align));
7443 incr = fold_convert (ptr_type_node, incr);
7446 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7447 addr = incr;
7449 if (BYTES_BIG_ENDIAN && size < rsize)
7450 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7452 if (indirect)
7454 addr = fold_convert (build_pointer_type (ptrtype), addr);
7455 addr = build_va_arg_indirect_ref (addr);
7458 /* If the address isn't aligned properly for the type, we need a temporary.
7459 FIXME: This is inefficient; usually we can do this in registers. */
7460 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7462 tree tmp = create_tmp_var (type, "va_arg_tmp");
7463 tree dest_addr = build_fold_addr_expr (tmp);
7464 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7465 3, dest_addr, addr, size_int (rsize));
7466 TREE_ADDRESSABLE (tmp) = 1;
7467 gimplify_and_add (copy, pre_p);
7468 addr = dest_addr;
7471 else
7472 addr = fold_convert (ptrtype, addr);
7474 incr = fold_build_pointer_plus_hwi (incr, rsize);
7475 gimplify_assign (valist, incr, post_p);
7477 return build_va_arg_indirect_ref (addr);
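/* Example (64-bit): va_arg (ap, long double) first rounds AP up to a
   16-byte boundary, yields the 16-byte value at that address, and
   advances AP by 16; va_arg (ap, int) needs no rounding, fetches the
   value right-justified in its 8-byte slot, and advances AP by 8.  */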
7480 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7481 Specify whether the vector mode is supported by the hardware. */
7483 static bool
7484 sparc_vector_mode_supported_p (enum machine_mode mode)
7486 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7489 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7491 static enum machine_mode
7492 sparc_preferred_simd_mode (enum machine_mode mode)
7494 if (TARGET_VIS)
7495 switch (mode)
7497 case SImode:
7498 return V2SImode;
7499 case HImode:
7500 return V4HImode;
7501 case QImode:
7502 return V8QImode;
7504 default:;
7507 return word_mode;
7510 /* Return the string to output an unconditional branch to LABEL, which is
7511 the operand number of the label.
7513 DEST is the destination insn (i.e. the label), INSN is the source. */
7515 const char *
7516 output_ubranch (rtx dest, rtx insn)
7518 static char string[64];
7519 bool v9_form = false;
7520 int delta;
7521 char *p;
7523 /* Even if we are trying to use cbcond for this, evaluate
7524 whether we can use V9 branches as our backup plan. */
7526 delta = 5000000;
7527 if (INSN_ADDRESSES_SET_P ())
7528 delta = (INSN_ADDRESSES (INSN_UID (dest))
7529 - INSN_ADDRESSES (INSN_UID (insn)));
7531 /* Leave some instructions for "slop". */
7532 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7533 v9_form = true;
7535 if (TARGET_CBCOND)
7537 bool emit_nop = emit_cbcond_nop (insn);
7538 bool far = false;
7539 const char *rval;
7541 if (delta < -500 || delta > 500)
7542 far = true;
7544 if (far)
7546 if (v9_form)
7547 rval = "ba,a,pt\t%%xcc, %l0";
7548 else
7549 rval = "b,a\t%l0";
7551 else
7553 if (emit_nop)
7554 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7555 else
7556 rval = "cwbe\t%%g0, %%g0, %l0";
7558 return rval;
7561 if (v9_form)
7562 strcpy (string, "ba%*,pt\t%%xcc, ");
7563 else
7564 strcpy (string, "b%*\t");
7566 p = strchr (string, '\0');
7567 *p++ = '%';
7568 *p++ = 'l';
7569 *p++ = '0';
7570 *p++ = '%';
7571 *p++ = '(';
7572 *p = '\0';
7574 return string;
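/* Examples of the strings built above (sequences such as %*, %( and %# are
   punctuation expanded later by print_operand): a short pre-V9 branch
   prints as "b .LL4", the V9 form as "ba,pt %xcc, .LL4", and with cbcond
   available a nearby target becomes "cwbe %g0, %g0, .LL4".  */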
7577 /* Return the string to output a conditional branch to LABEL, which is
7578 the operand number of the label. OP is the conditional expression.
7579 XEXP (OP, 0) is assumed to be a condition code register (integer or
7580 floating point) and its mode specifies what kind of comparison we made.
7582 DEST is the destination insn (i.e. the label), INSN is the source.
7584 REVERSED is nonzero if we should reverse the sense of the comparison.
7586 ANNUL is nonzero if we should generate an annulling branch. */
7588 const char *
7589 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7590 rtx insn)
7592 static char string[64];
7593 enum rtx_code code = GET_CODE (op);
7594 rtx cc_reg = XEXP (op, 0);
7595 enum machine_mode mode = GET_MODE (cc_reg);
7596 const char *labelno, *branch;
7597 int spaces = 8, far;
7598 char *p;
7600 /* v9 branches are limited to +-1MB. If it is too far away,
7601 change
7603 bne,pt %xcc, .LC30
7605 to
7607 be,pn %xcc, .+12
7608 nop
7609 ba .LC30
7611 and
7613 fbne,a,pn %fcc2, .LC29
7615 to
7617 fbe,pt %fcc2, .+16
7618 nop
7619 ba .LC29 */
7621 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7622 if (reversed ^ far)
7624 /* Reversal of FP compares takes care -- an ordered compare
7625 becomes an unordered compare and vice versa. */
7626 if (mode == CCFPmode || mode == CCFPEmode)
7627 code = reverse_condition_maybe_unordered (code);
7628 else
7629 code = reverse_condition (code);
7632 /* Start by writing the branch condition. */
7633 if (mode == CCFPmode || mode == CCFPEmode)
7635 switch (code)
7637 case NE:
7638 branch = "fbne";
7639 break;
7640 case EQ:
7641 branch = "fbe";
7642 break;
7643 case GE:
7644 branch = "fbge";
7645 break;
7646 case GT:
7647 branch = "fbg";
7648 break;
7649 case LE:
7650 branch = "fble";
7651 break;
7652 case LT:
7653 branch = "fbl";
7654 break;
7655 case UNORDERED:
7656 branch = "fbu";
7657 break;
7658 case ORDERED:
7659 branch = "fbo";
7660 break;
7661 case UNGT:
7662 branch = "fbug";
7663 break;
7664 case UNLT:
7665 branch = "fbul";
7666 break;
7667 case UNEQ:
7668 branch = "fbue";
7669 break;
7670 case UNGE:
7671 branch = "fbuge";
7672 break;
7673 case UNLE:
7674 branch = "fbule";
7675 break;
7676 case LTGT:
7677 branch = "fblg";
7678 break;
7680 default:
7681 gcc_unreachable ();
7684 /* ??? !v9: FP branches cannot be preceded by another floating point
7685 insn. Because there is currently no concept of pre-delay slots,
7686 we can fix this only by always emitting a nop before a floating
7687 point branch. */
7689 string[0] = '\0';
7690 if (! TARGET_V9)
7691 strcpy (string, "nop\n\t");
7692 strcat (string, branch);
7694 else
7696 switch (code)
7698 case NE:
7699 branch = "bne";
7700 break;
7701 case EQ:
7702 branch = "be";
7703 break;
7704 case GE:
7705 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7706 branch = "bpos";
7707 else
7708 branch = "bge";
7709 break;
7710 case GT:
7711 branch = "bg";
7712 break;
7713 case LE:
7714 branch = "ble";
7715 break;
7716 case LT:
7717 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7718 branch = "bneg";
7719 else
7720 branch = "bl";
7721 break;
7722 case GEU:
7723 branch = "bgeu";
7724 break;
7725 case GTU:
7726 branch = "bgu";
7727 break;
7728 case LEU:
7729 branch = "bleu";
7730 break;
7731 case LTU:
7732 branch = "blu";
7733 break;
7735 default:
7736 gcc_unreachable ();
7738 strcpy (string, branch);
7740 spaces -= strlen (branch);
7741 p = strchr (string, '\0');
7743 /* Now add the annulling, the label, and a possible noop. */
7744 if (annul && ! far)
7746 strcpy (p, ",a");
7747 p += 2;
7748 spaces -= 2;
7751 if (TARGET_V9)
7753 rtx note;
7754 int v8 = 0;
7756 if (! far && insn && INSN_ADDRESSES_SET_P ())
7758 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7759 - INSN_ADDRESSES (INSN_UID (insn)));
7760 /* Leave some instructions for "slop". */
7761 if (delta < -260000 || delta >= 260000)
7762 v8 = 1;
7765 if (mode == CCFPmode || mode == CCFPEmode)
7767 static char v9_fcc_labelno[] = "%%fccX, ";
7768 /* Set the char indicating the number of the fcc reg to use. */
7769 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7770 labelno = v9_fcc_labelno;
7771 if (v8)
7773 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7774 labelno = "";
7777 else if (mode == CCXmode || mode == CCX_NOOVmode)
7779 labelno = "%%xcc, ";
7780 gcc_assert (! v8);
7782 else
7784 labelno = "%%icc, ";
7785 if (v8)
7786 labelno = "";
7789 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7791 strcpy (p,
7792 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7793 ? ",pt" : ",pn");
7794 p += 3;
7795 spaces -= 3;
7798 else
7799 labelno = "";
7801 if (spaces > 0)
7802 *p++ = '\t';
7803 else
7804 *p++ = ' ';
7805 strcpy (p, labelno);
7806 p = strchr (p, '\0');
7807 if (far)
7809 strcpy (p, ".+12\n\t nop\n\tb\t");
7810 /* Skip the next insn if requested or
7811 if we know that it will be a nop. */
7812 if (annul || ! final_sequence)
7813 p[3] = '6';
7814 p += 14;
7816 *p++ = '%';
7817 *p++ = 'l';
7818 *p++ = label + '0';
7819 *p++ = '%';
7820 *p++ = '#';
7821 *p = '\0';
7823 return string;
7826 /* Emit a library call comparison between floating point X and Y.
7827 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7828 Return the new operator to be used in the comparison sequence.
7830 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7831 values as arguments instead of the TFmode registers themselves,
7832 that's why we cannot call emit_float_lib_cmp. */
7834 rtx
7835 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7837 const char *qpfunc;
7838 rtx slot0, slot1, result, tem, tem2, libfunc;
7839 enum machine_mode mode;
7840 enum rtx_code new_comparison;
7842 switch (comparison)
7844 case EQ:
7845 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7846 break;
7848 case NE:
7849 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7850 break;
7852 case GT:
7853 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7854 break;
7856 case GE:
7857 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7858 break;
7860 case LT:
7861 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7862 break;
7864 case LE:
7865 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7866 break;
7868 case ORDERED:
7869 case UNORDERED:
7870 case UNGT:
7871 case UNLT:
7872 case UNEQ:
7873 case UNGE:
7874 case UNLE:
7875 case LTGT:
7876 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7877 break;
7879 default:
7880 gcc_unreachable ();
7883 if (TARGET_ARCH64)
7885 if (MEM_P (x))
7887 tree expr = MEM_EXPR (x);
7888 if (expr)
7889 mark_addressable (expr);
7890 slot0 = x;
7892 else
7894 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7895 emit_move_insn (slot0, x);
7898 if (MEM_P (y))
7900 tree expr = MEM_EXPR (y);
7901 if (expr)
7902 mark_addressable (expr);
7903 slot1 = y;
7905 else
7907 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7908 emit_move_insn (slot1, y);
7911 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7912 emit_library_call (libfunc, LCT_NORMAL,
7913 DImode, 2,
7914 XEXP (slot0, 0), Pmode,
7915 XEXP (slot1, 0), Pmode);
7916 mode = DImode;
7918 else
7920 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7921 emit_library_call (libfunc, LCT_NORMAL,
7922 SImode, 2,
7923 x, TFmode, y, TFmode);
7924 mode = SImode;
7928 /* Immediately move the result of the libcall into a pseudo
7929 register so reload doesn't clobber the value if it needs
7930 the return register for a spill reg. */
7931 result = gen_reg_rtx (mode);
7932 emit_move_insn (result, hard_libcall_value (mode, libfunc));
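  /* The decoding below assumes the comparison libcall returns 0 for
     equal, 1 for less, 2 for greater and 3 for unordered; e.g. UNLT
     holds exactly for results 1 and 3, hence the "result & 1" test
     further down.  */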
7934 switch (comparison)
7936 default:
7937 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7938 case ORDERED:
7939 case UNORDERED:
7940 new_comparison = (comparison == UNORDERED ? EQ : NE);
7941 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7942 case UNGT:
7943 case UNGE:
7944 new_comparison = (comparison == UNGT ? GT : NE);
7945 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7946 case UNLE:
7947 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7948 case UNLT:
7949 tem = gen_reg_rtx (mode);
7950 if (TARGET_ARCH32)
7951 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7952 else
7953 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7954 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7955 case UNEQ:
7956 case LTGT:
7957 tem = gen_reg_rtx (mode);
7958 if (TARGET_ARCH32)
7959 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7960 else
7961 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7962 tem2 = gen_reg_rtx (mode);
7963 if (TARGET_ARCH32)
7964 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7965 else
7966 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7967 new_comparison = (comparison == UNEQ ? EQ : NE);
7968 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7971 gcc_unreachable ();
7974 /* Generate an unsigned DImode to FP conversion. This is the same code
7975 optabs would emit if we didn't have TFmode patterns. */
7977 void
7978 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
7980 rtx neglab, donelab, i0, i1, f0, in, out;
7982 out = operands[0];
7983 in = force_reg (DImode, operands[1]);
7984 neglab = gen_label_rtx ();
7985 donelab = gen_label_rtx ();
7986 i0 = gen_reg_rtx (DImode);
7987 i1 = gen_reg_rtx (DImode);
7988 f0 = gen_reg_rtx (mode);
7990 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
7992 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
7993 emit_jump_insn (gen_jump (donelab));
7994 emit_barrier ();
7996 emit_label (neglab);
7998 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
7999 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8000 emit_insn (gen_iordi3 (i0, i0, i1));
8001 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
8002 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
8004 emit_label (donelab);
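/* A minimal C sketch (ours, purely illustrative and not used by the
   compiler) of the strategy implemented above: a value with the sign
   bit set is halved while the shifted-out bit is folded back into the
   low bit so the final rounding stays correct, then converted via the
   signed path and doubled.  */

static double
sparc_floatunsdi_sketch (unsigned long long in)
{
  unsigned long long half;
  double f;

  if ((long long) in >= 0)
    return (double) (long long) in;	/* Fits the signed conversion.  */

  /* Halve, keeping the lost bit sticky for correct rounding.  */
  half = (in >> 1) | (in & 1);
  f = (double) (long long) half;
  return f + f;
}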
8007 /* Generate an FP to unsigned DImode conversion. This is the same code
8008 optabs would emit if we didn't have TFmode patterns. */
8010 void
8011 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
8013 rtx neglab, donelab, i0, i1, f0, in, out, limit;
8015 out = operands[0];
8016 in = force_reg (mode, operands[1]);
8017 neglab = gen_label_rtx ();
8018 donelab = gen_label_rtx ();
8019 i0 = gen_reg_rtx (DImode);
8020 i1 = gen_reg_rtx (DImode);
8021 limit = gen_reg_rtx (mode);
8022 f0 = gen_reg_rtx (mode);
8024 emit_move_insn (limit,
8025 CONST_DOUBLE_FROM_REAL_VALUE (
8026 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8027 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8029 emit_insn (gen_rtx_SET (VOIDmode,
8030 out,
8031 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8032 emit_jump_insn (gen_jump (donelab));
8033 emit_barrier ();
8035 emit_label (neglab);
8037 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
8038 emit_insn (gen_rtx_SET (VOIDmode,
8040 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8041 emit_insn (gen_movdi (i1, const1_rtx));
8042 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8043 emit_insn (gen_xordi3 (out, i0, i1));
8045 emit_label (donelab);
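/* The matching C sketch for the reverse conversion above (again ours
   and illustrative only): values below 2^63 take the signed path,
   larger ones are biased down by 2^63 before the signed conversion
   and the top bit is restored with an xor, mirroring the
   ashldi3/xordi3 pair emitted above.  */

static unsigned long long
sparc_fixunsdi_sketch (double in)
{
  const double limit = 9223372036854775808.0;	/* 2^63 */

  if (in < limit)
    return (unsigned long long) (long long) in;

  return (unsigned long long) (long long) (in - limit) ^ (1ULL << 63);
}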
8048 /* Return the string to output a compare and branch instruction to DEST.
8049 DEST is the destination insn (i.e. the label), INSN is the source,
8050 and OP is the conditional expression. */
8052 const char *
8053 output_cbcond (rtx op, rtx dest, rtx insn)
8055 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8056 enum rtx_code code = GET_CODE (op);
8057 const char *cond_str, *tmpl;
8058 int far, emit_nop, len;
8059 static char string[64];
8060 char size_char;
8062 /* Compare and Branch is limited to +-2KB. If it is too far away,
8063 change
8065 cxbne X, Y, .LC30
8067 to
8069 cxbe X, Y, .+16
8070  nop
8071 ba,pt xcc, .LC30
8072  nop  */
8074 len = get_attr_length (insn);
8076 far = len == 4;
8077 emit_nop = len == 2;
8079 if (far)
8080 code = reverse_condition (code);
8082 size_char = ((mode == SImode) ? 'w' : 'x');
8084 switch (code)
8086 case NE:
8087 cond_str = "ne";
8088 break;
8090 case EQ:
8091 cond_str = "e";
8092 break;
8094 case GE:
8095 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8096 cond_str = "pos";
8097 else
8098 cond_str = "ge";
8099 break;
8101 case GT:
8102 cond_str = "g";
8103 break;
8105 case LE:
8106 cond_str = "le";
8107 break;
8109 case LT:
8110 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8111 cond_str = "neg";
8112 else
8113 cond_str = "l";
8114 break;
8116 case GEU:
8117 cond_str = "cc";
8118 break;
8120 case GTU:
8121 cond_str = "gu";
8122 break;
8124 case LEU:
8125 cond_str = "leu";
8126 break;
8128 case LTU:
8129 cond_str = "cs";
8130 break;
8132 default:
8133 gcc_unreachable ();
8136 if (far)
8138 int veryfar = 1, delta;
8140 if (INSN_ADDRESSES_SET_P ())
8142 delta = (INSN_ADDRESSES (INSN_UID (dest))
8143 - INSN_ADDRESSES (INSN_UID (insn)));
8144 /* Leave some instructions for "slop". */
8145 if (delta >= -260000 && delta < 260000)
8146 veryfar = 0;
8149 if (veryfar)
8150 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8151 else
8152 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8154 else
8156 if (emit_nop)
8157 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8158 else
8159 tmpl = "c%cb%s\t%%1, %%2, %%3";
8162 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8164 return string;
8167 /* Return the string to output a conditional branch to LABEL, testing
8168 register REG. LABEL is the operand number of the label; REG is the
8169 operand number of the reg. OP is the conditional expression. The mode
8170 of REG says what kind of comparison we made.
8172 DEST is the destination insn (i.e. the label), INSN is the source.
8174 REVERSED is nonzero if we should reverse the sense of the comparison.
8176 ANNUL is nonzero if we should generate an annulling branch. */
8178 const char *
8179 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8180 int annul, rtx insn)
8182 static char string[64];
8183 enum rtx_code code = GET_CODE (op);
8184 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8185 rtx note;
8186 int far;
8187 char *p;
8189 /* Branches on a register are limited to +-128KB. If the target is too far away,
8190 change
8192 brnz,pt %g1, .LC30
8194 to
8196 brz,pn %g1, .+12
8197  nop
8198 ba,pt %xcc, .LC30
8200 and
8202 brgez,a,pn %o1, .LC29
8204 to
8206 brlz,pt %o1, .+16
8207  nop
8208 ba,pt %xcc, .LC29 */
8210 far = get_attr_length (insn) >= 3;
8212 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8213 if (reversed ^ far)
8214 code = reverse_condition (code);
8216 /* Only 64 bit versions of these instructions exist. */
8217 gcc_assert (mode == DImode);
8219 /* Start by writing the branch condition. */
8221 switch (code)
8223 case NE:
8224 strcpy (string, "brnz");
8225 break;
8227 case EQ:
8228 strcpy (string, "brz");
8229 break;
8231 case GE:
8232 strcpy (string, "brgez");
8233 break;
8235 case LT:
8236 strcpy (string, "brlz");
8237 break;
8239 case LE:
8240 strcpy (string, "brlez");
8241 break;
8243 case GT:
8244 strcpy (string, "brgz");
8245 break;
8247 default:
8248 gcc_unreachable ();
8251 p = strchr (string, '\0');
8253 /* Now add the annulling, reg, label, and nop. */
8254 if (annul && ! far)
8256 strcpy (p, ",a");
8257 p += 2;
8260 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8262 strcpy (p,
8263 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
8264 ? ",pt" : ",pn");
8265 p += 3;
8268 *p = p < string + 8 ? '\t' : ' ';
8269 p++;
8270 *p++ = '%';
8271 *p++ = '0' + reg;
8272 *p++ = ',';
8273 *p++ = ' ';
8274 if (far)
8276 int veryfar = 1, delta;
8278 if (INSN_ADDRESSES_SET_P ())
8280 delta = (INSN_ADDRESSES (INSN_UID (dest))
8281 - INSN_ADDRESSES (INSN_UID (insn)));
8282 /* Leave some instructions for "slop". */
8283 if (delta >= -260000 && delta < 260000)
8284 veryfar = 0;
8287 strcpy (p, ".+12\n\t nop\n\t");
8288 /* Skip the next insn if requested or
8289 if we know that it will be a nop. */
8290 if (annul || ! final_sequence)
8291 p[3] = '6';
8292 p += 12;
8293 if (veryfar)
8295 strcpy (p, "b\t");
8296 p += 2;
8298 else
8300 strcpy (p, "ba,pt\t%%xcc, ");
8301 p += 13;
8304 *p++ = '%';
8305 *p++ = 'l';
8306 *p++ = '0' + label;
8307 *p++ = '%';
8308 *p++ = '#';
8309 *p = '\0';
8311 return string;
8314 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8315 Such instructions cannot be used in the delay slot of the return insn on v9.
8316 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8317 */
8319 static int
8320 epilogue_renumber (register rtx *where, int test)
8322 register const char *fmt;
8323 register int i;
8324 register enum rtx_code code;
8326 if (*where == 0)
8327 return 0;
8329 code = GET_CODE (*where);
8331 switch (code)
8333 case REG:
8334 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8335 return 1;
8336 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8337 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
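      /* Fall through: any other REG is harmless in the delay slot.  */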
8338 case SCRATCH:
8339 case CC0:
8340 case PC:
8341 case CONST_INT:
8342 case CONST_DOUBLE:
8343 return 0;
8345 /* Do not replace the frame pointer with the stack pointer because
8346 it can cause the delayed instruction to load below the stack.
8347 This occurs when instructions like:
8349 (set (reg/i:SI 24 %i0)
8350 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8351 (const_int -20 [0xffffffec])) 0))
8353 are in the return delay slot. */
8354 case PLUS:
8355 if (GET_CODE (XEXP (*where, 0)) == REG
8356 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8357 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8358 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8359 return 1;
8360 break;
8362 case MEM:
8363 if (SPARC_STACK_BIAS
8364 && GET_CODE (XEXP (*where, 0)) == REG
8365 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8366 return 1;
8367 break;
8369 default:
8370 break;
8373 fmt = GET_RTX_FORMAT (code);
8375 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8377 if (fmt[i] == 'E')
8379 register int j;
8380 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8381 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8382 return 1;
8384 else if (fmt[i] == 'e'
8385 && epilogue_renumber (&(XEXP (*where, i)), test))
8386 return 1;
8388 return 0;
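/* An illustration (ours): when the delay slot of a V9 "return" is
   being filled, the register window is restored before the slot
   executes, so a candidate such as

	mov	%i1, %i0

   is rewritten by the renaming above into

	mov	%o1, %o0

   while any insn already mentioning %l[0-7] or %o[0-7] makes
   epilogue_renumber return 1 and is rejected.  */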
8391 /* Leaf functions and non-leaf functions have different needs. */
8393 static const int
8394 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8396 static const int
8397 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8399 static const int *const reg_alloc_orders[] = {
8400 reg_leaf_alloc_order,
8401 reg_nonleaf_alloc_order};
8403 void
8404 order_regs_for_local_alloc (void)
8406 static int last_order_nonleaf = 1;
8408 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8410 last_order_nonleaf = !last_order_nonleaf;
8411 memcpy ((char *) reg_alloc_order,
8412 (const char *) reg_alloc_orders[last_order_nonleaf],
8413 FIRST_PSEUDO_REGISTER * sizeof (int));
8417 /* Return 1 if REG and MEM are legitimate enough to allow the various
8418 mem<-->reg splits to be run. */
8420 int
8421 sparc_splitdi_legitimate (rtx reg, rtx mem)
8423 /* Punt if we are here by mistake. */
8424 gcc_assert (reload_completed);
8426 /* We must have an offsettable memory reference. */
8427 if (! offsettable_memref_p (mem))
8428 return 0;
8430 /* If we have legitimate args for ldd/std, we do not want
8431 the split to happen. */
8432 if ((REGNO (reg) % 2) == 0
8433 && mem_min_alignment (mem, 8))
8434 return 0;
8436 /* Success. */
8437 return 1;
8440 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8442 int
8443 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8445 int regno1, regno2;
8447 if (GET_CODE (reg1) == SUBREG)
8448 reg1 = SUBREG_REG (reg1);
8449 if (GET_CODE (reg1) != REG)
8450 return 0;
8451 regno1 = REGNO (reg1);
8453 if (GET_CODE (reg2) == SUBREG)
8454 reg2 = SUBREG_REG (reg2);
8455 if (GET_CODE (reg2) != REG)
8456 return 0;
8457 regno2 = REGNO (reg2);
8459 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8460 return 1;
8462 if (TARGET_VIS3)
8464 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8465 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8466 return 1;
8469 return 0;
8472 /* Return 1 if x and y are some kind of REG and they refer to
8473 different hard registers. This test is guaranteed to be
8474 run after reload. */
8476 int
8477 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
8479 if (GET_CODE (x) != REG)
8480 return 0;
8481 if (GET_CODE (y) != REG)
8482 return 0;
8483 if (REGNO (x) == REGNO (y))
8484 return 0;
8485 return 1;
8488 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8489 This makes them candidates for using ldd and std insns.
8491 Note reg1 and reg2 *must* be hard registers. */
8493 int
8494 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8496 /* We might have been passed a SUBREG. */
8497 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8498 return 0;
8500 if (REGNO (reg1) % 2 != 0)
8501 return 0;
8503 /* Integer ldd is deprecated in SPARC V9. */
8504 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8505 return 0;
8507 return (REGNO (reg1) == REGNO (reg2) - 1);
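/* For example, (%f2, %f3) qualifies, as does (%o2, %o3) before V9;
   (%o1, %o2) never does because REGNO (%o1) is odd, and (%f2, %f4)
   fails the consecutive-register check.  */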
8510 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8511 an ldd or std insn.
8513 This can only happen when addr1 and addr2, the addresses in mem1
8514 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8515 addr1 must also be aligned on a 64-bit boundary.
8517 Also, if dependent_reg_rtx is not null, it should not be used to
8518 compute the address for mem1, i.e. we cannot optimize a sequence
8519 like:
8520 ld [%o0], %o0
8521 ld [%o0 + 4], %o1
8522 to:
8523 ldd [%o0], %o0
8524 nor:
8525 ld [%g3 + 4], %g3
8526 ld [%g3], %g2
8527 to:
8528 ldd [%g3], %g2
8530 But, note that the transformation from:
8531 ld [%g2 + 4], %g3
8532 ld [%g2], %g2
8533 to:
8534 ldd [%g2], %g2
8535 is perfectly fine. Thus, the peephole2 patterns always pass us
8536 the destination register of the first load, never the second one.
8538 For stores we don't have a similar problem, so dependent_reg_rtx is
8539 NULL_RTX. */
8541 int
8542 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8544 rtx addr1, addr2;
8545 unsigned int reg1;
8546 HOST_WIDE_INT offset1;
8548 /* The mems cannot be volatile. */
8549 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8550 return 0;
8552 /* MEM1 should be aligned on a 64-bit boundary. */
8553 if (MEM_ALIGN (mem1) < 64)
8554 return 0;
8556 addr1 = XEXP (mem1, 0);
8557 addr2 = XEXP (mem2, 0);
8559 /* Extract a register number and offset (if used) from the first addr. */
8560 if (GET_CODE (addr1) == PLUS)
8562 /* If not a REG, return zero. */
8563 if (GET_CODE (XEXP (addr1, 0)) != REG)
8564 return 0;
8565 else
8567 reg1 = REGNO (XEXP (addr1, 0));
8568 /* The offset must be constant! */
8569 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8570 return 0;
8571 offset1 = INTVAL (XEXP (addr1, 1));
8574 else if (GET_CODE (addr1) != REG)
8575 return 0;
8576 else
8578 reg1 = REGNO (addr1);
8579 /* This was a simple (mem (reg)) expression. Offset is 0. */
8580 offset1 = 0;
8583 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
8584 if (GET_CODE (addr2) != PLUS)
8585 return 0;
8587 if (GET_CODE (XEXP (addr2, 0)) != REG
8588 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8589 return 0;
8591 if (reg1 != REGNO (XEXP (addr2, 0)))
8592 return 0;
8594 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8595 return 0;
8597 /* The first offset must be evenly divisible by 8 to ensure the
8598 address is 64 bit aligned. */
8599 if (offset1 % 8 != 0)
8600 return 0;
8602 /* The offset for the second addr must be 4 more than the first addr. */
8603 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8604 return 0;
8606 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8607 instructions. */
8608 return 1;
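/* Concretely (illustrative, assuming aligned non-volatile mems):
   [%o0 + 8] and [%o0 + 12] pass every check above, [%o0 + 4] and
   [%o0 + 8] fail because the first offset is not a multiple of 8,
   and [%o0] followed by [%o1 + 4] fails because the base registers
   differ.  */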
8611 /* Return 1 if reg is a pseudo, or is the first register in
8612 a hard register pair. This makes it suitable for use in
8613 ldd and std insns. */
8615 int
8616 register_ok_for_ldd (rtx reg)
8618 /* We might have been passed a SUBREG. */
8619 if (!REG_P (reg))
8620 return 0;
8622 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8623 return (REGNO (reg) % 2 == 0);
8625 return 1;
8628 /* Return 1 if OP, a MEM, has an address which is known to be
8629 aligned to an 8-byte boundary. */
8631 int
8632 memory_ok_for_ldd (rtx op)
8634 /* In 64-bit mode, we assume that the address is word-aligned. */
8635 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8636 return 0;
8638 if (! can_create_pseudo_p ()
8639 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8640 return 0;
8642 return 1;
8645 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8647 static bool
8648 sparc_print_operand_punct_valid_p (unsigned char code)
8650 if (code == '#'
8651 || code == '*'
8652 || code == '('
8653 || code == ')'
8654 || code == '_'
8655 || code == '&')
8656 return true;
8658 return false;
8661 /* Implement TARGET_PRINT_OPERAND.
8662 Print operand X (an rtx) in assembler syntax to file FILE.
8663 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8664 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8666 static void
8667 sparc_print_operand (FILE *file, rtx x, int code)
8669 switch (code)
8671 case '#':
8672 /* Output an insn in a delay slot. */
8673 if (final_sequence)
8674 sparc_indent_opcode = 1;
8675 else
8676 fputs ("\n\t nop", file);
8677 return;
8678 case '*':
8679 /* Output an annul flag if there's nothing for the delay slot and we
8680 are optimizing. This is always used with '(' below.
8681 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8682 this is a dbx bug. So, we only do this when optimizing.
8683 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8684 Always emit a nop in case the next instruction is a branch. */
8685 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8686 fputs (",a", file);
8687 return;
8688 case '(':
8689 /* Output a 'nop' if there's nothing for the delay slot and we are
8690 not optimizing. This is always used with '*' above. */
8691 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8692 fputs ("\n\t nop", file);
8693 else if (final_sequence)
8694 sparc_indent_opcode = 1;
8695 return;
8696 case ')':
8697 /* Output the right displacement from the saved PC on function return.
8698 The caller may have placed an "unimp" insn immediately after the call
8699 so we have to account for it. This insn is used in the 32-bit ABI
8700 when calling a function that returns a non zero-sized structure. The
8701 64-bit ABI doesn't have it. Be careful to have this test be the same
8702 as that for the call. The exception is when sparc_std_struct_return
8703 is enabled, the psABI is followed exactly and the adjustment is made
8704 by the code in sparc_struct_value_rtx. The call emitted is the same
8705 when sparc_std_struct_return is enabled. */
8706 if (!TARGET_ARCH64
8707 && cfun->returns_struct
8708 && !sparc_std_struct_return
8709 && DECL_SIZE (DECL_RESULT (current_function_decl))
8710 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8711 == INTEGER_CST
8712 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8713 fputs ("12", file);
8714 else
8715 fputc ('8', file);
8716 return;
8717 case '_':
8718 /* Output the Embedded Medium/Anywhere code model base register. */
8719 fputs (EMBMEDANY_BASE_REG, file);
8720 return;
8721 case '&':
8722 /* Print some local dynamic TLS name. */
8723 assemble_name (file, get_some_local_dynamic_name ());
8724 return;
8726 case 'Y':
8727 /* Adjust the operand to take into account a RESTORE operation. */
8728 if (GET_CODE (x) == CONST_INT)
8729 break;
8730 else if (GET_CODE (x) != REG)
8731 output_operand_lossage ("invalid %%Y operand");
8732 else if (REGNO (x) < 8)
8733 fputs (reg_names[REGNO (x)], file);
8734 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8735 fputs (reg_names[REGNO (x)-16], file);
8736 else
8737 output_operand_lossage ("invalid %%Y operand");
8738 return;
8739 case 'L':
8740 /* Print out the low order register name of a register pair. */
8741 if (WORDS_BIG_ENDIAN)
8742 fputs (reg_names[REGNO (x)+1], file);
8743 else
8744 fputs (reg_names[REGNO (x)], file);
8745 return;
8746 case 'H':
8747 /* Print out the high order register name of a register pair. */
8748 if (WORDS_BIG_ENDIAN)
8749 fputs (reg_names[REGNO (x)], file);
8750 else
8751 fputs (reg_names[REGNO (x)+1], file);
8752 return;
8753 case 'R':
8754 /* Print out the second register name of a register pair or quad.
8755 I.e., R (%o0) => %o1. */
8756 fputs (reg_names[REGNO (x)+1], file);
8757 return;
8758 case 'S':
8759 /* Print out the third register name of a register quad.
8760 I.e., S (%o0) => %o2. */
8761 fputs (reg_names[REGNO (x)+2], file);
8762 return;
8763 case 'T':
8764 /* Print out the fourth register name of a register quad.
8765 I.e., T (%o0) => %o3. */
8766 fputs (reg_names[REGNO (x)+3], file);
8767 return;
8768 case 'x':
8769 /* Print a condition code register. */
8770 if (REGNO (x) == SPARC_ICC_REG)
8772 /* We don't handle CC[X]_NOOVmode because they're not supposed
8773 to occur here. */
8774 if (GET_MODE (x) == CCmode)
8775 fputs ("%icc", file);
8776 else if (GET_MODE (x) == CCXmode)
8777 fputs ("%xcc", file);
8778 else
8779 gcc_unreachable ();
8781 else
8782 /* %fccN register */
8783 fputs (reg_names[REGNO (x)], file);
8784 return;
8785 case 'm':
8786 /* Print the operand's address only. */
8787 output_address (XEXP (x, 0));
8788 return;
8789 case 'r':
8790 /* In this case we need a register. Use %g0 if the
8791 operand is const0_rtx. */
8792 if (x == const0_rtx
8793 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8795 fputs ("%g0", file);
8796 return;
8798 else
8799 break;
8801 case 'A':
8802 switch (GET_CODE (x))
8804 case IOR: fputs ("or", file); break;
8805 case AND: fputs ("and", file); break;
8806 case XOR: fputs ("xor", file); break;
8807 default: output_operand_lossage ("invalid %%A operand");
8809 return;
8811 case 'B':
8812 switch (GET_CODE (x))
8814 case IOR: fputs ("orn", file); break;
8815 case AND: fputs ("andn", file); break;
8816 case XOR: fputs ("xnor", file); break;
8817 default: output_operand_lossage ("invalid %%B operand");
8819 return;
8821 /* This is used by the conditional move instructions. */
8822 case 'C':
8824 enum rtx_code rc = GET_CODE (x);
8826 switch (rc)
8828 case NE: fputs ("ne", file); break;
8829 case EQ: fputs ("e", file); break;
8830 case GE: fputs ("ge", file); break;
8831 case GT: fputs ("g", file); break;
8832 case LE: fputs ("le", file); break;
8833 case LT: fputs ("l", file); break;
8834 case GEU: fputs ("geu", file); break;
8835 case GTU: fputs ("gu", file); break;
8836 case LEU: fputs ("leu", file); break;
8837 case LTU: fputs ("lu", file); break;
8838 case LTGT: fputs ("lg", file); break;
8839 case UNORDERED: fputs ("u", file); break;
8840 case ORDERED: fputs ("o", file); break;
8841 case UNLT: fputs ("ul", file); break;
8842 case UNLE: fputs ("ule", file); break;
8843 case UNGT: fputs ("ug", file); break;
8844 case UNGE: fputs ("uge", file); break;
8845 case UNEQ: fputs ("ue", file); break;
8846 default: output_operand_lossage ("invalid %%C operand");
8848 return;
8851 /* These are used by the movr instruction pattern. */
8852 case 'D':
8854 enum rtx_code rc = GET_CODE (x);
8855 switch (rc)
8857 case NE: fputs ("ne", file); break;
8858 case EQ: fputs ("e", file); break;
8859 case GE: fputs ("gez", file); break;
8860 case LT: fputs ("lz", file); break;
8861 case LE: fputs ("lez", file); break;
8862 case GT: fputs ("gz", file); break;
8863 default: output_operand_lossage ("invalid %%D operand");
8865 return;
8868 case 'b':
8870 /* Print a sign-extended character. */
8871 int i = trunc_int_for_mode (INTVAL (x), QImode);
8872 fprintf (file, "%d", i);
8873 return;
8876 case 'f':
8877 /* Operand must be a MEM; write its address. */
8878 if (GET_CODE (x) != MEM)
8879 output_operand_lossage ("invalid %%f operand");
8880 output_address (XEXP (x, 0));
8881 return;
8883 case 's':
8885 /* Print a sign-extended 32-bit value. */
8886 HOST_WIDE_INT i;
8887 if (GET_CODE(x) == CONST_INT)
8888 i = INTVAL (x);
8889 else if (GET_CODE(x) == CONST_DOUBLE)
8890 i = CONST_DOUBLE_LOW (x);
8891 else
8893 output_operand_lossage ("invalid %%s operand");
8894 return;
8896 i = trunc_int_for_mode (i, SImode);
8897 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8898 return;
8901 case 0:
8902 /* Do nothing special. */
8903 break;
8905 default:
8906 /* Undocumented flag. */
8907 output_operand_lossage ("invalid operand output code");
8910 if (GET_CODE (x) == REG)
8911 fputs (reg_names[REGNO (x)], file);
8912 else if (GET_CODE (x) == MEM)
8914 fputc ('[', file);
8915 /* Poor Sun assembler doesn't understand absolute addressing. */
8916 if (CONSTANT_P (XEXP (x, 0)))
8917 fputs ("%g0+", file);
8918 output_address (XEXP (x, 0));
8919 fputc (']', file);
8921 else if (GET_CODE (x) == HIGH)
8923 fputs ("%hi(", file);
8924 output_addr_const (file, XEXP (x, 0));
8925 fputc (')', file);
8927 else if (GET_CODE (x) == LO_SUM)
8929 sparc_print_operand (file, XEXP (x, 0), 0);
8930 if (TARGET_CM_MEDMID)
8931 fputs ("+%l44(", file);
8932 else
8933 fputs ("+%lo(", file);
8934 output_addr_const (file, XEXP (x, 1));
8935 fputc (')', file);
8937 else if (GET_CODE (x) == CONST_DOUBLE
8938 && (GET_MODE (x) == VOIDmode
8939 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
8941 if (CONST_DOUBLE_HIGH (x) == 0)
8942 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
8943 else if (CONST_DOUBLE_HIGH (x) == -1
8944 && CONST_DOUBLE_LOW (x) < 0)
8945 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
8946 else
8947 output_operand_lossage ("long long constant not a valid immediate operand");
8949 else if (GET_CODE (x) == CONST_DOUBLE)
8950 output_operand_lossage ("floating point constant not a valid immediate operand");
8951 else { output_addr_const (file, x); }
8954 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8956 static void
8957 sparc_print_operand_address (FILE *file, rtx x)
8959 register rtx base, index = 0;
8960 int offset = 0;
8961 register rtx addr = x;
8963 if (REG_P (addr))
8964 fputs (reg_names[REGNO (addr)], file);
8965 else if (GET_CODE (addr) == PLUS)
8967 if (CONST_INT_P (XEXP (addr, 0)))
8968 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8969 else if (CONST_INT_P (XEXP (addr, 1)))
8970 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8971 else
8972 base = XEXP (addr, 0), index = XEXP (addr, 1);
8973 if (GET_CODE (base) == LO_SUM)
8975 gcc_assert (USE_AS_OFFSETABLE_LO10
8976 && TARGET_ARCH64
8977 && ! TARGET_CM_MEDMID);
8978 output_operand (XEXP (base, 0), 0);
8979 fputs ("+%lo(", file);
8980 output_address (XEXP (base, 1));
8981 fprintf (file, ")+%d", offset);
8983 else
8985 fputs (reg_names[REGNO (base)], file);
8986 if (index == 0)
8987 fprintf (file, "%+d", offset);
8988 else if (REG_P (index))
8989 fprintf (file, "+%s", reg_names[REGNO (index)]);
8990 else if (GET_CODE (index) == SYMBOL_REF
8991 || GET_CODE (index) == LABEL_REF
8992 || GET_CODE (index) == CONST)
8993 fputc ('+', file), output_addr_const (file, index);
8994 else gcc_unreachable ();
8997 else if (GET_CODE (addr) == MINUS
8998 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9000 output_addr_const (file, XEXP (addr, 0));
9001 fputs ("-(", file);
9002 output_addr_const (file, XEXP (addr, 1));
9003 fputs ("-.)", file);
9005 else if (GET_CODE (addr) == LO_SUM)
9007 output_operand (XEXP (addr, 0), 0);
9008 if (TARGET_CM_MEDMID)
9009 fputs ("+%l44(", file);
9010 else
9011 fputs ("+%lo(", file);
9012 output_address (XEXP (addr, 1));
9013 fputc (')', file);
9015 else if (flag_pic
9016 && GET_CODE (addr) == CONST
9017 && GET_CODE (XEXP (addr, 0)) == MINUS
9018 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9019 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9020 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9022 addr = XEXP (addr, 0);
9023 output_addr_const (file, XEXP (addr, 0));
9024 /* Group the args of the second CONST in parentheses. */
9025 fputs ("-(", file);
9026 /* Skip past the second CONST--it does nothing for us. */
9027 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9028 /* Close the parenthesis. */
9029 fputc (')', file);
9031 else
9033 output_addr_const (file, addr);
9037 /* Target hook for assembling integer objects. The sparc version has
9038 special handling for aligned DI-mode objects. */
9040 static bool
9041 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9043 /* ??? We only output .xword's for symbols and only then in environments
9044 where the assembler can handle them. */
9045 if (aligned_p && size == 8
9046 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
9048 if (TARGET_V9)
9050 assemble_integer_with_op ("\t.xword\t", x);
9051 return true;
9053 else
9055 assemble_aligned_integer (4, const0_rtx);
9056 assemble_aligned_integer (4, x);
9057 return true;
9060 return default_assemble_integer (x, size, aligned_p);
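/* For example (illustrative), an aligned 8-byte reference to "sym"
   comes out as "\t.xword\tsym" when the V9 assembler is in use;
   otherwise it is split into two aligned 4-byte integers, a zero
   word followed by "sym".  */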
9063 /* Return the value of a code used in the .proc pseudo-op that says
9064 what kind of result this function returns. For non-C types, we pick
9065 the closest C type. */
9067 #ifndef SHORT_TYPE_SIZE
9068 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9069 #endif
9071 #ifndef INT_TYPE_SIZE
9072 #define INT_TYPE_SIZE BITS_PER_WORD
9073 #endif
9075 #ifndef LONG_TYPE_SIZE
9076 #define LONG_TYPE_SIZE BITS_PER_WORD
9077 #endif
9079 #ifndef LONG_LONG_TYPE_SIZE
9080 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9081 #endif
9083 #ifndef FLOAT_TYPE_SIZE
9084 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9085 #endif
9087 #ifndef DOUBLE_TYPE_SIZE
9088 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9089 #endif
9091 #ifndef LONG_DOUBLE_TYPE_SIZE
9092 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9093 #endif
9095 unsigned long
9096 sparc_type_code (register tree type)
9098 register unsigned long qualifiers = 0;
9099 register unsigned shift;
9101 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9102 setting more, since some assemblers will give an error for this. Also,
9103 we must be careful to avoid shifts of 32 bits or more to avoid getting
9104 unpredictable results. */
9106 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9108 switch (TREE_CODE (type))
9110 case ERROR_MARK:
9111 return qualifiers;
9113 case ARRAY_TYPE:
9114 qualifiers |= (3 << shift);
9115 break;
9117 case FUNCTION_TYPE:
9118 case METHOD_TYPE:
9119 qualifiers |= (2 << shift);
9120 break;
9122 case POINTER_TYPE:
9123 case REFERENCE_TYPE:
9124 case OFFSET_TYPE:
9125 qualifiers |= (1 << shift);
9126 break;
9128 case RECORD_TYPE:
9129 return (qualifiers | 8);
9131 case UNION_TYPE:
9132 case QUAL_UNION_TYPE:
9133 return (qualifiers | 9);
9135 case ENUMERAL_TYPE:
9136 return (qualifiers | 10);
9138 case VOID_TYPE:
9139 return (qualifiers | 16);
9141 case INTEGER_TYPE:
9142 /* If this is a range type, consider it to be the underlying
9143 type. */
9144 if (TREE_TYPE (type) != 0)
9145 break;
9147 /* Carefully distinguish all the standard types of C,
9148 without messing up if the language is not C. We do this by
9149 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9150 look at both the names and the above fields, but that's redundant.
9151 Any type whose size is between two C types will be considered
9152 to be the wider of the two types. Also, we do not have a
9153 special code to use for "long long", so anything wider than
9154 long is treated the same. Note that we can't distinguish
9155 between "int" and "long" in this code if they are the same
9156 size, but that's fine, since neither can the assembler. */
9158 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9159 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9161 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9162 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9164 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9165 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9167 else
9168 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9170 case REAL_TYPE:
9171 /* If this is a range type, consider it to be the underlying
9172 type. */
9173 if (TREE_TYPE (type) != 0)
9174 break;
9176 /* Carefully distinguish all the standard types of C,
9177 without messing up if the language is not C. */
9179 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9180 return (qualifiers | 6);
9182 else
9183 return (qualifiers | 7);
9185 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9186 /* ??? We need to distinguish between double and float complex types,
9187 but I don't know how yet because I can't reach this code from
9188 existing front-ends. */
9189 return (qualifiers | 7); /* Who knows? */
9191 case VECTOR_TYPE:
9192 case BOOLEAN_TYPE: /* Boolean truth value type. */
9193 case LANG_TYPE:
9194 case NULLPTR_TYPE:
9195 return qualifiers;
9197 default:
9198 gcc_unreachable (); /* Not a type! */
9202 return qualifiers;
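/* A worked example (ours): for the C type "int *" the loop above runs
   once, recording POINTER_TYPE as 1 << 6, and the INTEGER_TYPE case
   then returns immediately, giving (1 << 6) | 4 = 0x44 as the .proc
   code.  */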
9205 /* Nested function support. */
9207 /* Emit RTL insns to initialize the variable parts of a trampoline.
9208 FNADDR is an RTX for the address of the function's pure code.
9209 CXT is an RTX for the static chain value for the function.
9211 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9212 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9213 (to store insns). This is a bit excessive. Perhaps a different
9214 mechanism would be better here.
9216 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9218 static void
9219 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9221 /* SPARC 32-bit trampoline:
9223 sethi %hi(fn), %g1
9224 sethi %hi(static), %g2
9225 jmp %g1+%lo(fn)
9226 or %g2, %lo(static), %g2
9228 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9229 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9232 emit_move_insn
9233 (adjust_address (m_tramp, SImode, 0),
9234 expand_binop (SImode, ior_optab,
9235 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9236 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9237 NULL_RTX, 1, OPTAB_DIRECT));
9239 emit_move_insn
9240 (adjust_address (m_tramp, SImode, 4),
9241 expand_binop (SImode, ior_optab,
9242 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9243 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9244 NULL_RTX, 1, OPTAB_DIRECT));
9246 emit_move_insn
9247 (adjust_address (m_tramp, SImode, 8),
9248 expand_binop (SImode, ior_optab,
9249 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9250 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9251 NULL_RTX, 1, OPTAB_DIRECT));
9253 emit_move_insn
9254 (adjust_address (m_tramp, SImode, 12),
9255 expand_binop (SImode, ior_optab,
9256 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9257 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9258 NULL_RTX, 1, OPTAB_DIRECT));
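  /* Worked example (with an assumed address): for FN at 0x12345678,
     the first word above is 0x03000000 | (0x12345678 >> 10)
     = 0x03048d15, the encoding of "sethi %hi(0x12345678), %g1", and
     the third word is 0x81c06000 | (0x12345678 & 0x3ff) = 0x81c06278,
     the encoding of "jmp %g1+%lo(0x12345678)".  */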
9260 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9261 aligned on a 16 byte boundary so one flush clears it all. */
9262 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
9263 if (sparc_cpu != PROCESSOR_ULTRASPARC
9264 && sparc_cpu != PROCESSOR_ULTRASPARC3
9265 && sparc_cpu != PROCESSOR_NIAGARA
9266 && sparc_cpu != PROCESSOR_NIAGARA2
9267 && sparc_cpu != PROCESSOR_NIAGARA3
9268 && sparc_cpu != PROCESSOR_NIAGARA4)
9269 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
9271 /* Call __enable_execute_stack after writing onto the stack to make sure
9272 the stack address is accessible. */
9273 #ifdef HAVE_ENABLE_EXECUTE_STACK
9274 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9275 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9276 #endif
9280 /* The 64-bit version is simpler because it makes more sense to load the
9281 values as "immediate" data out of the trampoline. It's also easier since
9282 we can read the PC without clobbering a register. */
9284 static void
9285 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9287 /* SPARC 64-bit trampoline:
9289 rd %pc, %g1
9290 ldx [%g1+24], %g5
9291 jmp %g5
9292 ldx [%g1+16], %g5
9293 +16 bytes data
9296 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9297 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9298 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9299 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9300 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9301 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9302 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9303 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9304 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9305 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9306 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9308 if (sparc_cpu != PROCESSOR_ULTRASPARC
9309 && sparc_cpu != PROCESSOR_ULTRASPARC3
9310 && sparc_cpu != PROCESSOR_NIAGARA
9311 && sparc_cpu != PROCESSOR_NIAGARA2
9312 && sparc_cpu != PROCESSOR_NIAGARA3
9313 && sparc_cpu != PROCESSOR_NIAGARA4)
9314 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9316 /* Call __enable_execute_stack after writing onto the stack to make sure
9317 the stack address is accessible. */
9318 #ifdef HAVE_ENABLE_EXECUTE_STACK
9319 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9320 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9321 #endif
9324 /* Worker for TARGET_TRAMPOLINE_INIT. */
9326 static void
9327 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9329 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9330 cxt = force_reg (Pmode, cxt);
9331 if (TARGET_ARCH64)
9332 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9333 else
9334 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9337 /* Adjust the cost of a scheduling dependency. Return the new cost of a
9338 dependency LINK of INSN on DEP_INSN. COST is the current cost. */
9340 static int
9341 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9343 enum attr_type insn_type;
9345 if (! recog_memoized (insn))
9346 return 0;
9348 insn_type = get_attr_type (insn);
9350 if (REG_NOTE_KIND (link) == 0)
9352 /* Data dependency; DEP_INSN writes a register that INSN reads some
9353 cycles later. */
9355 /* if a load, then the dependence must be on the memory address;
9356 add an extra "cycle". Note that the cost could be two cycles
9357 if the reg was written late in an instruction group; we cannot tell
9358 here. */
9359 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9360 return cost + 3;
9362 /* Get the delay only if the address of the store is the dependence. */
9363 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9365 rtx pat = PATTERN(insn);
9366 rtx dep_pat = PATTERN (dep_insn);
9368 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9369 return cost; /* This should not happen! */
9371 /* The dependency between the two instructions was on the data that
9372 is being stored. Assume that this implies that the address of the
9373 store is not dependent. */
9374 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9375 return cost;
9377 return cost + 3; /* An approximation. */
9380 /* A shift instruction cannot receive its data from an instruction
9381 in the same cycle; add a one cycle penalty. */
9382 if (insn_type == TYPE_SHIFT)
9383 return cost + 3; /* Split before cascade into shift. */
9385 else
9387 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9388 INSN writes some cycles later. */
9390 /* These are only significant for the fpu unit; writing a fp reg before
9391 the fpu has finished with it stalls the processor. */
9393 /* Reusing an integer register causes no problems. */
9394 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9395 return 0;
9398 return cost;
9401 static int
9402 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9404 enum attr_type insn_type, dep_type;
9405 rtx pat = PATTERN(insn);
9406 rtx dep_pat = PATTERN (dep_insn);
9408 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9409 return cost;
9411 insn_type = get_attr_type (insn);
9412 dep_type = get_attr_type (dep_insn);
9414 switch (REG_NOTE_KIND (link))
9416 case 0:
9417 /* Data dependency; DEP_INSN writes a register that INSN reads some
9418 cycles later. */
9420 switch (insn_type)
9422 case TYPE_STORE:
9423 case TYPE_FPSTORE:
9424 /* Get the delay iff the address of the store is the dependence. */
9425 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9426 return cost;
9428 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9429 return cost;
9430 return cost + 3;
9432 case TYPE_LOAD:
9433 case TYPE_SLOAD:
9434 case TYPE_FPLOAD:
9435 /* If a load, then the dependence must be on the memory address. If
9436 the addresses aren't equal, then it might be a false dependency. */
9437 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9439 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9440 || GET_CODE (SET_DEST (dep_pat)) != MEM
9441 || GET_CODE (SET_SRC (pat)) != MEM
9442 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9443 XEXP (SET_SRC (pat), 0)))
9444 return cost + 2;
9446 return cost + 8;
9448 break;
9450 case TYPE_BRANCH:
9451 /* Compare to branch latency is 0. There is no benefit from
9452 separating compare and branch. */
9453 if (dep_type == TYPE_COMPARE)
9454 return 0;
9455 /* Floating point compare to branch latency is less than
9456 compare to conditional move. */
9457 if (dep_type == TYPE_FPCMP)
9458 return cost - 1;
9459 break;
9460 default:
9461 break;
9463 break;
9465 case REG_DEP_ANTI:
9466 /* Anti-dependencies only penalize the fpu unit. */
9467 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9468 return 0;
9469 break;
9471 default:
9472 break;
9475 return cost;
9478 static int
9479 sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
9481 switch (sparc_cpu)
9483 case PROCESSOR_SUPERSPARC:
9484 cost = supersparc_adjust_cost (insn, link, dep, cost);
9485 break;
9486 case PROCESSOR_HYPERSPARC:
9487 case PROCESSOR_SPARCLITE86X:
9488 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9489 break;
9490 default:
9491 break;
9493 return cost;
9496 static void
9497 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9498 int sched_verbose ATTRIBUTE_UNUSED,
9499 int max_ready ATTRIBUTE_UNUSED)
9502 static int
9503 sparc_use_sched_lookahead (void)
9505 if (sparc_cpu == PROCESSOR_NIAGARA
9506 || sparc_cpu == PROCESSOR_NIAGARA2
9507 || sparc_cpu == PROCESSOR_NIAGARA3)
9508 return 0;
9509 if (sparc_cpu == PROCESSOR_NIAGARA4)
9510 return 2;
9511 if (sparc_cpu == PROCESSOR_ULTRASPARC
9512 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9513 return 4;
9514 if ((1 << sparc_cpu) &
9515 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9516 (1 << PROCESSOR_SPARCLITE86X)))
9517 return 3;
9518 return 0;
9521 static int
9522 sparc_issue_rate (void)
9524 switch (sparc_cpu)
9526 case PROCESSOR_NIAGARA:
9527 case PROCESSOR_NIAGARA2:
9528 case PROCESSOR_NIAGARA3:
9529 default:
9530 return 1;
9531 case PROCESSOR_NIAGARA4:
9532 case PROCESSOR_V9:
9533 /* Assume V9 processors are capable of at least dual-issue. */
9534 return 2;
9535 case PROCESSOR_SUPERSPARC:
9536 return 3;
9537 case PROCESSOR_HYPERSPARC:
9538 case PROCESSOR_SPARCLITE86X:
9539 return 2;
9540 case PROCESSOR_ULTRASPARC:
9541 case PROCESSOR_ULTRASPARC3:
9542 return 4;
9546 static int
9547 set_extends (rtx insn)
9549 register rtx pat = PATTERN (insn);
9551 switch (GET_CODE (SET_SRC (pat)))
9553 /* Load and some shift instructions zero extend. */
9554 case MEM:
9555 case ZERO_EXTEND:
9556 /* sethi clears the high bits */
9557 case HIGH:
9558 /* LO_SUM is used with sethi. sethi clears the high
9559 bits, and the values used with lo_sum are positive. */
9560 case LO_SUM:
9561 /* Store flag stores 0 or 1 */
9562 case LT: case LTU:
9563 case GT: case GTU:
9564 case LE: case LEU:
9565 case GE: case GEU:
9566 case EQ:
9567 case NE:
9568 return 1;
9569 case AND:
9571 rtx op0 = XEXP (SET_SRC (pat), 0);
9572 rtx op1 = XEXP (SET_SRC (pat), 1);
9573 if (GET_CODE (op1) == CONST_INT)
9574 return INTVAL (op1) >= 0;
9575 if (GET_CODE (op0) != REG)
9576 return 0;
9577 if (sparc_check_64 (op0, insn) == 1)
9578 return 1;
9579 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9581 case IOR:
9582 case XOR:
9584 rtx op0 = XEXP (SET_SRC (pat), 0);
9585 rtx op1 = XEXP (SET_SRC (pat), 1);
9586 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9587 return 0;
9588 if (GET_CODE (op1) == CONST_INT)
9589 return INTVAL (op1) >= 0;
9590 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9592 case LSHIFTRT:
9593 return GET_MODE (SET_SRC (pat)) == SImode;
9594 /* Positive integers leave the high bits zero. */
9595 case CONST_DOUBLE:
9596 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9597 case CONST_INT:
9598 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9599 case ASHIFTRT:
9600 case SIGN_EXTEND:
9601 return - (GET_MODE (SET_SRC (pat)) == SImode);
9602 case REG:
9603 return sparc_check_64 (SET_SRC (pat), insn);
9604 default:
9605 return 0;
9609 /* We _ought_ to have only one kind per function, but... */
9610 static GTY(()) rtx sparc_addr_diff_list;
9611 static GTY(()) rtx sparc_addr_list;
9613 void
9614 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9616 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9617 if (diff)
9618 sparc_addr_diff_list
9619 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9620 else
9621 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9624 static void
9625 sparc_output_addr_vec (rtx vec)
9627 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9628 int idx, vlen = XVECLEN (body, 0);
9630 #ifdef ASM_OUTPUT_ADDR_VEC_START
9631 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9632 #endif
9634 #ifdef ASM_OUTPUT_CASE_LABEL
9635 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9636 NEXT_INSN (lab));
9637 #else
9638 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9639 #endif
9641 for (idx = 0; idx < vlen; idx++)
9643 ASM_OUTPUT_ADDR_VEC_ELT
9644 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9647 #ifdef ASM_OUTPUT_ADDR_VEC_END
9648 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9649 #endif
9652 static void
9653 sparc_output_addr_diff_vec (rtx vec)
9655 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9656 rtx base = XEXP (XEXP (body, 0), 0);
9657 int idx, vlen = XVECLEN (body, 1);
9659 #ifdef ASM_OUTPUT_ADDR_VEC_START
9660 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9661 #endif
9663 #ifdef ASM_OUTPUT_CASE_LABEL
9664 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9665 NEXT_INSN (lab));
9666 #else
9667 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9668 #endif
9670 for (idx = 0; idx < vlen; idx++)
9672 ASM_OUTPUT_ADDR_DIFF_ELT
9673 (asm_out_file,
9674 body,
9675 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9676 CODE_LABEL_NUMBER (base));
9679 #ifdef ASM_OUTPUT_ADDR_VEC_END
9680 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9681 #endif
9684 static void
9685 sparc_output_deferred_case_vectors (void)
9687 rtx t;
9688 int align;
9690 if (sparc_addr_list == NULL_RTX
9691 && sparc_addr_diff_list == NULL_RTX)
9692 return;
9694 /* Align to cache line in the function's code section. */
9695 switch_to_section (current_function_section ());
9697 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9698 if (align > 0)
9699 ASM_OUTPUT_ALIGN (asm_out_file, align);
9701 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9702 sparc_output_addr_vec (XEXP (t, 0));
9703 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9704 sparc_output_addr_diff_vec (XEXP (t, 0));
9706 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9709 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9710 unknown. Return 1 if the high bits are zero, -1 if the register is
9711 sign extended. */
9712 int
9713 sparc_check_64 (rtx x, rtx insn)
9715 /* If a register is set only once it is safe to ignore insns this
9716 code does not know how to handle. The loop will either recognize
9717 the single set and return the correct value or fail to recognize
9718 it and return 0. */
9719 int set_once = 0;
9720 rtx y = x;
9722 gcc_assert (GET_CODE (x) == REG);
9724 if (GET_MODE (x) == DImode)
9725 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9727 if (flag_expensive_optimizations
9728 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9729 set_once = 1;
9731 if (insn == 0)
9733 if (set_once)
9734 insn = get_last_insn_anywhere ();
9735 else
9736 return 0;
9739 while ((insn = PREV_INSN (insn)))
9741 switch (GET_CODE (insn))
9743 case JUMP_INSN:
9744 case NOTE:
9745 break;
9746 case CODE_LABEL:
9747 case CALL_INSN:
9748 default:
9749 if (! set_once)
9750 return 0;
9751 break;
9752 case INSN:
9754 rtx pat = PATTERN (insn);
9755 if (GET_CODE (pat) != SET)
9756 return 0;
9757 if (rtx_equal_p (x, SET_DEST (pat)))
9758 return set_extends (insn);
9759 if (y && rtx_equal_p (y, SET_DEST (pat)))
9760 return set_extends (insn);
9761 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9762 return 0;
9766 return 0;
9769 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9770 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9772 const char *
9773 output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
9775 static char asm_code[60];
9777 /* The scratch register is only required when the destination
9778 register is not a 64-bit global or out register. */
9779 if (which_alternative != 2)
9780 operands[3] = operands[0];
9782 /* We can only shift by constants <= 63. */
9783 if (GET_CODE (operands[2]) == CONST_INT)
9784 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9786 if (GET_CODE (operands[1]) == CONST_INT)
9788 output_asm_insn ("mov\t%1, %3", operands);
9790 else
9792 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9793 if (sparc_check_64 (operands[1], insn) <= 0)
9794 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9795 output_asm_insn ("or\t%L1, %3, %3", operands);
9798 strcpy (asm_code, opcode);
9800 if (which_alternative != 2)
9801 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9802 else
9803 return
9804 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
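/* As an illustration, with a distinct 64-bit scratch %3 (the third
   alternative) and OPCODE "sllx", the routine above emits

	sllx	%H1, 32, %3	! scratch = high word << 32
	srl	%L1, 0, %L1	! zero-extend the low word
	or	%L1, %3, %3	! scratch = full 64-bit source
	sllx	%3, %2, %3	! perform the shift
	srlx	%3, 32, %H0	! write back the high word
	mov	%3, %L0		! write back the low word

   where the srl is omitted when sparc_check_64 already proves the
   low word zero-extended.  */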
9807 /* Output rtl to increment the profiler label LABELNO
9808 for profiling a function entry. */
9810 void
9811 sparc_profile_hook (int labelno)
9813 char buf[32];
9814 rtx lab, fun;
9816 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9817 if (NO_PROFILE_COUNTERS)
9819 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9821 else
9823 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9824 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9825 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9829 #ifdef TARGET_SOLARIS
9830 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9832 static void
9833 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9834 tree decl ATTRIBUTE_UNUSED)
9836 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9838 solaris_elf_asm_comdat_section (name, flags, decl);
9839 return;
9842 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9844 if (!(flags & SECTION_DEBUG))
9845 fputs (",#alloc", asm_out_file);
9846 if (flags & SECTION_WRITE)
9847 fputs (",#write", asm_out_file);
9848 if (flags & SECTION_TLS)
9849 fputs (",#tls", asm_out_file);
9850 if (flags & SECTION_CODE)
9851 fputs (",#execinstr", asm_out_file);
9853 /* Sun as only supports #nobits/#progbits since Solaris 10. */
9854 if (HAVE_AS_SPARC_NOBITS)
9856 if (flags & SECTION_BSS)
9857 fputs (",#nobits", asm_out_file);
9858 else
9859 fputs (",#progbits", asm_out_file);
9862 fputc ('\n', asm_out_file);
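  /* A sample of the output (illustrative): for a writable TLS data
     section such as ".tdata" this emits

	.section ".tdata",#alloc,#write,#tls,#progbits

     with the trailing #progbits/#nobits only when the assembler is
     known to support it.  */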
9864 #endif /* TARGET_SOLARIS */
9866 /* We do not allow indirect calls to be optimized into sibling calls.
9868 We cannot use sibling calls when delayed branches are disabled
9869 because they will likely require the call delay slot to be filled.
9871 Also, on SPARC 32-bit we cannot emit a sibling call when the
9872 current function returns a structure. This is because the "unimp
9873 after call" convention would cause the callee to return to the
9874 wrong place. The generic code already disallows cases where the
9875 function being called returns a structure.
9877    It may seem strange that this last case could occur.  Usually there
9878 is code after the call which jumps to epilogue code which dumps the
9879 return value into the struct return area. That ought to invalidate
9880 the sibling call right? Well, in the C++ case we can end up passing
9881 the pointer to the struct return area to a constructor (which returns
9882 void) and then nothing else happens. Such a sibling call would look
9883 valid without the added check here.
9885 VxWorks PIC PLT entries require the global pointer to be initialized
9886 on entry. We therefore can't emit sibling calls to them. */
9887 static bool
9888 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9889 {
9890   return (decl
9891           && flag_delayed_branch
9892           && (TARGET_ARCH64 || ! cfun->returns_struct)
9893           && !(TARGET_VXWORKS_RTP
9894                && flag_pic
9895                && !targetm.binds_local_p (decl)));
9896 }
9898 /* libfunc renaming. */
9900 static void
9901 sparc_init_libfuncs (void)
9903 if (TARGET_ARCH32)
9905 /* Use the subroutines that Sun's library provides for integer
9906 multiply and divide. The `*' prevents an underscore from
9907 being prepended by the compiler. .umul is a little faster
9908 than .mul. */
9909 set_optab_libfunc (smul_optab, SImode, "*.umul");
9910 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9911 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9912 set_optab_libfunc (smod_optab, SImode, "*.rem");
9913 set_optab_libfunc (umod_optab, SImode, "*.urem");
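/* The effect, roughly: on 32-bit parts without hardware multiply and
   divide, an SImode 'a * b' assembles to 'call .umul' instead of a call
   to libgcc's __mulsi3, and likewise for the division and modulus
   routines above.  */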
9915   /* TFmode arithmetic.  These names are part of the SPARC 32-bit ABI.  */
9916 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9917 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9918 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9919 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9920 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9922 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9923 is because with soft-float, the SFmode and DFmode sqrt
9924 instructions will be absent, and the compiler will notice and
9925 try to use the TFmode sqrt instruction for calls to the
9926 builtin function sqrt, but this fails. */
9927 if (TARGET_FPU)
9928 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9930 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9931 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9932 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9933 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9934 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9935 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9937 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9938 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9939 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9940 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9942 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9943 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9944 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9945 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9947 if (DITF_CONVERSION_LIBFUNCS)
9949 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9950 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9951 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9952 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9955 if (SUN_CONVERSION_LIBFUNCS)
9957 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9958 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9959 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9960 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9963 if (TARGET_ARCH64)
9965       /* In the SPARC 64-bit ABI, SImode multiply and divide functions
9966 do not exist in the library. Make sure the compiler does not
9967 emit calls to them by accident. (It should always use the
9968 hardware instructions.) */
9969 set_optab_libfunc (smul_optab, SImode, 0);
9970 set_optab_libfunc (sdiv_optab, SImode, 0);
9971 set_optab_libfunc (udiv_optab, SImode, 0);
9972 set_optab_libfunc (smod_optab, SImode, 0);
9973 set_optab_libfunc (umod_optab, SImode, 0);
9975 if (SUN_INTEGER_MULTIPLY_64)
9977 set_optab_libfunc (smul_optab, DImode, "__mul64");
9978 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9979 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9980 set_optab_libfunc (smod_optab, DImode, "__rem64");
9981 set_optab_libfunc (umod_optab, DImode, "__urem64");
9984 if (SUN_CONVERSION_LIBFUNCS)
9986 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
9987 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
9988 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
9989 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
9994 static tree def_builtin (const char *name, int code, tree type)
9995 {
9996   return add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
9997                                NULL_TREE);
9998 }
10000 static tree def_builtin_const (const char *name, int code, tree type)
10001 {
10002   tree t = def_builtin (name, code, type);
10004   if (t)
10005     TREE_READONLY (t) = 1;
10007   return t;
10008 }
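/* Note the division of labor above: def_builtin_const marks the decl
   TREE_READONLY, i.e. a pure function of its operands that the middle
   end may CSE and constant-fold, while the plain def_builtin entries
   below (the pack, aligndata, GSR and cmask builtins) read or write the
   GSR or have other side effects and must not be treated that way.  */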
10010 /* Implement the TARGET_INIT_BUILTINS target hook.
10011 Create builtin functions for special SPARC instructions. */
10013 static void
10014 sparc_init_builtins (void)
10016 if (TARGET_VIS)
10017 sparc_vis_init_builtins ();
10020 /* Create builtin functions for VIS 1.0 instructions. */
10022 static void
10023 sparc_vis_init_builtins (void)
10025 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10026 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10027 tree v4hi = build_vector_type (intHI_type_node, 4);
10028 tree v2hi = build_vector_type (intHI_type_node, 2);
10029 tree v2si = build_vector_type (intSI_type_node, 2);
10030 tree v1si = build_vector_type (intSI_type_node, 1);
10032 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10033 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10034 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10035 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10036 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10037 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10038 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10039 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10040 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10041 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10042 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10043 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10044 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10045 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10046 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10047 v8qi, v8qi,
10048 intDI_type_node, 0);
10049 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10050 v8qi, v8qi, 0);
10051 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10052 v8qi, v8qi, 0);
10053 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10054 intDI_type_node,
10055 intDI_type_node, 0);
10056 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10057 intSI_type_node,
10058 intSI_type_node, 0);
10059 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10060 ptr_type_node,
10061 intSI_type_node, 0);
10062 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10063 ptr_type_node,
10064 intDI_type_node, 0);
10065 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10066 ptr_type_node,
10067 ptr_type_node, 0);
10068 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10069 ptr_type_node,
10070 ptr_type_node, 0);
10071 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10072 v4hi, v4hi, 0);
10073 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10074 v2si, v2si, 0);
10075 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10076 v4hi, v4hi, 0);
10077 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10078 v2si, v2si, 0);
10079 tree void_ftype_di = build_function_type_list (void_type_node,
10080 intDI_type_node, 0);
10081 tree di_ftype_void = build_function_type_list (intDI_type_node,
10082 void_type_node, 0);
10083 tree void_ftype_si = build_function_type_list (void_type_node,
10084 intSI_type_node, 0);
10085 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10086 float_type_node,
10087 float_type_node, 0);
10088 tree df_ftype_df_df = build_function_type_list (double_type_node,
10089 double_type_node,
10090 double_type_node, 0);
10092 /* Packing and expanding vectors. */
10093 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10094 v4qi_ftype_v4hi);
10095 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10096 v8qi_ftype_v2si_v8qi);
10097 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10098 v2hi_ftype_v2si);
10099 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10100 v4hi_ftype_v4qi);
10101 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10102 v8qi_ftype_v4qi_v4qi);
10104 /* Multiplications. */
10105 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10106 v4hi_ftype_v4qi_v4hi);
10107 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10108 v4hi_ftype_v4qi_v2hi);
10109 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10110 v4hi_ftype_v4qi_v2hi);
10111 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10112 v4hi_ftype_v8qi_v4hi);
10113 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10114 v4hi_ftype_v8qi_v4hi);
10115 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10116 v2si_ftype_v4qi_v2hi);
10117 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10118 v2si_ftype_v4qi_v2hi);
10120 /* Data aligning. */
10121 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10122 v4hi_ftype_v4hi_v4hi);
10123 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10124 v8qi_ftype_v8qi_v8qi);
10125 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10126 v2si_ftype_v2si_v2si);
10127 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10128 di_ftype_di_di);
10130 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10131 void_ftype_di);
10132 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10133 di_ftype_void);
10135 if (TARGET_ARCH64)
10137 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10138 ptr_ftype_ptr_di);
10139 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10140 ptr_ftype_ptr_di);
10142 else
10144 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10145 ptr_ftype_ptr_si);
10146 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10147 ptr_ftype_ptr_si);
10150 /* Pixel distance. */
10151 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10152 di_ftype_v8qi_v8qi_di);
10154 /* Edge handling. */
10155 if (TARGET_ARCH64)
10157 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10158 di_ftype_ptr_ptr);
10159 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10160 di_ftype_ptr_ptr);
10161 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10162 di_ftype_ptr_ptr);
10163 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10164 di_ftype_ptr_ptr);
10165 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10166 di_ftype_ptr_ptr);
10167 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10168 di_ftype_ptr_ptr);
10169 if (TARGET_VIS2)
10171 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10172 di_ftype_ptr_ptr);
10173 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10174 di_ftype_ptr_ptr);
10175 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10176 di_ftype_ptr_ptr);
10177 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10178 di_ftype_ptr_ptr);
10179 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10180 di_ftype_ptr_ptr);
10181 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10182 di_ftype_ptr_ptr);
10185 else
10187 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10188 si_ftype_ptr_ptr);
10189 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10190 si_ftype_ptr_ptr);
10191 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10192 si_ftype_ptr_ptr);
10193 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10194 si_ftype_ptr_ptr);
10195 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10196 si_ftype_ptr_ptr);
10197 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10198 si_ftype_ptr_ptr);
10199 if (TARGET_VIS2)
10201 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10202 si_ftype_ptr_ptr);
10203 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10204 si_ftype_ptr_ptr);
10205 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10206 si_ftype_ptr_ptr);
10207 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10208 si_ftype_ptr_ptr);
10209 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10210 si_ftype_ptr_ptr);
10211 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10212 si_ftype_ptr_ptr);
10216 /* Pixel compare. */
10217 if (TARGET_ARCH64)
10219 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10220 di_ftype_v4hi_v4hi);
10221 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10222 di_ftype_v2si_v2si);
10223 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10224 di_ftype_v4hi_v4hi);
10225 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10226 di_ftype_v2si_v2si);
10227 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10228 di_ftype_v4hi_v4hi);
10229 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10230 di_ftype_v2si_v2si);
10231 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10232 di_ftype_v4hi_v4hi);
10233 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10234 di_ftype_v2si_v2si);
10236 else
10238 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10239 si_ftype_v4hi_v4hi);
10240 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10241 si_ftype_v2si_v2si);
10242 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10243 si_ftype_v4hi_v4hi);
10244 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10245 si_ftype_v2si_v2si);
10246 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10247 si_ftype_v4hi_v4hi);
10248 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10249 si_ftype_v2si_v2si);
10250 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10251 si_ftype_v4hi_v4hi);
10252 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10253 si_ftype_v2si_v2si);
10256 /* Addition and subtraction. */
10257 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10258 v4hi_ftype_v4hi_v4hi);
10259 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10260 v2hi_ftype_v2hi_v2hi);
10261 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10262 v2si_ftype_v2si_v2si);
10263 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10264 v1si_ftype_v1si_v1si);
10265 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10266 v4hi_ftype_v4hi_v4hi);
10267 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10268 v2hi_ftype_v2hi_v2hi);
10269 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10270 v2si_ftype_v2si_v2si);
10271 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10272 v1si_ftype_v1si_v1si);
10274 /* Three-dimensional array addressing. */
10275 if (TARGET_ARCH64)
10277 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10278 di_ftype_di_di);
10279 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10280 di_ftype_di_di);
10281 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10282 di_ftype_di_di);
10284 else
10286 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10287 si_ftype_si_si);
10288 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10289 si_ftype_si_si);
10290 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10291 si_ftype_si_si);
10294 if (TARGET_VIS2)
10296 /* Byte mask and shuffle */
10297 if (TARGET_ARCH64)
10298 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10299 di_ftype_di_di);
10300 else
10301 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10302 si_ftype_si_si);
10303 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10304 v4hi_ftype_v4hi_v4hi);
10305 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10306 v8qi_ftype_v8qi_v8qi);
10307 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10308 v2si_ftype_v2si_v2si);
10309 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10310 di_ftype_di_di);
10313 if (TARGET_VIS3)
10315 if (TARGET_ARCH64)
10317 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10318 void_ftype_di);
10319 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10320 void_ftype_di);
10321 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10322 void_ftype_di);
10324 else
10326 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10327 void_ftype_si);
10328 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10329 void_ftype_si);
10330 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10331 void_ftype_si);
10334 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10335 v4hi_ftype_v4hi_v4hi);
10337 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10338 v4hi_ftype_v4hi_v4hi);
10339 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10340 v4hi_ftype_v4hi_v4hi);
10341 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10342 v4hi_ftype_v4hi_v4hi);
10343 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10344 v4hi_ftype_v4hi_v4hi);
10345 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10346 v2si_ftype_v2si_v2si);
10347 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10348 v2si_ftype_v2si_v2si);
10349 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10350 v2si_ftype_v2si_v2si);
10351 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10352 v2si_ftype_v2si_v2si);
10354 if (TARGET_ARCH64)
10355 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10356 di_ftype_v8qi_v8qi);
10357 else
10358 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10359 si_ftype_v8qi_v8qi);
10361 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10362 v4hi_ftype_v4hi_v4hi);
10363 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10364 di_ftype_di_di);
10365 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10366 di_ftype_di_di);
10368 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10369 v4hi_ftype_v4hi_v4hi);
10370 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10371 v2hi_ftype_v2hi_v2hi);
10372 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10373 v4hi_ftype_v4hi_v4hi);
10374 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10375 v2hi_ftype_v2hi_v2hi);
10376 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10377 v2si_ftype_v2si_v2si);
10378 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10379 v1si_ftype_v1si_v1si);
10380 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10381 v2si_ftype_v2si_v2si);
10382 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10383 v1si_ftype_v1si_v1si);
10385 if (TARGET_ARCH64)
10387 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10388 di_ftype_v8qi_v8qi);
10389 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10390 di_ftype_v8qi_v8qi);
10391 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10392 di_ftype_v8qi_v8qi);
10393 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10394 di_ftype_v8qi_v8qi);
10396 else
10398 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10399 si_ftype_v8qi_v8qi);
10400 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10401 si_ftype_v8qi_v8qi);
10402 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10403 si_ftype_v8qi_v8qi);
10404 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10405 si_ftype_v8qi_v8qi);
10408 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10409 sf_ftype_sf_sf);
10410 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10411 df_ftype_df_df);
10412 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10413 sf_ftype_sf_sf);
10414 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10415 df_ftype_df_df);
10416 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10417 sf_ftype_sf_sf);
10418 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10419 df_ftype_df_df);
10421 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10422 di_ftype_di_di);
10423 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10424 di_ftype_di_di);
10425 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10426 di_ftype_di_di);
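/* A user-level sketch of how these builtins are reached (hypothetical
   code, assuming -mvis; the vector type matches the V8QImode patterns
   registered above):

	typedef unsigned char vec8 __attribute__ ((vector_size (8)));

	vec8 align (vec8 a, vec8 b, void *p)
	{
	  (void) __builtin_vis_alignaddr (p, 0);
	  return __builtin_vis_faligndatav8qi (a, b);
	}
*/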
10430 /* Implement the TARGET_EXPAND_BUILTIN target hook.
10431    Expand builtin functions for SPARC intrinsics.  */
10433 static rtx
10434 sparc_expand_builtin (tree exp, rtx target,
10435 rtx subtarget ATTRIBUTE_UNUSED,
10436 enum machine_mode tmode ATTRIBUTE_UNUSED,
10437 int ignore ATTRIBUTE_UNUSED)
10439 tree arg;
10440 call_expr_arg_iterator iter;
10441 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10442 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
10443 rtx pat, op[4];
10444 int arg_count = 0;
10445 bool nonvoid;
10447 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10449 if (nonvoid)
10451 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10452 if (!target
10453 || GET_MODE (target) != tmode
10454 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10455 op[0] = gen_reg_rtx (tmode);
10456 else
10457 op[0] = target;
10459 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10461 const struct insn_operand_data *insn_op;
10462 int idx;
10464 if (arg == error_mark_node)
10465 return NULL_RTX;
10467 arg_count++;
10468 idx = arg_count - !nonvoid;
10469 insn_op = &insn_data[icode].operand[idx];
10470 op[arg_count] = expand_normal (arg);
10472 if (insn_op->mode == V1DImode
10473 && GET_MODE (op[arg_count]) == DImode)
10474 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10475 else if (insn_op->mode == V1SImode
10476 && GET_MODE (op[arg_count]) == SImode)
10477 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10479 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10480 insn_op->mode))
10481 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10484 switch (arg_count)
10486 case 0:
10487 pat = GEN_FCN (icode) (op[0]);
10488 break;
10489 case 1:
10490 if (nonvoid)
10491 pat = GEN_FCN (icode) (op[0], op[1]);
10492 else
10493 pat = GEN_FCN (icode) (op[1]);
10494 break;
10495 case 2:
10496 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10497 break;
10498 case 3:
10499 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10500 break;
10501 default:
10502 gcc_unreachable ();
10505 if (!pat)
10506 return NULL_RTX;
10508 emit_insn (pat);
10510 if (nonvoid)
10511 return op[0];
10512 else
10513 return const0_rtx;
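/* Subroutine of the constant folder below: compute one element of an
   fmul8x16-style partitioned product, i.e. the upper 16 bits of the
   24-bit product of an unsigned 8-bit and a signed 16-bit element,
   with rounding.  */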
10516 static int
10517 sparc_vis_mul8x16 (int e8, int e16)
10518 {
10519   return (e8 * e16 + 128) / 256;
10520 }
10522 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10523 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10525 static void
10526 sparc_handle_vis_mul8x16 (tree *n_elts, int fncode, tree inner_type,
10527 tree cst0, tree cst1)
10529 unsigned i, num = VECTOR_CST_NELTS (cst0);
10530 int scale;
10532 switch (fncode)
10534 case CODE_FOR_fmul8x16_vis:
10535 for (i = 0; i < num; ++i)
10537 int val
10538 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10539 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10540 n_elts[i] = build_int_cst (inner_type, val);
10542 break;
10544 case CODE_FOR_fmul8x16au_vis:
10545 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10547 for (i = 0; i < num; ++i)
10549 int val
10550 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10551 scale);
10552 n_elts[i] = build_int_cst (inner_type, val);
10554 break;
10556 case CODE_FOR_fmul8x16al_vis:
10557 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10559 for (i = 0; i < num; ++i)
10561 int val
10562 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10563 scale);
10564 n_elts[i] = build_int_cst (inner_type, val);
10566 break;
10568 default:
10569 gcc_unreachable ();
10573 /* Implement the TARGET_FOLD_BUILTIN target hook.
10574 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10575 result of the function call is ignored. NULL_TREE is returned if the
10576 function could not be folded. */
10578 static tree
10579 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10580 tree *args, bool ignore)
10582 tree arg0, arg1, arg2;
10583 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10584 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
10586 if (ignore)
10588 /* Note that a switch statement instead of the sequence of tests would
10589 be incorrect as many of the CODE_FOR values could be CODE_FOR_nothing
10590 and that would yield multiple alternatives with identical values. */
10591 if (icode == CODE_FOR_alignaddrsi_vis
10592 || icode == CODE_FOR_alignaddrdi_vis
10593 || icode == CODE_FOR_wrgsr_vis
10594 || icode == CODE_FOR_bmasksi_vis
10595 || icode == CODE_FOR_bmaskdi_vis
10596 || icode == CODE_FOR_cmask8si_vis
10597 || icode == CODE_FOR_cmask8di_vis
10598 || icode == CODE_FOR_cmask16si_vis
10599 || icode == CODE_FOR_cmask16di_vis
10600 || icode == CODE_FOR_cmask32si_vis
10601 || icode == CODE_FOR_cmask32di_vis)
10603 else
10604 return build_zero_cst (rtype);
10607 switch (icode)
10609 case CODE_FOR_fexpand_vis:
10610 arg0 = args[0];
10611 STRIP_NOPS (arg0);
10613 if (TREE_CODE (arg0) == VECTOR_CST)
10615 tree inner_type = TREE_TYPE (rtype);
10616 tree *n_elts;
10617 unsigned i;
10619 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10620 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10621 n_elts[i] = build_int_cst (inner_type,
10622 TREE_INT_CST_LOW
10623 (VECTOR_CST_ELT (arg0, i)) << 4);
10624 return build_vector (rtype, n_elts);
10626 break;
10628 case CODE_FOR_fmul8x16_vis:
10629 case CODE_FOR_fmul8x16au_vis:
10630 case CODE_FOR_fmul8x16al_vis:
10631 arg0 = args[0];
10632 arg1 = args[1];
10633 STRIP_NOPS (arg0);
10634 STRIP_NOPS (arg1);
10636 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10638 tree inner_type = TREE_TYPE (rtype);
10639 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10640 sparc_handle_vis_mul8x16 (n_elts, icode, inner_type, arg0, arg1);
10641 return build_vector (rtype, n_elts);
10643 break;
10645 case CODE_FOR_fpmerge_vis:
10646 arg0 = args[0];
10647 arg1 = args[1];
10648 STRIP_NOPS (arg0);
10649 STRIP_NOPS (arg1);
10651 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10653 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10654 unsigned i;
10655 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10657 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10658 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10661 return build_vector (rtype, n_elts);
10663 break;
10665 case CODE_FOR_pdist_vis:
10666 arg0 = args[0];
10667 arg1 = args[1];
10668 arg2 = args[2];
10669 STRIP_NOPS (arg0);
10670 STRIP_NOPS (arg1);
10671 STRIP_NOPS (arg2);
10673 if (TREE_CODE (arg0) == VECTOR_CST
10674 && TREE_CODE (arg1) == VECTOR_CST
10675 && TREE_CODE (arg2) == INTEGER_CST)
10677 bool overflow = false;
10678 double_int result = TREE_INT_CST (arg2);
10679 double_int tmp;
10680 unsigned i;
10682 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10684 double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
10685 double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));
10687 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10689 tmp = e1.neg_with_overflow (&neg1_ovf);
10690 tmp = e0.add_with_sign (tmp, false, &add1_ovf);
10691 if (tmp.is_negative ())
10692 tmp = tmp.neg_with_overflow (&neg2_ovf);
10693 else
10694 neg2_ovf = false;
10695 result = result.add_with_sign (tmp, false, &add2_ovf);
10696 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10699 gcc_assert (!overflow);
10701 return build_int_cst_wide (rtype, result.low, result.high);
10704 default:
10705 break;
10708 return NULL_TREE;
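/* For example, __builtin_vis_fexpand applied to the constant vector
   { 1, 2, 3, 4 } folds at compile time to { 16, 32, 48, 64 } (each
   element shifted left by 4), and no fexpand instruction is emitted.  */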
10711 /* ??? This duplicates information provided to the compiler by the
10712 ??? scheduler description. Some day, teach genautomata to output
10713 ??? the latencies and then CSE will just use that. */
10715 static bool
10716 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10717 int *total, bool speed ATTRIBUTE_UNUSED)
10719 enum machine_mode mode = GET_MODE (x);
10720 bool float_mode_p = FLOAT_MODE_P (mode);
10722 switch (code)
10724 case CONST_INT:
10725 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10727 *total = 0;
10728 return true;
10730 /* FALLTHRU */
10732 case HIGH:
10733 *total = 2;
10734 return true;
10736 case CONST:
10737 case LABEL_REF:
10738 case SYMBOL_REF:
10739 *total = 4;
10740 return true;
10742 case CONST_DOUBLE:
10743 if (GET_MODE (x) == VOIDmode
10744 && ((CONST_DOUBLE_HIGH (x) == 0
10745 && CONST_DOUBLE_LOW (x) < 0x1000)
10746 || (CONST_DOUBLE_HIGH (x) == -1
10747 && CONST_DOUBLE_LOW (x) < 0
10748 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10749 *total = 0;
10750 else
10751 *total = 8;
10752 return true;
10754 case MEM:
10755 /* If outer-code was a sign or zero extension, a cost
10756 of COSTS_N_INSNS (1) was already added in. This is
10757 why we are subtracting it back out. */
10758 if (outer_code == ZERO_EXTEND)
10760 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10762 else if (outer_code == SIGN_EXTEND)
10764 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
10766 else if (float_mode_p)
10768 *total = sparc_costs->float_load;
10770 else
10772 *total = sparc_costs->int_load;
10775 return true;
10777 case PLUS:
10778 case MINUS:
10779 if (float_mode_p)
10780 *total = sparc_costs->float_plusminus;
10781 else
10782 *total = COSTS_N_INSNS (1);
10783 return false;
10785 case FMA:
10787 rtx sub;
10789 gcc_assert (float_mode_p);
10790 *total = sparc_costs->float_mul;
10792 sub = XEXP (x, 0);
10793 if (GET_CODE (sub) == NEG)
10794 sub = XEXP (sub, 0);
10795 *total += rtx_cost (sub, FMA, 0, speed);
10797 sub = XEXP (x, 2);
10798 if (GET_CODE (sub) == NEG)
10799 sub = XEXP (sub, 0);
10800 *total += rtx_cost (sub, FMA, 2, speed);
10801 return true;
10804 case MULT:
10805 if (float_mode_p)
10806 *total = sparc_costs->float_mul;
10807 else if (! TARGET_HARD_MUL)
10808 *total = COSTS_N_INSNS (25);
10809 else
10811 int bit_cost;
10813 bit_cost = 0;
10814 if (sparc_costs->int_mul_bit_factor)
10816 int nbits;
10818 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
10820 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
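	      /* Population count via the classic 'value &= value - 1'
		 idiom: each iteration clears the lowest set bit.  */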
10821 for (nbits = 0; value != 0; value &= value - 1)
10822 nbits++;
10824 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
10825 && GET_MODE (XEXP (x, 1)) == VOIDmode)
10827 rtx x1 = XEXP (x, 1);
10828 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
10829 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
10831 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
10832 nbits++;
10833 for (; value2 != 0; value2 &= value2 - 1)
10834 nbits++;
10836 else
10837 nbits = 7;
10839 if (nbits < 3)
10840 nbits = 3;
10841 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
10842 bit_cost = COSTS_N_INSNS (bit_cost);
10845 if (mode == DImode)
10846 *total = sparc_costs->int_mulX + bit_cost;
10847 else
10848 *total = sparc_costs->int_mul + bit_cost;
10850 return false;
10852 case ASHIFT:
10853 case ASHIFTRT:
10854 case LSHIFTRT:
10855 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
10856 return false;
10858 case DIV:
10859 case UDIV:
10860 case MOD:
10861 case UMOD:
10862 if (float_mode_p)
10864 if (mode == DFmode)
10865 *total = sparc_costs->float_div_df;
10866 else
10867 *total = sparc_costs->float_div_sf;
10869 else
10871 if (mode == DImode)
10872 *total = sparc_costs->int_divX;
10873 else
10874 *total = sparc_costs->int_div;
10876 return false;
10878 case NEG:
10879 if (! float_mode_p)
10881 *total = COSTS_N_INSNS (1);
10882 return false;
10884 /* FALLTHRU */
10886 case ABS:
10887 case FLOAT:
10888 case UNSIGNED_FLOAT:
10889 case FIX:
10890 case UNSIGNED_FIX:
10891 case FLOAT_EXTEND:
10892 case FLOAT_TRUNCATE:
10893 *total = sparc_costs->float_move;
10894 return false;
10896 case SQRT:
10897 if (mode == DFmode)
10898 *total = sparc_costs->float_sqrt_df;
10899 else
10900 *total = sparc_costs->float_sqrt_sf;
10901 return false;
10903 case COMPARE:
10904 if (float_mode_p)
10905 *total = sparc_costs->float_cmp;
10906 else
10907 *total = COSTS_N_INSNS (1);
10908 return false;
10910 case IF_THEN_ELSE:
10911 if (float_mode_p)
10912 *total = sparc_costs->float_cmove;
10913 else
10914 *total = sparc_costs->int_cmove;
10915 return false;
10917 case IOR:
10918 /* Handle the NAND vector patterns. */
10919 if (sparc_vector_mode_supported_p (GET_MODE (x))
10920 && GET_CODE (XEXP (x, 0)) == NOT
10921 && GET_CODE (XEXP (x, 1)) == NOT)
10923 *total = COSTS_N_INSNS (1);
10924 return true;
10926 else
10927 return false;
10929 default:
10930 return false;
10934 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
10936 static inline bool
10937 general_or_i64_p (reg_class_t rclass)
10938 {
10939   return (rclass == GENERAL_REGS || rclass == I64_REGS);
10940 }
10942 /* Implement TARGET_REGISTER_MOVE_COST. */
10944 static int
10945 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10946 reg_class_t from, reg_class_t to)
10948 bool need_memory = false;
10950 if (from == FPCC_REGS || to == FPCC_REGS)
10951 need_memory = true;
10952 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
10953 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
10955 if (TARGET_VIS3)
10957 int size = GET_MODE_SIZE (mode);
10958 if (size == 8 || size == 4)
10960 if (! TARGET_ARCH32 || size == 4)
10961 return 4;
10962 else
10963 return 6;
10966 need_memory = true;
10969 if (need_memory)
10971 if (sparc_cpu == PROCESSOR_ULTRASPARC
10972 || sparc_cpu == PROCESSOR_ULTRASPARC3
10973 || sparc_cpu == PROCESSOR_NIAGARA
10974 || sparc_cpu == PROCESSOR_NIAGARA2
10975 || sparc_cpu == PROCESSOR_NIAGARA3
10976 || sparc_cpu == PROCESSOR_NIAGARA4)
10977 return 12;
10979 return 6;
10982 return 2;
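/* Summary of the above: moves within the integer or FP classes cost 2;
   FP<->integer moves cost 4 (or 6 for a 64-bit move in 32-bit mode)
   when VIS3 provides register-to-register move instructions; otherwise
   they must bounce through memory, costing 6, or 12 on the
   UltraSPARC/Niagara line.  */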
10985 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
10986 This is achieved by means of a manual dynamic stack space allocation in
10987 the current frame. We make the assumption that SEQ doesn't contain any
10988 function calls, with the possible exception of calls to the GOT helper. */
10990 static void
10991 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
10993 /* We must preserve the lowest 16 words for the register save area. */
10994 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
10995 /* We really need only 2 words of fresh stack space. */
10996 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
10998 rtx slot
10999 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11000 SPARC_STACK_BIAS + offset));
11002 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11003 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
11004 if (reg2)
11005 emit_insn (gen_rtx_SET (VOIDmode,
11006 adjust_address (slot, word_mode, UNITS_PER_WORD),
11007 reg2));
11008 emit_insn (seq);
11009 if (reg2)
11010 emit_insn (gen_rtx_SET (VOIDmode,
11011 reg2,
11012 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11013 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
11014 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11017 /* Output the assembler code for a thunk function. THUNK_DECL is the
11018 declaration for the thunk function itself, FUNCTION is the decl for
11019 the target function. DELTA is an immediate constant offset to be
11020 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11021 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11023 static void
11024 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11025 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11026 tree function)
11028 rtx this_rtx, insn, funexp;
11029 unsigned int int_arg_first;
11031 reload_completed = 1;
11032 epilogue_completed = 1;
11034 emit_note (NOTE_INSN_PROLOGUE_END);
11036 if (TARGET_FLAT)
11038 sparc_leaf_function_p = 1;
11040 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11042 else if (flag_delayed_branch)
11044 /* We will emit a regular sibcall below, so we need to instruct
11045 output_sibcall that we are in a leaf function. */
11046 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11048 /* This will cause final.c to invoke leaf_renumber_regs so we
11049 must behave as if we were in a not-yet-leafified function. */
11050 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11052 else
11054 /* We will emit the sibcall manually below, so we will need to
11055 manually spill non-leaf registers. */
11056 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11058 /* We really are in a leaf function. */
11059 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11062 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11063 returns a structure, the structure return pointer is there instead. */
11064 if (TARGET_ARCH64
11065 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11066 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11067 else
11068 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11070 /* Add DELTA. When possible use a plain add, otherwise load it into
11071 a register first. */
11072 if (delta)
11074 rtx delta_rtx = GEN_INT (delta);
11076 if (! SPARC_SIMM13_P (delta))
11078 rtx scratch = gen_rtx_REG (Pmode, 1);
11079 emit_move_insn (scratch, delta_rtx);
11080 delta_rtx = scratch;
11083 /* THIS_RTX += DELTA. */
11084 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11087 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11088 if (vcall_offset)
11090 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11091 rtx scratch = gen_rtx_REG (Pmode, 1);
11093 gcc_assert (vcall_offset < 0);
11095 /* SCRATCH = *THIS_RTX. */
11096 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11098 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11099 may not have any available scratch register at this point. */
11100       if (SPARC_SIMM13_P (vcall_offset))
11101         ;
11102       /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
11103       else if (! fixed_regs[5]
11104 /* The below sequence is made up of at least 2 insns,
11105 while the default method may need only one. */
11106 && vcall_offset < -8192)
11108 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11109 emit_move_insn (scratch2, vcall_offset_rtx);
11110 vcall_offset_rtx = scratch2;
11112 else
11114 rtx increment = GEN_INT (-4096);
11116 /* VCALL_OFFSET is a negative number whose typical range can be
11117 estimated as -32768..0 in 32-bit mode. In almost all cases
11118 it is therefore cheaper to emit multiple add insns than
11119 spilling and loading the constant into a register (at least
11120 6 insns). */
11121 while (! SPARC_SIMM13_P (vcall_offset))
11123 emit_insn (gen_add2_insn (scratch, increment));
11124 vcall_offset += 4096;
11126 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
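/* Worked example: for VCALL_OFFSET == -13000, three 'add %g1, -4096,
   %g1' steps leave a residual offset of -712, which fits in a simm13
   for the memory load below.  */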
11129 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11130 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11131 gen_rtx_PLUS (Pmode,
11132 scratch,
11133 vcall_offset_rtx)));
11135 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11136 emit_insn (gen_add2_insn (this_rtx, scratch));
11139 /* Generate a tail call to the target function. */
11140 if (! TREE_USED (function))
11142 assemble_external (function);
11143 TREE_USED (function) = 1;
11145 funexp = XEXP (DECL_RTL (function), 0);
11147 if (flag_delayed_branch)
11149 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11150 insn = emit_call_insn (gen_sibcall (funexp));
11151 SIBLING_CALL_P (insn) = 1;
11153 else
11155 /* The hoops we have to jump through in order to generate a sibcall
11156 without using delay slots... */
11157 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11159 if (flag_pic)
11161 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11162 start_sequence ();
11163 load_got_register (); /* clobbers %o7 */
11164 scratch = sparc_legitimize_pic_address (funexp, scratch);
11165 seq = get_insns ();
11166 end_sequence ();
11167 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11169 else if (TARGET_ARCH32)
11171 emit_insn (gen_rtx_SET (VOIDmode,
11172 scratch,
11173 gen_rtx_HIGH (SImode, funexp)));
11174 emit_insn (gen_rtx_SET (VOIDmode,
11175 scratch,
11176 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11178 else /* TARGET_ARCH64 */
11180 switch (sparc_cmodel)
11182 case CM_MEDLOW:
11183 case CM_MEDMID:
11184 /* The destination can serve as a temporary. */
11185 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11186 break;
11188 case CM_MEDANY:
11189 case CM_EMBMEDANY:
11190 /* The destination cannot serve as a temporary. */
11191 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11192 start_sequence ();
11193 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11194 seq = get_insns ();
11195 end_sequence ();
11196 emit_and_preserve (seq, spill_reg, 0);
11197 break;
11199 default:
11200 gcc_unreachable ();
11204 emit_jump_insn (gen_indirect_jump (scratch));
11207 emit_barrier ();
11209 /* Run just enough of rest_of_compilation to get the insns emitted.
11210 There's not really enough bulk here to make other passes such as
11211      instruction scheduling worthwhile.  Note that use_thunk calls
11212 assemble_start_function and assemble_end_function. */
11213 insn = get_insns ();
11214 shorten_branches (insn);
11215 final_start_function (insn, file, 1);
11216 final (insn, file, 1);
11217 final_end_function ();
11219 reload_completed = 0;
11220 epilogue_completed = 0;
11223 /* Return true if sparc_output_mi_thunk would be able to output the
11224 assembler code for the thunk function specified by the arguments
11225 it is passed, and false otherwise. */
11226 static bool
11227 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11228 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11229 HOST_WIDE_INT vcall_offset,
11230                            const_tree function ATTRIBUTE_UNUSED)
11231 {
11232   /* Bound the loop used in the default method above.  */
11233   return (vcall_offset >= -32768 || ! fixed_regs[5]);
11234 }
11236 /* How to allocate a 'struct machine_function'. */
11238 static struct machine_function *
11239 sparc_init_machine_status (void)
11240 {
11241   return ggc_alloc_cleared_machine_function ();
11242 }
11244 /* Locate some local-dynamic symbol still in use by this function
11245 so that we can print its name in local-dynamic base patterns. */
11247 static const char *
11248 get_some_local_dynamic_name (void)
11249 {
11250   rtx insn;
11252   if (cfun->machine->some_ld_name)
11253     return cfun->machine->some_ld_name;
11255   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11256     if (INSN_P (insn)
11257         && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11258       return cfun->machine->some_ld_name;
11260   gcc_unreachable ();
11261 }
11263 static int
11264 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11265 {
11266   rtx x = *px;
11268   if (x
11269       && GET_CODE (x) == SYMBOL_REF
11270       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11271     {
11272       cfun->machine->some_ld_name = XSTR (x, 0);
11273       return 1;
11274     }
11276   return 0;
11277 }
11279 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11280 We need to emit DTP-relative relocations. */
11282 static void
11283 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11284 {
11285   switch (size)
11286     {
11287     case 4:
11288       fputs ("\t.word\t%r_tls_dtpoff32(", file);
11289       break;
11290     case 8:
11291       fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11292       break;
11293     default:
11294       gcc_unreachable ();
11295     }
11296   output_addr_const (file, x);
11297   fputs (")", file);
11298 }
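/* E.g. a 32-bit DTP-relative reference to a symbol X prints as

	.word	%r_tls_dtpoff32(x)

   which the linker resolves to X's offset from the start of its
   module's TLS block.  */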
11300 /* Do whatever processing is required at the end of a file. */
11302 static void
11303 sparc_file_end (void)
11305 /* If we need to emit the special GOT helper function, do so now. */
11306 if (got_helper_rtx)
11308 const char *name = XSTR (got_helper_rtx, 0);
11309 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11310 #ifdef DWARF2_UNWIND_INFO
11311 bool do_cfi;
11312 #endif
11314 if (USE_HIDDEN_LINKONCE)
11316 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11317 get_identifier (name),
11318 build_function_type_list (void_type_node,
11319 NULL_TREE));
11320 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11321 NULL_TREE, void_type_node);
11322 TREE_PUBLIC (decl) = 1;
11323 TREE_STATIC (decl) = 1;
11324 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11325 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11326 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11327 resolve_unique_section (decl, 0, flag_function_sections);
11328 allocate_struct_function (decl, true);
11329 cfun->is_thunk = 1;
11330 current_function_decl = decl;
11331 init_varasm_status ();
11332 assemble_start_function (decl, name);
11334 else
11336 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11337 switch_to_section (text_section);
11338 if (align > 0)
11339 ASM_OUTPUT_ALIGN (asm_out_file, align);
11340 ASM_OUTPUT_LABEL (asm_out_file, name);
11343 #ifdef DWARF2_UNWIND_INFO
11344 do_cfi = dwarf2out_do_cfi_asm ();
11345 if (do_cfi)
11346 fprintf (asm_out_file, "\t.cfi_startproc\n");
11347 #endif
11348 if (flag_delayed_branch)
11349 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11350 reg_name, reg_name);
11351 else
11352 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11353 reg_name, reg_name);
11354 #ifdef DWARF2_UNWIND_INFO
11355 if (do_cfi)
11356 fprintf (asm_out_file, "\t.cfi_endproc\n");
11357 #endif
11360 if (NEED_INDICATE_EXEC_STACK)
11361 file_end_indicate_exec_stack ();
11363 #ifdef TARGET_SOLARIS
11364 solaris_file_end ();
11365 #endif
11368 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11369 /* Implement TARGET_MANGLE_TYPE. */
11371 static const char *
11372 sparc_mangle_type (const_tree type)
11373 {
11374   if (!TARGET_64BIT
11375       && TYPE_MAIN_VARIANT (type) == long_double_type_node
11376       && TARGET_LONG_DOUBLE_128)
11377     return "g";
11379   /* For all other types, use normal C++ mangling.  */
11380   return NULL;
11381 }
11382 #endif
11384 /* Expand a membar instruction for various use cases.  Both the LOAD_STORE
11385    and BEFORE_AFTER arguments are of the form X_Y; they are two-bit masks
11386    where bit 0 indicates that X is true, and bit 1 that Y is true.  */
11388 void
11389 sparc_emit_membar_for_model (enum memmodel model,
11390 int load_store, int before_after)
11392 /* Bits for the MEMBAR mmask field. */
11393 const int LoadLoad = 1;
11394 const int StoreLoad = 2;
11395 const int LoadStore = 4;
11396 const int StoreStore = 8;
11398 int mm = 0, implied = 0;
11400 switch (sparc_memory_model)
11402 case SMM_SC:
11403 /* Sequential Consistency. All memory transactions are immediately
11404 visible in sequential execution order. No barriers needed. */
11405 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11406 break;
11408 case SMM_TSO:
11409 /* Total Store Ordering: all memory transactions with store semantics
11410 are followed by an implied StoreStore. */
11411 implied |= StoreStore;
11413      /* If we're not looking for a raw barrier (before+after), then atomic
11414 operations get the benefit of being both load and store. */
11415 if (load_store == 3 && before_after == 1)
11416 implied |= StoreLoad;
11417 /* FALLTHRU */
11419 case SMM_PSO:
11420 /* Partial Store Ordering: all memory transactions with load semantics
11421 are followed by an implied LoadLoad | LoadStore. */
11422 implied |= LoadLoad | LoadStore;
11424      /* If we're not looking for a raw barrier (before+after), then atomic
11425 operations get the benefit of being both load and store. */
11426 if (load_store == 3 && before_after == 2)
11427 implied |= StoreLoad | StoreStore;
11428 /* FALLTHRU */
11430 case SMM_RMO:
11431 /* Relaxed Memory Ordering: no implicit bits. */
11432 break;
11434 default:
11435 gcc_unreachable ();
11438 if (before_after & 1)
11440 if (model == MEMMODEL_RELEASE
11441 || model == MEMMODEL_ACQ_REL
11442 || model == MEMMODEL_SEQ_CST)
11444 if (load_store & 1)
11445 mm |= LoadLoad | StoreLoad;
11446 if (load_store & 2)
11447 mm |= LoadStore | StoreStore;
11450 if (before_after & 2)
11452 if (model == MEMMODEL_ACQUIRE
11453 || model == MEMMODEL_ACQ_REL
11454 || model == MEMMODEL_SEQ_CST)
11456 if (load_store & 1)
11457 mm |= LoadLoad | LoadStore;
11458 if (load_store & 2)
11459 mm |= StoreLoad | StoreStore;
11463 /* Remove the bits implied by the system memory model. */
11464 mm &= ~implied;
11466 /* For raw barriers (before+after), always emit a barrier.
11467 This will become a compile-time barrier if needed. */
11468 if (mm || before_after == 3)
11469 emit_insn (gen_membar (GEN_INT (mm)));
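/* Worked example: under the TSO model, a sequentially consistent fence
   (LOAD_STORE == 3, BEFORE_AFTER == 3) reduces to a single
   'membar #StoreLoad', the only ordering TSO does not already
   guarantee.  */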
11472 /* Expand code to perform an 8-bit or 16-bit compare-and-swap by doing
11473    a 32-bit compare-and-swap on the word containing the byte or half-word.  */
11475 static void
11476 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11477 rtx oldval, rtx newval)
11479 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11480 rtx addr = gen_reg_rtx (Pmode);
11481 rtx off = gen_reg_rtx (SImode);
11482 rtx oldv = gen_reg_rtx (SImode);
11483 rtx newv = gen_reg_rtx (SImode);
11484 rtx oldvalue = gen_reg_rtx (SImode);
11485 rtx newvalue = gen_reg_rtx (SImode);
11486 rtx res = gen_reg_rtx (SImode);
11487 rtx resv = gen_reg_rtx (SImode);
11488 rtx memsi, val, mask, end_label, loop_label, cc;
11490 emit_insn (gen_rtx_SET (VOIDmode, addr,
11491 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11493 if (Pmode != SImode)
11494 addr1 = gen_lowpart (SImode, addr1);
11495 emit_insn (gen_rtx_SET (VOIDmode, off,
11496 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11498 memsi = gen_rtx_MEM (SImode, addr);
11499 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11500 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11502 val = copy_to_reg (memsi);
11504 emit_insn (gen_rtx_SET (VOIDmode, off,
11505 gen_rtx_XOR (SImode, off,
11506 GEN_INT (GET_MODE (mem) == QImode
11507 ? 3 : 2))));
11509 emit_insn (gen_rtx_SET (VOIDmode, off,
11510 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11512 if (GET_MODE (mem) == QImode)
11513 mask = force_reg (SImode, GEN_INT (0xff));
11514 else
11515 mask = force_reg (SImode, GEN_INT (0xffff));
11517 emit_insn (gen_rtx_SET (VOIDmode, mask,
11518 gen_rtx_ASHIFT (SImode, mask, off)));
11520 emit_insn (gen_rtx_SET (VOIDmode, val,
11521 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11522 val)));
11524 oldval = gen_lowpart (SImode, oldval);
11525 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11526 gen_rtx_ASHIFT (SImode, oldval, off)));
11528 newval = gen_lowpart_common (SImode, newval);
11529 emit_insn (gen_rtx_SET (VOIDmode, newv,
11530 gen_rtx_ASHIFT (SImode, newval, off)));
11532 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11533 gen_rtx_AND (SImode, oldv, mask)));
11535 emit_insn (gen_rtx_SET (VOIDmode, newv,
11536 gen_rtx_AND (SImode, newv, mask)));
11538 end_label = gen_label_rtx ();
11539 loop_label = gen_label_rtx ();
11540 emit_label (loop_label);
11542 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11543 gen_rtx_IOR (SImode, oldv, val)));
11545 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11546 gen_rtx_IOR (SImode, newv, val)));
11548 emit_move_insn (bool_result, const1_rtx);
11550 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11552 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11554 emit_insn (gen_rtx_SET (VOIDmode, resv,
11555 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11556 res)));
11558 emit_move_insn (bool_result, const0_rtx);
11560 cc = gen_compare_reg_1 (NE, resv, val);
11561 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11563 /* Use cbranchcc4 to separate the compare and branch! */
11564 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11565 cc, const0_rtx, loop_label));
11567 emit_label (end_label);
11569 emit_insn (gen_rtx_SET (VOIDmode, res,
11570 gen_rtx_AND (SImode, res, mask)));
11572 emit_insn (gen_rtx_SET (VOIDmode, res,
11573 gen_rtx_LSHIFTRT (SImode, res, off)));
11575 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
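/* The net effect of the above: the narrow value is re-expressed as a
   masked field of its containing aligned word, and a 32-bit CAS loop
   retries whenever the bytes outside that field change underneath us,
   so the sub-word operation is exactly as strong as the word-sized
   one.  */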
11578 /* Expand code to perform a compare-and-swap. */
11580 void
11581 sparc_expand_compare_and_swap (rtx operands[])
11583 rtx bval, retval, mem, oldval, newval;
11584 enum machine_mode mode;
11585 enum memmodel model;
11587 bval = operands[0];
11588 retval = operands[1];
11589 mem = operands[2];
11590 oldval = operands[3];
11591 newval = operands[4];
11592 model = (enum memmodel) INTVAL (operands[6]);
11593 mode = GET_MODE (mem);
11595 sparc_emit_membar_for_model (model, 3, 1);
11597 if (reg_overlap_mentioned_p (retval, oldval))
11598 oldval = copy_to_reg (oldval);
11600 if (mode == QImode || mode == HImode)
11601 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11602 else
11604 rtx (*gen) (rtx, rtx, rtx, rtx);
11605 rtx x;
11607 if (mode == SImode)
11608 gen = gen_atomic_compare_and_swapsi_1;
11609 else
11610 gen = gen_atomic_compare_and_swapdi_1;
11611 emit_insn (gen (retval, mem, oldval, newval));
11613 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11614 if (x != bval)
11615 convert_move (bval, x, 1);
11618 sparc_emit_membar_for_model (model, 3, 2);
11621 void
11622 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11624 rtx t_1, t_2, t_3;
11626 sel = gen_lowpart (DImode, sel);
11627 switch (vmode)
11629 case V2SImode:
11630 /* inp = xxxxxxxAxxxxxxxB */
11631 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11632 NULL_RTX, 1, OPTAB_DIRECT);
11633 /* t_1 = ....xxxxxxxAxxx. */
11634 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11635 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11636 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11637 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11638 /* sel = .......B */
11639 /* t_1 = ...A.... */
11640 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11641 /* sel = ...A...B */
11642 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11643 /* sel = AAAABBBB * 4 */
11644 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11645 /* sel = { A*4, A*4+1, A*4+2, ... } */
11646 break;
11648 case V4HImode:
11649 /* inp = xxxAxxxBxxxCxxxD */
11650 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11651 NULL_RTX, 1, OPTAB_DIRECT);
11652 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11653 NULL_RTX, 1, OPTAB_DIRECT);
11654 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11655 NULL_RTX, 1, OPTAB_DIRECT);
11656 /* t_1 = ..xxxAxxxBxxxCxx */
11657 /* t_2 = ....xxxAxxxBxxxC */
11658 /* t_3 = ......xxxAxxxBxx */
11659 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11660 GEN_INT (0x07),
11661 NULL_RTX, 1, OPTAB_DIRECT);
11662 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11663 GEN_INT (0x0700),
11664 NULL_RTX, 1, OPTAB_DIRECT);
11665 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11666 GEN_INT (0x070000),
11667 NULL_RTX, 1, OPTAB_DIRECT);
11668 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11669 GEN_INT (0x07000000),
11670 NULL_RTX, 1, OPTAB_DIRECT);
11671 /* sel = .......D */
11672 /* t_1 = .....C.. */
11673 /* t_2 = ...B.... */
11674 /* t_3 = .A...... */
11675 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11676 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11677 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11678 /* sel = .A.B.C.D */
11679 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11680 /* sel = AABBCCDD * 2 */
11681 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11682 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11683 break;
11685 case V8QImode:
11686 /* input = xAxBxCxDxExFxGxH */
11687 sel = expand_simple_binop (DImode, AND, sel,
11688 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11689 | 0x0f0f0f0f),
11690 NULL_RTX, 1, OPTAB_DIRECT);
11691 /* sel = .A.B.C.D.E.F.G.H */
11692 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11693 NULL_RTX, 1, OPTAB_DIRECT);
11694 /* t_1 = ..A.B.C.D.E.F.G. */
11695 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11696 NULL_RTX, 1, OPTAB_DIRECT);
11697 /* sel = .AABBCCDDEEFFGGH */
11698 sel = expand_simple_binop (DImode, AND, sel,
11699 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11700 | 0xff00ff),
11701 NULL_RTX, 1, OPTAB_DIRECT);
11702 /* sel = ..AB..CD..EF..GH */
11703 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11704 NULL_RTX, 1, OPTAB_DIRECT);
11705 /* t_1 = ....AB..CD..EF.. */
11706 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11707 NULL_RTX, 1, OPTAB_DIRECT);
11708 /* sel = ..ABABCDCDEFEFGH */
11709 sel = expand_simple_binop (DImode, AND, sel,
11710 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11711 NULL_RTX, 1, OPTAB_DIRECT);
11712 /* sel = ....ABCD....EFGH */
11713 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11714 NULL_RTX, 1, OPTAB_DIRECT);
11715 /* t_1 = ........ABCD.... */
11716 sel = gen_lowpart (SImode, sel);
11717 t_1 = gen_lowpart (SImode, t_1);
11718 break;
11720 default:
11721 gcc_unreachable ();
11724 /* Always perform the final addition/merge within the bmask insn. */
11725 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
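/* A worked example for the V4HImode case: the selector { 1, 2, 3, 0 }
   is condensed to sel = 0x01020300, multiplied by 0x22 to give
   0x22446600, and the bmask addition of 0x01010101 then produces
   0x23456701, i.e. the byte indices { 2,3, 4,5, 6,7, 0,1 } that make
   BSHUFFLE fetch halfwords 1, 2, 3 and 0 of the input. */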
11728 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11730 static bool
11731 sparc_frame_pointer_required (void)
11733 /* If the stack pointer is dynamically modified in the function, it cannot
11734 serve as the frame pointer. */
11735 if (cfun->calls_alloca)
11736 return true;
11738 /* If the function receives nonlocal gotos, it needs to save the frame
11739 pointer in the nonlocal_goto_save_area object. */
11740 if (cfun->has_nonlocal_label)
11741 return true;
11743 /* In flat mode, that's it. */
11744 if (TARGET_FLAT)
11745 return false;
11747 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11748 return !(crtl->is_leaf && only_leaf_regs_used ());
11751 /* The way this is structured, we can't eliminate SFP in favor of SP
11752 if the frame pointer is required: we want to use the SFP->HFP elimination
11753 in that case. But the test in update_eliminables doesn't know that we
11754 assume below that only the former elimination is done. */
11756 static bool
11757 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11759 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11762 /* Return the hard frame pointer directly to bypass the stack bias. */
11764 static rtx
11765 sparc_builtin_setjmp_frame_value (void)
11767 return hard_frame_pointer_rtx;
11770 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11771 they won't be allocated. */
11773 static void
11774 sparc_conditional_register_usage (void)
11776 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
11778 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11779 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11781 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
11782 then honor it. */
11783 if (TARGET_ARCH32 && fixed_regs[5])
11784 fixed_regs[5] = 1;
11785 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
11786 fixed_regs[5] = 0;
11787 if (! TARGET_V9)
11789 int regno;
11790 for (regno = SPARC_FIRST_V9_FP_REG;
11791 regno <= SPARC_LAST_V9_FP_REG;
11792 regno++)
11793 fixed_regs[regno] = 1;
11794 /* %fcc0 is used by v8 and v9. */
11795 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
11796 regno <= SPARC_LAST_V9_FCC_REG;
11797 regno++)
11798 fixed_regs[regno] = 1;
11800 if (! TARGET_FPU)
11802 int regno;
11803 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
11804 fixed_regs[regno] = 1;
11806 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
11807 then honor it. Likewise with g3 and g4. */
11808 if (fixed_regs[2] == 2)
11809 fixed_regs[2] = ! TARGET_APP_REGS;
11810 if (fixed_regs[3] == 2)
11811 fixed_regs[3] = ! TARGET_APP_REGS;
11812 if (TARGET_ARCH32 && fixed_regs[4] == 2)
11813 fixed_regs[4] = ! TARGET_APP_REGS;
11814 else if (TARGET_CM_EMBMEDANY)
11815 fixed_regs[4] = 1;
11816 else if (fixed_regs[4] == 2)
11817 fixed_regs[4] = 0;
11818 if (TARGET_FLAT)
11820 int regno;
11821 /* Disable leaf functions. */
11822 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
11823 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11824 leaf_reg_remap [regno] = regno;
11826 if (TARGET_VIS)
11827 global_regs[SPARC_GSR_REG] = 1;
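/* An entry of 2 in fixed_regs[] above is the "no explicit user
   request" marker from the FIXED_REGISTERS initializer, so the
   TARGET_APP_REGS and TARGET_CM_EMBMEDANY defaults apply only while
   an entry is still 2; an explicit -ffixed-gN or -fcall-used-gN
   setting (0 or 1) is honored unchanged, as the comments above say. */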
11830 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
11832 - We can't load constants into FP registers.
11833 - We can't load FP constants into integer registers when soft-float,
11834 because there is no soft-float pattern with an r/F constraint.
11835 - We can't load FP constants into integer registers for TFmode unless
11836 it is 0.0L, because there is no movtf pattern with an r/F constraint.
11837 - Try to reload integer constants (symbolic or otherwise) back into
11838 registers directly, rather than having them dumped to memory. */
11840 static reg_class_t
11841 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
11843 enum machine_mode mode = GET_MODE (x);
11844 if (CONSTANT_P (x))
11846 if (FP_REG_CLASS_P (rclass)
11847 || rclass == GENERAL_OR_FP_REGS
11848 || rclass == GENERAL_OR_EXTRA_FP_REGS
11849 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
11850 || (mode == TFmode && ! const_zero_operand (x, mode)))
11851 return NO_REGS;
11853 if (GET_MODE_CLASS (mode) == MODE_INT)
11854 return GENERAL_REGS;
11856 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11858 if (! FP_REG_CLASS_P (rclass)
11859 || !(const_zero_operand (x, mode)
11860 || const_all_ones_operand (x, mode)))
11861 return NO_REGS;
11865 if (TARGET_VIS3
11866 && ! TARGET_ARCH64
11867 && (rclass == EXTRA_FP_REGS
11868 || rclass == GENERAL_OR_EXTRA_FP_REGS))
11870 int regno = true_regnum (x);
11872 if (SPARC_INT_REG_P (regno))
11873 return (rclass == EXTRA_FP_REGS
11874 ? FP_REGS : GENERAL_OR_FP_REGS);
11877 return rclass;
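/* For instance, a nonzero TFmode constant is refused for every class
   and is therefore spilled to memory, while an integer constant asked
   for in GENERAL_REGS keeps GENERAL_REGS and is thus rematerialized
   directly rather than dumped to memory. */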
11880 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
11881 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
11883 const char *
11884 output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
11886 char mulstr[32];
11888 gcc_assert (! TARGET_ARCH64);
11890 if (sparc_check_64 (operands[1], insn) <= 0)
11891 output_asm_insn ("srl\t%L1, 0, %L1", operands);
11892 if (which_alternative == 1)
11893 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
11894 if (GET_CODE (operands[2]) == CONST_INT)
11896 if (which_alternative == 1)
11898 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11899 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
11900 output_asm_insn (mulstr, operands);
11901 return "srlx\t%L0, 32, %H0";
11903 else
11905 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11906 output_asm_insn ("or\t%L1, %3, %3", operands);
11907 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
11908 output_asm_insn (mulstr, operands);
11909 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11910 return "mov\t%3, %L0";
11913 else if (rtx_equal_p (operands[1], operands[2]))
11915 if (which_alternative == 1)
11917 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11918 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
11919 output_asm_insn (mulstr, operands);
11920 return "srlx\t%L0, 32, %H0";
11922 else
11924 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11925 output_asm_insn ("or\t%L1, %3, %3", operands);
11926 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
11927 output_asm_insn (mulstr, operands);
11928 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11929 return "mov\t%3, %L0";
11932 if (sparc_check_64 (operands[2], insn) <= 0)
11933 output_asm_insn ("srl\t%L2, 0, %L2", operands);
11934 if (which_alternative == 1)
11936 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11937 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
11938 output_asm_insn ("or\t%L2, %L1, %L1", operands);
11939 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
11940 output_asm_insn (mulstr, operands);
11941 return "srlx\t%L0, 32, %H0";
11943 else
11945 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11946 output_asm_insn ("sllx\t%H2, 32, %4", operands);
11947 output_asm_insn ("or\t%L1, %3, %3", operands);
11948 output_asm_insn ("or\t%L2, %4, %4", operands);
11949 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
11950 output_asm_insn (mulstr, operands);
11951 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11952 return "mov\t%3, %L0";
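/* As an illustration, the final register-by-register path above (with
   which_alternative == 0) emits a sequence along these lines when the
   opcode is mulx:

	srl	%L1, 0, %L1	! only if %L1 is not known zero-extended
	srl	%L2, 0, %L2	! likewise for %L2
	sllx	%H1, 32, %3
	sllx	%H2, 32, %4
	or	%L1, %3, %3
	or	%L2, %4, %4
	mulx	%3, %4, %3
	srlx	%3, 32, %H0
	mov	%3, %L0   */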
11956 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11957 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
11958 and INNER_MODE are the modes describing TARGET. */
11960 static void
11961 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
11962 enum machine_mode inner_mode)
11964 rtx t1, final_insn;
11965 int bmask;
11967 t1 = gen_reg_rtx (mode);
11969 elt = convert_modes (SImode, inner_mode, elt, true);
11970 emit_move_insn (gen_lowpart(SImode, t1), elt);
11972 switch (mode)
11974 case V2SImode:
11975 final_insn = gen_bshufflev2si_vis (target, t1, t1);
11976 bmask = 0x45674567;
11977 break;
11978 case V4HImode:
11979 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
11980 bmask = 0x67676767;
11981 break;
11982 case V8QImode:
11983 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
11984 bmask = 0x77777777;
11985 break;
11986 default:
11987 gcc_unreachable ();
11990 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
11991 force_reg (SImode, GEN_INT (bmask))));
11992 emit_insn (final_insn);
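/* The bmask constants are byte indices into the 16-byte concatenation
   t1:t1. ELT was placed in the low 32 bits of t1, i.e. bytes 4-7 of
   the register image, so 0x45674567 replicates the SImode element
   twice, 0x67676767 replicates the halfword in bytes 6-7 four times,
   and 0x77777777 replicates the byte in byte 7 eight times. */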
11995 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11996 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
11998 static void
11999 vector_init_fpmerge (rtx target, rtx elt)
12001 rtx t1, t2, t2_low, t3, t3_low;
12003 t1 = gen_reg_rtx (V4QImode);
12004 elt = convert_modes (SImode, QImode, elt, true);
12005 emit_move_insn (gen_lowpart (SImode, t1), elt);
12007 t2 = gen_reg_rtx (V8QImode);
12008 t2_low = gen_lowpart (V4QImode, t2);
12009 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12011 t3 = gen_reg_rtx (V8QImode);
12012 t3_low = gen_lowpart (V4QImode, t3);
12013 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12015 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
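/* FPMERGE interleaves the bytes of its two V4QI operands, so merging
   a value with itself doubles the run of ELT bytes at each step: the
   low halves go ...E -> ..EE -> EEEE, and the final merge fills all
   eight bytes of the target with E. */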
12018 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12019 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
12021 static void
12022 vector_init_faligndata (rtx target, rtx elt)
12024 rtx t1 = gen_reg_rtx (V4HImode);
12025 int i;
12027 elt = convert_modes (SImode, HImode, elt, true);
12028 emit_move_insn (gen_lowpart (SImode, t1), elt);
12030 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12031 force_reg (SImode, GEN_INT (6)),
12032 const0_rtx));
12034 for (i = 0; i < 4; i++)
12035 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
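/* The ALIGNADDR above matters only for its side effect of setting
   GSR.align to 6: each FALIGNDATA then extracts bytes 6..13 of the
   16-byte t1:target pair, shifting the element held in t1's last
   halfword into the target while sliding the previous contents along,
   so four iterations fill all four halfwords. */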
12038 /* Emit code to initialize TARGET to values for individual fields VALS. */
12040 void
12041 sparc_expand_vector_init (rtx target, rtx vals)
12043 const enum machine_mode mode = GET_MODE (target);
12044 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
12045 const int n_elts = GET_MODE_NUNITS (mode);
12046 int i, n_var = 0;
12047 bool all_same;
12048 rtx mem;
12050 all_same = true;
12051 for (i = 0; i < n_elts; i++)
12053 rtx x = XVECEXP (vals, 0, i);
12054 if (!CONSTANT_P (x))
12055 n_var++;
12057 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12058 all_same = false;
12061 if (n_var == 0)
12063 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12064 return;
12067 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12069 if (GET_MODE_SIZE (inner_mode) == 4)
12071 emit_move_insn (gen_lowpart (SImode, target),
12072 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12073 return;
12075 else if (GET_MODE_SIZE (inner_mode) == 8)
12077 emit_move_insn (gen_lowpart (DImode, target),
12078 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12079 return;
12082 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12083 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12085 emit_move_insn (gen_highpart (word_mode, target),
12086 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12087 emit_move_insn (gen_lowpart (word_mode, target),
12088 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12089 return;
12092 if (all_same && GET_MODE_SIZE (mode) == 8)
12094 if (TARGET_VIS2)
12096 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12097 return;
12099 if (mode == V8QImode)
12101 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12102 return;
12104 if (mode == V4HImode)
12106 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12107 return;
12111 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12112 for (i = 0; i < n_elts; i++)
12113 emit_move_insn (adjust_address_nv (mem, inner_mode,
12114 i * GET_MODE_SIZE (inner_mode)),
12115 XVECEXP (vals, 0, i));
12116 emit_move_insn (target, mem);
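/* The general case goes through memory: each element is stored into
   its slot of a stack temporary and the whole vector is then loaded
   back with a single move. */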
12119 /* Implement TARGET_SECONDARY_RELOAD. */
12121 static reg_class_t
12122 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12123 enum machine_mode mode, secondary_reload_info *sri)
12125 enum reg_class rclass = (enum reg_class) rclass_i;
12127 sri->icode = CODE_FOR_nothing;
12128 sri->extra_cost = 0;
12130 /* We need a temporary when loading/storing a HImode/QImode value
12131 between memory and the FPU registers. This can happen when combine puts
12132 a paradoxical subreg in a float/fix conversion insn. */
12133 if (FP_REG_CLASS_P (rclass)
12134 && (mode == HImode || mode == QImode)
12135 && (GET_CODE (x) == MEM
12136 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12137 && true_regnum (x) == -1)))
12138 return GENERAL_REGS;
12140 /* On 32-bit we need a temporary when loading/storing a DFmode value
12141 between unaligned memory and the upper FPU registers. */
12142 if (TARGET_ARCH32
12143 && rclass == EXTRA_FP_REGS
12144 && mode == DFmode
12145 && GET_CODE (x) == MEM
12146 && ! mem_min_alignment (x, 8))
12147 return FP_REGS;
12149 if (((TARGET_CM_MEDANY
12150 && symbolic_operand (x, mode))
12151 || (TARGET_CM_EMBMEDANY
12152 && text_segment_operand (x, mode)))
12153 && ! flag_pic)
12155 if (in_p)
12156 sri->icode = direct_optab_handler (reload_in_optab, mode);
12157 else
12158 sri->icode = direct_optab_handler (reload_out_optab, mode);
12159 return NO_REGS;
12162 if (TARGET_VIS3 && TARGET_ARCH32)
12164 int regno = true_regnum (x);
12166 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12167 to move 8-byte values in 4-byte pieces. This only works via
12168 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12169 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12170 an FP_REGS intermediate move. */
12171 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12172 || ((general_or_i64_p (rclass)
12173 || rclass == GENERAL_OR_FP_REGS)
12174 && SPARC_FP_REG_P (regno)))
12176 sri->extra_cost = 2;
12177 return FP_REGS;
12181 return NO_REGS;
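/* Concretely: the VIS3 fp<->int moves are 4-byte operations, so an
   8-byte value is transferred as two pieces, and those pieces can only
   land in %f0-%f31. A transfer between GENERAL_REGS and an upper
   (%f32+) register therefore bounces through an FP_REGS temporary,
   with sri->extra_cost reflecting the additional copies. */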
12184 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12185 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12187 bool
12188 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
12190 enum rtx_code rc = GET_CODE (operands[1]);
12191 enum machine_mode cmp_mode;
12192 rtx cc_reg, dst, cmp;
12194 cmp = operands[1];
12195 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12196 return false;
12198 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12199 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12201 cmp_mode = GET_MODE (XEXP (cmp, 0));
12202 rc = GET_CODE (cmp);
12204 dst = operands[0];
12205 if (! rtx_equal_p (operands[2], dst)
12206 && ! rtx_equal_p (operands[3], dst))
12208 if (reg_overlap_mentioned_p (dst, cmp))
12209 dst = gen_reg_rtx (mode);
12211 emit_move_insn (dst, operands[3]);
12213 else if (operands[2] == dst)
12215 operands[2] = operands[3];
12217 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12218 rc = reverse_condition_maybe_unordered (rc);
12219 else
12220 rc = reverse_condition (rc);
12223 if (XEXP (cmp, 1) == const0_rtx
12224 && GET_CODE (XEXP (cmp, 0)) == REG
12225 && cmp_mode == DImode
12226 && v9_regcmp_p (rc))
12227 cc_reg = XEXP (cmp, 0);
12228 else
12229 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12231 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12233 emit_insn (gen_rtx_SET (VOIDmode, dst,
12234 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12236 if (dst != operands[0])
12237 emit_move_insn (operands[0], dst);
12239 return true;
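/* Note that a single conditional move always suffices here: if the
   destination overlaps neither data operand, operands[3] is copied in
   unconditionally first; if the destination is operands[2], the
   condition is reversed instead (using the unordered-aware reversal
   for FP comparisons) so that operands[3] becomes the value that is
   conditionally moved. */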
12242 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12243 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12244 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12245 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12246 code to be used for the condition mask. */
12248 void
12249 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12251 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12252 enum rtx_code code = GET_CODE (operands[3]);
12254 mask = gen_reg_rtx (Pmode);
12255 cop0 = operands[4];
12256 cop1 = operands[5];
12257 if (code == LT || code == GE)
12259 rtx t;
12261 code = swap_condition (code);
12262 t = cop0; cop0 = cop1; cop1 = t;
12265 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12267 fcmp = gen_rtx_UNSPEC (Pmode,
12268 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12269 fcode);
12271 cmask = gen_rtx_UNSPEC (DImode,
12272 gen_rtvec (2, mask, gsr),
12273 ccode);
12275 bshuf = gen_rtx_UNSPEC (mode,
12276 gen_rtvec (3, operands[1], operands[2], gsr),
12277 UNSPEC_BSHUFFLE);
12279 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12280 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12282 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
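/* The emitted sequence is thus a three-insn chain: a vector compare
   (unspec FCODE) computing a bit-mask, an unspec CCODE write that
   expands the mask into a byte-selection pattern in the GSR, and a
   BSHUFFLE picking between operands[1] and operands[2] under GSR
   control. */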
12285 /* On SPARC, any mode that naturally allocates into the float
12286 registers should return 4 here. */
12288 unsigned int
12289 sparc_regmode_natural_size (enum machine_mode mode)
12291 int size = UNITS_PER_WORD;
12293 if (TARGET_ARCH64)
12295 enum mode_class mclass = GET_MODE_CLASS (mode);
12297 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12298 size = 4;
12301 return size;
12304 /* Return TRUE if it is a good idea to tie two pseudo registers
12305 when one has mode MODE1 and one has mode MODE2.
12306 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12307 for any hard reg, then this must be FALSE for correct output.
12309 For V9 we have to deal with the fact that only the lower 32 floating
12310 point registers are 32-bit addressable. */
12312 bool
12313 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12315 enum mode_class mclass1, mclass2;
12316 unsigned short size1, size2;
12318 if (mode1 == mode2)
12319 return true;
12321 mclass1 = GET_MODE_CLASS (mode1);
12322 mclass2 = GET_MODE_CLASS (mode2);
12323 if (mclass1 != mclass2)
12324 return false;
12326 if (! TARGET_V9)
12327 return true;
12329 /* Classes are the same and we are V9 so we have to deal with upper
12330 vs. lower floating point registers. If one of the modes is a
12331 4-byte mode, and the other is not, we have to mark them as not
12332 tieable because only the lower 32 floating point registers are
12333 addressable 32 bits at a time.
12335 We can't just test explicitly for SFmode, otherwise we won't
12336 cover the vector mode cases properly. */
12338 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12339 return true;
12341 size1 = GET_MODE_SIZE (mode1);
12342 size2 = GET_MODE_SIZE (mode2);
12343 if ((size1 > 4 && size2 == 4)
12344 || (size2 > 4 && size1 == 4))
12345 return false;
12347 return true;
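/* For example, on V9 this forbids tying SFmode to DFmode: a DFmode
   value can live in %f32 and above, where no 4-byte access exists, so
   a word-sized subreg of it could not be loaded or stored directly. */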
12350 #include "gt-sparc.h"