/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2014 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "vec.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "df.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;
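
  /* Worked example (illustrative, using the ultrasparc_costs entry
     below): there int_mul is COSTS_N_INSNS (4) and int_mul_bit_factor
     is 2, so a multiply whose operand has its highest set bit at
     position 11 costs COSTS_N_INSNS (4) + (11 - 3) / 2, i.e. the base
     multiply cost plus four extra cost units.  */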

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
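
/* Illustrative sketch (laying out the sequence the comment above quotes,
   not code emitted verbatim by this file): with the slot reserved, a leaf
   sibcall to Y looks like

	or	%o7, %g0, X	! copy the return address away
	call	Y		! relaxable into a plain branch
	 or	X, %g0, %o7	! restore %o7 in the delay slot

   which avoids the sethi/jmp pair that the assembler and linker cannot
   safely relax.  */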

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
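
/* Reading the table above (it is indexed by hard register number): the
   %in registers 24-29 remap to the %out registers 8-13 and %i7 (31) to
   %o7 (15), the frame pointer (30) stays at -1 per the comment above,
   and the FP registers from 32 up map to themselves.  */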

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static int sparc_register_move_cost (enum machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree,
						      enum machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     enum machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 enum machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       enum machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (enum machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    enum machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   enum machine_mode,
					   secondary_reload_info *);
static enum machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
     do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
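
/* Background note (standard SPARC behavior, not specific to this port):
   LDSTUB atomically loads a byte and overwrites it with all-ones, so the
   "set" value observed by __atomic_test_and_set is 0xff rather than 1;
   the hook above is how the middle-end learns about that.  */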

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}
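
/* For example (illustrative), for (zero_extend:SI (mem:QI ...)) this
   returns the inner MEM, so the errata pass below can treat extending
   loads and plain loads uniformly.  */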

/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static bool
sparc_gate_work_around_errata (void)
{
  /* The only errata we handle are those of the AT697F and UT699.  */
  return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
}

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx insn, next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
			 ld [address], %fx+1
			 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			 ld [address], %fx+1
			 FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		     ld [address], %fx+1
		     fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL_RTX;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (GET_CODE (PATTERN (after)) == SEQUENCE)
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = XVECEXP (PATTERN (after), 0, 1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_rtl_sharing, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return sparc_gate_work_around_errata (); }
  unsigned int execute () { return sparc_do_work_around_errata (); }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}
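
/* Usage sketch: sparc_option_override below instantiates this pass and
   registers it with PASS_POS_INSERT_AFTER relative to "dbr" (delayed
   branch scheduling), so it runs on the (essentially) final insn stream,
   including filled delay slots.  */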

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}
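
/* Illustrative output (the exact mask depends on the configuration):

     Final target_flags: (xxxxxxxx) [ FPU V8 ]

   where the hexadecimal value is the raw target_flags word and the names
   are decoded by dump_target_flag_bits above.  */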

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }
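
  /* Example (illustrative): -mdebug=options enables only the option
     dumps below, while -mdebug=all,!options enables everything except
     them; entries are comma-separated and a '!' prefix clears the named
     mask instead of setting it.  */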

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;
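
  /* Putting the implications above together (illustrative): -mvis3 alone
     on a 32-bit target with an FPU ends up enabling VIS3, VIS2, VIS, V9
     and V8PLUS, whereas adding -mno-fpu strips the VIS and FMAF bits
     again.  */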

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    }

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass,		/* pass */
      "dbr",			/* reference_pass_name */
      1,			/* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER	/* po_op */
    };
  register_pass (&insert_pass_work_around_errata);
}

/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}
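
/* Background (illustrative): the V9 branch-on-register-contents
   instructions (brz, brnz, brgez, brlz, brlez, brgz) and the matching
   movr variants compare a register against zero, which is why only the
   six signed codes above qualify and the unsigned codes (GEU, LTU, ...)
   do not.  */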

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
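
/* Worked example (illustrative): 1.5f has the SFmode bit image
   0x3fc00000.  That value does not fit in a signed 13-bit immediate, but
   its low 10 bits are clear, so fp_sethi_p accepts it (a single sethi
   suffices) while fp_mov_p and fp_high_losum_p both reject it.  */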

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; now mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partitioning into hot
     and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}

/* Expand a move instruction.  Return true if all work is done.  */

bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if (GET_CODE (operands[1]) == LABEL_REF
	  && can_use_mov_pic_label_ref (operands[1]))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      gcc_assert (TARGET_ARCH64);
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
	 not storing directly into memory.  So fix this up to avoid
	 crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear or set to all-ones FP registers if TARGET_VIS, and
	 always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers.  */
	      || (mode == DFmode
		  && ! can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case TImode:
      {
	rtx high, low;
	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}
1876 /* Load OP1, a 32-bit constant, into OP0, a register.
1877 We know it can't be done in one insn when we get
1878 here; the move expander guarantees this. */
1880 static void
1881 sparc_emit_set_const32 (rtx op0, rtx op1)
1883 enum machine_mode mode = GET_MODE (op0);
1884 rtx temp = op0;
1886 if (can_create_pseudo_p ())
1887 temp = gen_reg_rtx (mode);
1889 if (GET_CODE (op1) == CONST_INT)
1891 gcc_assert (!small_int_operand (op1, mode)
1892 && !const_high_operand (op1, mode));
1894 /* Emit them as real moves instead of a HIGH/LO_SUM,
1895 this way CSE can see everything and reuse intermediate
1896 values if it wants. */
1897 emit_insn (gen_rtx_SET (VOIDmode, temp,
1898 GEN_INT (INTVAL (op1)
1899 & ~(HOST_WIDE_INT)0x3ff)));
1901 emit_insn (gen_rtx_SET (VOIDmode,
1902 op0,
1903 gen_rtx_IOR (mode, temp,
1904 GEN_INT (INTVAL (op1) & 0x3ff))));
1906 else
1908 /* A symbol; emit it in the traditional way. */
1909 emit_insn (gen_rtx_SET (VOIDmode, temp,
1910 gen_rtx_HIGH (mode, op1)));
1911 emit_insn (gen_rtx_SET (VOIDmode,
1912 op0, gen_rtx_LO_SUM (mode, temp, op1)));
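/* For example, with op1 == 0x12345678 the CONST_INT path above emits
   the equivalent of:

	sethi	%hi(0x12345678), %temp	! temp = 0x12345400
	or	%temp, 0x278, %op0

   since 0x12345678 & ~0x3ff == 0x12345400 and
   0x12345678 & 0x3ff == 0x278.  */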
1916 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1917 If TEMP is nonzero, we are forbidden to use any other scratch
1918 registers. Otherwise, we are allowed to generate them as needed.
1920 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1921 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1923 void
1924 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1926 rtx temp1, temp2, temp3, temp4, temp5;
1927 rtx ti_temp = 0;
1929 if (temp && GET_MODE (temp) == TImode)
1931 ti_temp = temp;
1932 temp = gen_rtx_REG (DImode, REGNO (temp));
1935 /* SPARC-V9 code-model support. */
1936 switch (sparc_cmodel)
1938 case CM_MEDLOW:
1939 /* The range spanned by all instructions in the object is less
1940 than 2^31 bytes (2GB) and the distance from any instruction
1941 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1942 than 2^31 bytes (2GB).
1944 The executable must be in the low 4TB of the virtual address
1945 space.
1947 sethi %hi(symbol), %temp1
1948 or %temp1, %lo(symbol), %reg */
1949 if (temp)
1950 temp1 = temp; /* op0 is allowed. */
1951 else
1952 temp1 = gen_reg_rtx (DImode);
1954 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1955 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1956 break;
1958 case CM_MEDMID:
1959 /* The range spanned by all instructions in the object is less
1960 than 2^31 bytes (2GB) and the distance from any instruction
1961 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1962 than 2^31 bytes (2GB).
1964 The executable must be in the low 16TB of the virtual address
1965 space.
1967 sethi %h44(symbol), %temp1
1968 or %temp1, %m44(symbol), %temp2
1969 sllx %temp2, 12, %temp3
1970 or %temp3, %l44(symbol), %reg */
1971 if (temp)
1973 temp1 = op0;
1974 temp2 = op0;
1975 temp3 = temp; /* op0 is allowed. */
1977 else
1979 temp1 = gen_reg_rtx (DImode);
1980 temp2 = gen_reg_rtx (DImode);
1981 temp3 = gen_reg_rtx (DImode);
1984 emit_insn (gen_seth44 (temp1, op1));
1985 emit_insn (gen_setm44 (temp2, temp1, op1));
1986 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1987 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1988 emit_insn (gen_setl44 (op0, temp3, op1));
1989 break;
1991 case CM_MEDANY:
1992 /* The range spanned by all instructions in the object is less
1993 than 2^31 bytes (2GB) and the distance from any instruction
1994 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1995 than 2^31 bytes (2GB).
1997 The executable can be placed anywhere in the virtual address
1998 space.
2000 sethi %hh(symbol), %temp1
2001 sethi %lm(symbol), %temp2
2002 or %temp1, %hm(symbol), %temp3
2003 sllx %temp3, 32, %temp4
2004 or %temp4, %temp2, %temp5
2005 or %temp5, %lo(symbol), %reg */
2006 if (temp)
2008 /* It is possible that one of the registers we got for operands[2]
2009 might coincide with that of operands[0] (which is why we made
2010 it TImode). Pick the other one to use as our scratch. */
2011 if (rtx_equal_p (temp, op0))
2013 gcc_assert (ti_temp);
2014 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2016 temp1 = op0;
2017 temp2 = temp; /* op0 is _not_ allowed, see above. */
2018 temp3 = op0;
2019 temp4 = op0;
2020 temp5 = op0;
2022 else
2024 temp1 = gen_reg_rtx (DImode);
2025 temp2 = gen_reg_rtx (DImode);
2026 temp3 = gen_reg_rtx (DImode);
2027 temp4 = gen_reg_rtx (DImode);
2028 temp5 = gen_reg_rtx (DImode);
2031 emit_insn (gen_sethh (temp1, op1));
2032 emit_insn (gen_setlm (temp2, op1));
2033 emit_insn (gen_sethm (temp3, temp1, op1));
2034 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2035 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2036 emit_insn (gen_rtx_SET (VOIDmode, temp5,
2037 gen_rtx_PLUS (DImode, temp4, temp2)));
2038 emit_insn (gen_setlo (op0, temp5, op1));
2039 break;
2041 case CM_EMBMEDANY:
2042 /* Old old old backwards-compatibility cruft here.
2043 Essentially it is MEDLOW with a fixed 64-bit
2044 virtual base added to all data segment addresses.
2045 Text-segment stuff is computed like MEDANY; we can't
2046 reuse the code above because the relocation knobs
2047 look different.
2049 Data segment: sethi %hi(symbol), %temp1
2050 add %temp1, EMBMEDANY_BASE_REG, %temp2
2051 or %temp2, %lo(symbol), %reg */
2052 if (data_segment_operand (op1, GET_MODE (op1)))
2054 if (temp)
2056 temp1 = temp; /* op0 is allowed. */
2057 temp2 = op0;
2059 else
2061 temp1 = gen_reg_rtx (DImode);
2062 temp2 = gen_reg_rtx (DImode);
2065 emit_insn (gen_embmedany_sethi (temp1, op1));
2066 emit_insn (gen_embmedany_brsum (temp2, temp1));
2067 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2070 /* Text segment: sethi %uhi(symbol), %temp1
2071 sethi %hi(symbol), %temp2
2072 or %temp1, %ulo(symbol), %temp3
2073 sllx %temp3, 32, %temp4
2074 or %temp4, %temp2, %temp5
2075 or %temp5, %lo(symbol), %reg */
2076 else
2078 if (temp)
2080 /* It is possible that one of the registers we got for operands[2]
2081 might coincide with that of operands[0] (which is why we made
2082 it TImode). Pick the other one to use as our scratch. */
2083 if (rtx_equal_p (temp, op0))
2085 gcc_assert (ti_temp);
2086 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2088 temp1 = op0;
2089 temp2 = temp; /* op0 is _not_ allowed, see above. */
2090 temp3 = op0;
2091 temp4 = op0;
2092 temp5 = op0;
2094 else
2096 temp1 = gen_reg_rtx (DImode);
2097 temp2 = gen_reg_rtx (DImode);
2098 temp3 = gen_reg_rtx (DImode);
2099 temp4 = gen_reg_rtx (DImode);
2100 temp5 = gen_reg_rtx (DImode);
2103 emit_insn (gen_embmedany_textuhi (temp1, op1));
2104 emit_insn (gen_embmedany_texthi (temp2, op1));
2105 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2106 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2107 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2108 emit_insn (gen_rtx_SET (VOIDmode, temp5,
2109 gen_rtx_PLUS (DImode, temp4, temp2)));
2110 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2112 break;
2114 default:
2115 gcc_unreachable ();
2119 #if HOST_BITS_PER_WIDE_INT == 32
2120 static void
2121 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
2123 gcc_unreachable ();
2125 #else
2126 /* These avoid problems when cross-compiling. If we do not
2127 go through all this hair, then the optimizer will see
2128 invalid REG_EQUAL notes, or in some cases none at all. */
2129 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2130 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2131 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2132 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2134 /* The optimizer is not allowed to assume anything about exactly
2135 which bits are set by a HIGH; they are unspecified.
2136 Unfortunately this leads to many missed optimizations
2137 during CSE. To alleviate this, we mask out the non-HIGH bits;
2138 the result then matches a plain movdi. */
2139 static rtx
2140 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2142 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2145 static rtx
2146 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2148 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
2151 static rtx
2152 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2154 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2157 static rtx
2158 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2160 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2163 /* Worker routines for 64-bit constant formation on arch64.
2164 One of the key things to do in these emissions is
2165 to create as many temp REGs as possible. This makes it
2166 possible for half-built constants to be reused when
2167 similar values are required later on.
2168 Without doing this, the optimizer cannot see such
2169 opportunities. */
2171 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2172 unsigned HOST_WIDE_INT, int);
2174 static void
2175 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2176 unsigned HOST_WIDE_INT low_bits, int is_neg)
2178 unsigned HOST_WIDE_INT high_bits;
2180 if (is_neg)
2181 high_bits = (~low_bits) & 0xffffffff;
2182 else
2183 high_bits = low_bits;
2185 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2186 if (!is_neg)
2188 emit_insn (gen_rtx_SET (VOIDmode, op0,
2189 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2191 else
2193 /* If we are XOR'ing with -1, then we should emit a one's complement
2194 instead. This way the combiner will notice logical operations
2195 such as ANDN later on and substitute. */
2196 if ((low_bits & 0x3ff) == 0x3ff)
2198 emit_insn (gen_rtx_SET (VOIDmode, op0,
2199 gen_rtx_NOT (DImode, temp)));
2201 else
2203 emit_insn (gen_rtx_SET (VOIDmode, op0,
2204 gen_safe_XOR64 (temp,
2205 (-(HOST_WIDE_INT)0x400
2206 | (low_bits & 0x3ff)))));
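/* A worked example: for the constant -2000 (0xfffffffffffff830),
   is_neg is true and low_bits is 0xfffff830, so high_bits becomes
   ~low_bits & 0xffffffff == 0x7cf and we emit roughly:

	sethi	%hi(0x7cf), %temp	! temp = 0x400
	xor	%temp, -0x3d0, %op0	! 0x400 ^ 0xfffffffffffffc30

   which reconstructs the original 64-bit value.  */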
2211 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2212 unsigned HOST_WIDE_INT, int);
2214 static void
2215 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2216 unsigned HOST_WIDE_INT high_bits,
2217 unsigned HOST_WIDE_INT low_immediate,
2218 int shift_count)
2220 rtx temp2 = op0;
2222 if ((high_bits & 0xfffffc00) != 0)
2224 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2225 if ((high_bits & ~0xfffffc00) != 0)
2226 emit_insn (gen_rtx_SET (VOIDmode, op0,
2227 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2228 else
2229 temp2 = temp;
2231 else
2233 emit_insn (gen_safe_SET64 (temp, high_bits));
2234 temp2 = temp;
2237 /* Now shift it up into place. */
2238 emit_insn (gen_rtx_SET (VOIDmode, op0,
2239 gen_rtx_ASHIFT (DImode, temp2,
2240 GEN_INT (shift_count))));
2242 /* If there is a low immediate part piece, finish up by
2243 putting that in as well. */
2244 if (low_immediate != 0)
2245 emit_insn (gen_rtx_SET (VOIDmode, op0,
2246 gen_safe_OR64 (op0, low_immediate)));
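/* For instance, sparc_emit_set_const64 reaches this for constants
   with a zero low word: 0x1234567800000000 arrives as
   quick2 (op0, temp, 0x12345678, 0, 32) and produces roughly:

	sethi	%hi(0x12345678), %temp
	or	%temp, 0x278, %op0
	sllx	%op0, 32, %op0  */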
2249 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2250 unsigned HOST_WIDE_INT);
2252 /* Full 64-bit constant decomposition. Even though this is the
2253 'worst' case, we still optimize a few things away. */
2254 static void
2255 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2256 unsigned HOST_WIDE_INT high_bits,
2257 unsigned HOST_WIDE_INT low_bits)
2259 rtx sub_temp = op0;
2261 if (can_create_pseudo_p ())
2262 sub_temp = gen_reg_rtx (DImode);
2264 if ((high_bits & 0xfffffc00) != 0)
2266 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2267 if ((high_bits & ~0xfffffc00) != 0)
2268 emit_insn (gen_rtx_SET (VOIDmode,
2269 sub_temp,
2270 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2271 else
2272 sub_temp = temp;
2274 else
2276 emit_insn (gen_safe_SET64 (temp, high_bits));
2277 sub_temp = temp;
2280 if (can_create_pseudo_p ())
2282 rtx temp2 = gen_reg_rtx (DImode);
2283 rtx temp3 = gen_reg_rtx (DImode);
2284 rtx temp4 = gen_reg_rtx (DImode);
2286 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2287 gen_rtx_ASHIFT (DImode, sub_temp,
2288 GEN_INT (32))));
2290 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2291 if ((low_bits & ~0xfffffc00) != 0)
2293 emit_insn (gen_rtx_SET (VOIDmode, temp3,
2294 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2295 emit_insn (gen_rtx_SET (VOIDmode, op0,
2296 gen_rtx_PLUS (DImode, temp4, temp3)));
2298 else
2300 emit_insn (gen_rtx_SET (VOIDmode, op0,
2301 gen_rtx_PLUS (DImode, temp4, temp2)));
2304 else
2306 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2307 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2308 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2309 int to_shift = 12;
2311 /* We are in the middle of reload, so this is really
2312 painful. However, we still make an attempt to
2313 avoid emitting truly stupid code. */
2314 if (low1 != const0_rtx)
2316 emit_insn (gen_rtx_SET (VOIDmode, op0,
2317 gen_rtx_ASHIFT (DImode, sub_temp,
2318 GEN_INT (to_shift))));
2319 emit_insn (gen_rtx_SET (VOIDmode, op0,
2320 gen_rtx_IOR (DImode, op0, low1)));
2321 sub_temp = op0;
2322 to_shift = 12;
2324 else
2326 to_shift += 12;
2328 if (low2 != const0_rtx)
2330 emit_insn (gen_rtx_SET (VOIDmode, op0,
2331 gen_rtx_ASHIFT (DImode, sub_temp,
2332 GEN_INT (to_shift))));
2333 emit_insn (gen_rtx_SET (VOIDmode, op0,
2334 gen_rtx_IOR (DImode, op0, low2)));
2335 sub_temp = op0;
2336 to_shift = 8;
2338 else
2340 to_shift += 8;
2342 emit_insn (gen_rtx_SET (VOIDmode, op0,
2343 gen_rtx_ASHIFT (DImode, sub_temp,
2344 GEN_INT (to_shift))));
2345 if (low3 != const0_rtx)
2346 emit_insn (gen_rtx_SET (VOIDmode, op0,
2347 gen_rtx_IOR (DImode, op0, low3)));
2348 /* phew... */
2352 /* Analyze a 64-bit constant for certain properties. */
2353 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2354 unsigned HOST_WIDE_INT,
2355 int *, int *, int *);
2357 static void
2358 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2359 unsigned HOST_WIDE_INT low_bits,
2360 int *hbsp, int *lbsp, int *abbasp)
2362 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2363 int i;
2365 lowest_bit_set = highest_bit_set = -1;
2366 i = 0;
2369 if ((lowest_bit_set == -1)
2370 && ((low_bits >> i) & 1))
2371 lowest_bit_set = i;
2372 if ((highest_bit_set == -1)
2373 && ((high_bits >> (32 - i - 1)) & 1))
2374 highest_bit_set = (64 - i - 1);
2376 while (++i < 32
2377 && ((highest_bit_set == -1)
2378 || (lowest_bit_set == -1)));
2379 if (i == 32)
2381 i = 0;
2384 if ((lowest_bit_set == -1)
2385 && ((high_bits >> i) & 1))
2386 lowest_bit_set = i + 32;
2387 if ((highest_bit_set == -1)
2388 && ((low_bits >> (32 - i - 1)) & 1))
2389 highest_bit_set = 32 - i - 1;
2391 while (++i < 32
2392 && ((highest_bit_set == -1)
2393 || (lowest_bit_set == -1)));
2395 /* If there are no bits set, this should have gone out
2396 as one instruction! */
2397 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2398 all_bits_between_are_set = 1;
2399 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2401 if (i < 32)
2403 if ((low_bits & (1 << i)) != 0)
2404 continue;
2406 else
2408 if ((high_bits & (1 << (i - 32))) != 0)
2409 continue;
2411 all_bits_between_are_set = 0;
2412 break;
2414 *hbsp = highest_bit_set;
2415 *lbsp = lowest_bit_set;
2416 *abbasp = all_bits_between_are_set;
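/* As an example, for the constant 0x00000ff000000000 (bits 36-43 set)
   this returns highest_bit_set == 43, lowest_bit_set == 36 and
   all_bits_between_are_set == 1.  */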
2419 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2421 static int
2422 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2423 unsigned HOST_WIDE_INT low_bits)
2425 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2427 if (high_bits == 0
2428 || high_bits == 0xffffffff)
2429 return 1;
2431 analyze_64bit_constant (high_bits, low_bits,
2432 &highest_bit_set, &lowest_bit_set,
2433 &all_bits_between_are_set);
2435 if ((highest_bit_set == 63
2436 || lowest_bit_set == 0)
2437 && all_bits_between_are_set != 0)
2438 return 1;
2440 if ((highest_bit_set - lowest_bit_set) < 21)
2441 return 1;
2443 return 0;
2446 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2447 unsigned HOST_WIDE_INT,
2448 int, int);
2450 static unsigned HOST_WIDE_INT
2451 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2452 unsigned HOST_WIDE_INT low_bits,
2453 int lowest_bit_set, int shift)
2455 HOST_WIDE_INT hi, lo;
2457 if (lowest_bit_set < 32)
2459 lo = (low_bits >> lowest_bit_set) << shift;
2460 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2462 else
2464 lo = 0;
2465 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2467 gcc_assert (! (hi & lo));
2468 return (hi | lo);
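/* Continuing the example above: with lowest_bit_set == 36 and a shift
   of 10, the focus bits for 0x00000ff000000000 come out as
   (0xff0 >> 4) << 10 == 0x3fc00, a value sethi can load directly;
   the caller then shifts it left by 36 - 10 == 26 to rebuild the
   constant.  */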
2471 /* Here we are sure to be arch64 and this is an integer constant
2472 being loaded into a register. Emit the most efficient
2473 insn sequence possible. Detection of all the 1-insn cases
2474 has been done already. */
2475 static void
2476 sparc_emit_set_const64 (rtx op0, rtx op1)
2478 unsigned HOST_WIDE_INT high_bits, low_bits;
2479 int lowest_bit_set, highest_bit_set;
2480 int all_bits_between_are_set;
2481 rtx temp = 0;
2483 /* Sanity check that we know what we are working with. */
2484 gcc_assert (TARGET_ARCH64
2485 && (GET_CODE (op0) == SUBREG
2486 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2488 if (! can_create_pseudo_p ())
2489 temp = op0;
2491 if (GET_CODE (op1) != CONST_INT)
2493 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2494 return;
2497 if (! temp)
2498 temp = gen_reg_rtx (DImode);
2500 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2501 low_bits = (INTVAL (op1) & 0xffffffff);
2503 /* low_bits bits 0 --> 31
2504 high_bits bits 32 --> 63 */
2506 analyze_64bit_constant (high_bits, low_bits,
2507 &highest_bit_set, &lowest_bit_set,
2508 &all_bits_between_are_set);
2510 /* First try for a 2-insn sequence. */
2512 /* These situations are preferred because the optimizer can
2513 * do more things with them:
2514 * 1) mov -1, %reg
2515 * sllx %reg, shift, %reg
2516 * 2) mov -1, %reg
2517 * srlx %reg, shift, %reg
2518 * 3) mov some_small_const, %reg
2519 * sllx %reg, shift, %reg */
2521 if (((highest_bit_set == 63
2522 || lowest_bit_set == 0)
2523 && all_bits_between_are_set != 0)
2524 || ((highest_bit_set - lowest_bit_set) < 12))
2526 HOST_WIDE_INT the_const = -1;
2527 int shift = lowest_bit_set;
2529 if ((highest_bit_set != 63
2530 && lowest_bit_set != 0)
2531 || all_bits_between_are_set == 0)
2533 the_const =
2534 create_simple_focus_bits (high_bits, low_bits,
2535 lowest_bit_set, 0);
2537 else if (lowest_bit_set == 0)
2538 shift = -(63 - highest_bit_set);
2540 gcc_assert (SPARC_SIMM13_P (the_const));
2541 gcc_assert (shift != 0);
2543 emit_insn (gen_safe_SET64 (temp, the_const));
2544 if (shift > 0)
2545 emit_insn (gen_rtx_SET (VOIDmode,
2546 op0,
2547 gen_rtx_ASHIFT (DImode,
2548 temp,
2549 GEN_INT (shift))));
2550 else if (shift < 0)
2551 emit_insn (gen_rtx_SET (VOIDmode,
2552 op0,
2553 gen_rtx_LSHIFTRT (DImode,
2554 temp,
2555 GEN_INT (-shift))));
2556 return;
2559 /* Now a range of 22 or fewer bits set somewhere.
2560 * 1) sethi %hi(focus_bits), %reg
2561 * sllx %reg, shift, %reg
2562 * 2) sethi %hi(focus_bits), %reg
2563 * srlx %reg, shift, %reg */
2565 if ((highest_bit_set - lowest_bit_set) < 21)
2567 unsigned HOST_WIDE_INT focus_bits =
2568 create_simple_focus_bits (high_bits, low_bits,
2569 lowest_bit_set, 10);
2571 gcc_assert (SPARC_SETHI_P (focus_bits));
2572 gcc_assert (lowest_bit_set != 10);
2574 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2576 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2577 if (lowest_bit_set < 10)
2578 emit_insn (gen_rtx_SET (VOIDmode,
2579 op0,
2580 gen_rtx_LSHIFTRT (DImode, temp,
2581 GEN_INT (10 - lowest_bit_set))));
2582 else if (lowest_bit_set > 10)
2583 emit_insn (gen_rtx_SET (VOIDmode,
2584 op0,
2585 gen_rtx_ASHIFT (DImode, temp,
2586 GEN_INT (lowest_bit_set - 10))));
2587 return;
2590 /* 1) sethi %hi(low_bits), %reg
2591 * or %reg, %lo(low_bits), %reg
2592 * 2) sethi %hi(~low_bits), %reg
2593 xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg */
2595 if (high_bits == 0
2596 || high_bits == 0xffffffff)
2598 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2599 (high_bits == 0xffffffff));
2600 return;
2603 /* Now, try 3-insn sequences. */
2605 /* 1) sethi %hi(high_bits), %reg
2606 * or %reg, %lo(high_bits), %reg
2607 * sllx %reg, 32, %reg */
2609 if (low_bits == 0)
2611 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2612 return;
2615 /* We may be able to do something quick
2616 when the constant is negated, so try that. */
2617 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2618 (~low_bits) & 0xfffffc00))
2620 /* NOTE: The trailing bits get XOR'd so we need the
2621 non-negated bits, not the negated ones. */
2622 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2624 if ((((~high_bits) & 0xffffffff) == 0
2625 && ((~low_bits) & 0x80000000) == 0)
2626 || (((~high_bits) & 0xffffffff) == 0xffffffff
2627 && ((~low_bits) & 0x80000000) != 0))
2629 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2631 if ((SPARC_SETHI_P (fast_int)
2632 && (~high_bits & 0xffffffff) == 0)
2633 || SPARC_SIMM13_P (fast_int))
2634 emit_insn (gen_safe_SET64 (temp, fast_int));
2635 else
2636 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2638 else
2640 rtx negated_const;
2641 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2642 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2643 sparc_emit_set_const64 (temp, negated_const);
2646 /* If we are XOR'ing with -1, then we should emit a one's complement
2647 instead. This way the combiner will notice logical operations
2648 such as ANDN later on and substitute. */
2649 if (trailing_bits == 0x3ff)
2651 emit_insn (gen_rtx_SET (VOIDmode, op0,
2652 gen_rtx_NOT (DImode, temp)));
2654 else
2656 emit_insn (gen_rtx_SET (VOIDmode,
2657 op0,
2658 gen_safe_XOR64 (temp,
2659 (-0x400 | trailing_bits))));
2661 return;
2664 /* 1) sethi %hi(xxx), %reg
2665 * or %reg, %lo(xxx), %reg
2666 * sllx %reg, yyy, %reg
2668 * ??? This is just a generalized version of the low_bits==0
2669 * thing above, FIXME... */
2671 if ((highest_bit_set - lowest_bit_set) < 32)
2673 unsigned HOST_WIDE_INT focus_bits =
2674 create_simple_focus_bits (high_bits, low_bits,
2675 lowest_bit_set, 0);
2677 /* We can't get here in this state. */
2678 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2680 /* So what we know is that the set bits straddle the
2681 middle of the 64-bit word. */
2682 sparc_emit_set_const64_quick2 (op0, temp,
2683 focus_bits, 0,
2684 lowest_bit_set);
2685 return;
2688 /* 1) sethi %hi(high_bits), %reg
2689 * or %reg, %lo(high_bits), %reg
2690 * sllx %reg, 32, %reg
2691 * or %reg, low_bits, %reg */
2693 if (SPARC_SIMM13_P(low_bits)
2694 && ((int)low_bits > 0))
2696 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2697 return;
2700 /* The easiest way when all else fails, is full decomposition. */
2701 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2703 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2705 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2706 return the mode to be used for the comparison. For floating-point,
2707 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2708 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2709 processing is needed. */
2711 enum machine_mode
2712 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2714 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2716 switch (op)
2718 case EQ:
2719 case NE:
2720 case UNORDERED:
2721 case ORDERED:
2722 case UNLT:
2723 case UNLE:
2724 case UNGT:
2725 case UNGE:
2726 case UNEQ:
2727 case LTGT:
2728 return CCFPmode;
2730 case LT:
2731 case LE:
2732 case GT:
2733 case GE:
2734 return CCFPEmode;
2736 default:
2737 gcc_unreachable ();
2740 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2741 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2743 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2744 return CCX_NOOVmode;
2745 else
2746 return CC_NOOVmode;
2748 else
2750 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2751 return CCXmode;
2752 else
2753 return CCmode;
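/* For example, a comparison of the form (compare (plus (reg) (reg))
   (const_int 0)) selects CC_NOOVmode, because addcc clobbers the
   overflow bit; the mode records that, roughly speaking, only the
   N and Z condition bits may be relied upon.  */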
2757 /* Emit the compare insn and return the CC reg for a CODE comparison
2758 with operands X and Y. */
2760 static rtx
2761 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2763 enum machine_mode mode;
2764 rtx cc_reg;
2766 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2767 return x;
2769 mode = SELECT_CC_MODE (code, x, y);
2771 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2772 fcc regs (cse can't tell they're really call clobbered regs and will
2773 remove a duplicate comparison even if there is an intervening function
2774 call - it will then try to reload the cc reg via an int reg which is why
2775 we need the movcc patterns). It is possible to provide the movcc
2776 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2777 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2778 to tell cse that CCFPE mode registers (even pseudos) are call
2779 clobbered. */
2781 /* ??? This is an experiment. Rather than making changes to cse which may
2782 or may not be easy/clean, we do our own cse. This is possible because
2783 we will generate hard registers. Cse knows they're call clobbered (it
2784 doesn't know the same thing about pseudos). If we guess wrong, no big
2785 deal, but if we win, great! */
2787 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2788 #if 1 /* experiment */
2790 int reg;
2791 /* We cycle through the registers to ensure they're all exercised. */
2792 static int next_fcc_reg = 0;
2793 /* Previous x,y for each fcc reg. */
2794 static rtx prev_args[4][2];
2796 /* Scan prev_args for x,y. */
2797 for (reg = 0; reg < 4; reg++)
2798 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2799 break;
2800 if (reg == 4)
2802 reg = next_fcc_reg;
2803 prev_args[reg][0] = x;
2804 prev_args[reg][1] = y;
2805 next_fcc_reg = (next_fcc_reg + 1) & 3;
2807 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2809 #else
2810 cc_reg = gen_reg_rtx (mode);
2811 #endif /* ! experiment */
2812 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2813 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2814 else
2815 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2817 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, it
2818 will only result in an unrecognizable insn, so there is no point in asserting. */
2819 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2821 return cc_reg;
2825 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2828 gen_compare_reg (rtx cmp)
2830 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2833 /* This function is used for v9 only.
2834 DEST is the target of the Scc insn.
2835 CODE is the code for an Scc's comparison.
2836 X and Y are the values we compare.
2838 This function is needed to turn
2840 (set (reg:SI 110)
2841 (gt (reg:CCX 100 %icc)
2842 (const_int 0)))
2843 into
2844 (set (reg:SI 110)
2845 (gt:DI (reg:CCX 100 %icc)
2846 (const_int 0)))
2848 I.e., the instruction recognizer needs to see the mode of the comparison to
2849 find the right instruction. We could use "gt:DI" right in the
2850 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2852 static int
2853 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2855 if (! TARGET_ARCH64
2856 && (GET_MODE (x) == DImode
2857 || GET_MODE (dest) == DImode))
2858 return 0;
2860 /* Try to use the movrCC insns. */
2861 if (TARGET_ARCH64
2862 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2863 && y == const0_rtx
2864 && v9_regcmp_p (compare_code))
2866 rtx op0 = x;
2867 rtx temp;
2869 /* Special case for op0 != 0. This can be done with one instruction if
2870 dest == x. */
2872 if (compare_code == NE
2873 && GET_MODE (dest) == DImode
2874 && rtx_equal_p (op0, dest))
2876 emit_insn (gen_rtx_SET (VOIDmode, dest,
2877 gen_rtx_IF_THEN_ELSE (DImode,
2878 gen_rtx_fmt_ee (compare_code, DImode,
2879 op0, const0_rtx),
2880 const1_rtx,
2881 dest)));
2882 return 1;
2885 if (reg_overlap_mentioned_p (dest, op0))
2887 /* Handle the case where dest == x.
2888 We "early clobber" the result. */
2889 op0 = gen_reg_rtx (GET_MODE (x));
2890 emit_move_insn (op0, x);
2893 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2894 if (GET_MODE (op0) != DImode)
2896 temp = gen_reg_rtx (DImode);
2897 convert_move (temp, op0, 0);
2899 else
2900 temp = op0;
2901 emit_insn (gen_rtx_SET (VOIDmode, dest,
2902 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2903 gen_rtx_fmt_ee (compare_code, DImode,
2904 temp, const0_rtx),
2905 const1_rtx,
2906 dest)));
2907 return 1;
2909 else
2911 x = gen_compare_reg_1 (compare_code, x, y);
2912 y = const0_rtx;
2914 gcc_assert (GET_MODE (x) != CC_NOOVmode
2915 && GET_MODE (x) != CCX_NOOVmode);
2917 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2918 emit_insn (gen_rtx_SET (VOIDmode, dest,
2919 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2920 gen_rtx_fmt_ee (compare_code,
2921 GET_MODE (x), x, y),
2922 const1_rtx, dest)));
2923 return 1;
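/* Roughly, for "x = (x != 0)" in DImode the special case above boils
   down to a single "movrnz %x, 1, %x"; the general paths first zero
   DEST and then conditionally move 1 into it.  */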
2928 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2929 without jumps using the addx/subx instructions. */
2931 bool
2932 emit_scc_insn (rtx operands[])
2934 rtx tem;
2935 rtx x;
2936 rtx y;
2937 enum rtx_code code;
2939 /* The quad-word fp compare library routines all return nonzero to indicate
2940 true, which is different from the equivalent libgcc routines, so we must
2941 handle them specially here. */
2942 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2944 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2945 GET_CODE (operands[1]));
2946 operands[2] = XEXP (operands[1], 0);
2947 operands[3] = XEXP (operands[1], 1);
2950 code = GET_CODE (operands[1]);
2951 x = operands[2];
2952 y = operands[3];
2954 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2955 more applications). The exception to this is "reg != 0" which can
2956 be done in one instruction on v9 (so we do it). */
2957 if (code == EQ)
2959 if (GET_MODE (x) == SImode)
2961 rtx pat;
2962 if (TARGET_ARCH64)
2963 pat = gen_seqsidi_special (operands[0], x, y);
2964 else
2965 pat = gen_seqsisi_special (operands[0], x, y);
2966 emit_insn (pat);
2967 return true;
2969 else if (GET_MODE (x) == DImode)
2971 rtx pat = gen_seqdi_special (operands[0], x, y);
2972 emit_insn (pat);
2973 return true;
2977 if (code == NE)
2979 if (GET_MODE (x) == SImode)
2981 rtx pat;
2982 if (TARGET_ARCH64)
2983 pat = gen_snesidi_special (operands[0], x, y);
2984 else
2985 pat = gen_snesisi_special (operands[0], x, y);
2986 emit_insn (pat);
2987 return true;
2989 else if (GET_MODE (x) == DImode)
2991 rtx pat;
2992 if (TARGET_VIS3)
2993 pat = gen_snedi_special_vis3 (operands[0], x, y);
2994 else
2995 pat = gen_snedi_special (operands[0], x, y);
2996 emit_insn (pat);
2997 return true;
3001 if (TARGET_V9
3002 && TARGET_ARCH64
3003 && GET_MODE (x) == DImode
3004 && !(TARGET_VIS3
3005 && (code == GTU || code == LTU))
3006 && gen_v9_scc (operands[0], code, x, y))
3007 return true;
3009 /* We can do LTU and GEU using the addx/subx instructions too. And
3010 for GTU/LEU, if both operands are registers, swap them and fall
3011 back to the easy case. */
3012 if (code == GTU || code == LEU)
3014 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3015 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3017 tem = x;
3018 x = y;
3019 y = tem;
3020 code = swap_condition (code);
3024 if (code == LTU
3025 || (!TARGET_VIS3 && code == GEU))
3027 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3028 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3029 gen_compare_reg_1 (code, x, y),
3030 const0_rtx)));
3031 return true;
3034 /* All the possibilities to use addx/subx-based sequences have been
3035 exhausted, so try for a 3-instruction sequence using v9 conditional
3036 moves. */
3037 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3038 return true;
3040 /* Nope, do branches. */
3041 return false;
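/* The addx trick mentioned above: for SImode "dest = (x <u y)" the
   expansion is roughly

	subcc	%x, %y, %g0
	addx	%g0, 0, %dest

   i.e. the carry left behind by the subtraction is exactly the
   LTU result.  */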
3044 /* Emit a conditional jump insn for the v9 architecture using comparison code
3045 CODE and jump target LABEL.
3046 This function exists to take advantage of the v9 brxx insns. */
3048 static void
3049 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3051 emit_jump_insn (gen_rtx_SET (VOIDmode,
3052 pc_rtx,
3053 gen_rtx_IF_THEN_ELSE (VOIDmode,
3054 gen_rtx_fmt_ee (code, GET_MODE (op0),
3055 op0, const0_rtx),
3056 gen_rtx_LABEL_REF (VOIDmode, label),
3057 pc_rtx)));
3060 /* Emit a conditional jump insn for the UA2011 architecture using
3061 comparison code CODE and jump target LABEL. This function exists
3062 to take advantage of the UA2011 Compare and Branch insns. */
3064 static void
3065 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3067 rtx if_then_else;
3069 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3070 gen_rtx_fmt_ee(code, GET_MODE(op0),
3071 op0, op1),
3072 gen_rtx_LABEL_REF (VOIDmode, label),
3073 pc_rtx);
3075 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
3078 void
3079 emit_conditional_branch_insn (rtx operands[])
3081 /* The quad-word fp compare library routines all return nonzero to indicate
3082 true, which is different from the equivalent libgcc routines, so we must
3083 handle them specially here. */
3084 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3086 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3087 GET_CODE (operands[0]));
3088 operands[1] = XEXP (operands[0], 0);
3089 operands[2] = XEXP (operands[0], 1);
3092 /* If we can tell early on that the comparison is against a constant
3093 that won't fit in the 5-bit signed immediate field of a cbcond,
3094 use one of the other v9 conditional branch sequences. */
3095 if (TARGET_CBCOND
3096 && GET_CODE (operands[1]) == REG
3097 && (GET_MODE (operands[1]) == SImode
3098 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3099 && (GET_CODE (operands[2]) != CONST_INT
3100 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3102 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3103 return;
3106 if (TARGET_ARCH64 && operands[2] == const0_rtx
3107 && GET_CODE (operands[1]) == REG
3108 && GET_MODE (operands[1]) == DImode)
3110 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3111 return;
3114 operands[1] = gen_compare_reg (operands[0]);
3115 operands[2] = const0_rtx;
3116 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3117 operands[1], operands[2]);
3118 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3119 operands[3]));
3123 /* Generate a DFmode part of a hard TFmode register.
3124 REG is the TFmode hard register, LOW is 1 for the
3125 low 64 bits of the register and 0 otherwise. */
3128 gen_df_reg (rtx reg, int low)
3130 int regno = REGNO (reg);
3132 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3133 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3134 return gen_rtx_REG (DFmode, regno);
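/* For instance, assuming the usual big-endian word layout, a TFmode
   value living in %f4-%f7 yields %f4 for the high double (LOW == 0)
   and %f6 for the low double (LOW == 1).  */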
3137 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3138 Unlike normal calls, TFmode operands are passed by reference. It is
3139 assumed that no more than 3 operands are required. */
3141 static void
3142 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3144 rtx ret_slot = NULL, arg[3], func_sym;
3145 int i;
3147 /* We only expect to be called for conversions, unary, and binary ops. */
3148 gcc_assert (nargs == 2 || nargs == 3);
3150 for (i = 0; i < nargs; ++i)
3152 rtx this_arg = operands[i];
3153 rtx this_slot;
3155 /* TFmode arguments and return values are passed by reference. */
3156 if (GET_MODE (this_arg) == TFmode)
3158 int force_stack_temp;
3160 force_stack_temp = 0;
3161 if (TARGET_BUGGY_QP_LIB && i == 0)
3162 force_stack_temp = 1;
3164 if (GET_CODE (this_arg) == MEM
3165 && ! force_stack_temp)
3167 tree expr = MEM_EXPR (this_arg);
3168 if (expr)
3169 mark_addressable (expr);
3170 this_arg = XEXP (this_arg, 0);
3172 else if (CONSTANT_P (this_arg)
3173 && ! force_stack_temp)
3175 this_slot = force_const_mem (TFmode, this_arg);
3176 this_arg = XEXP (this_slot, 0);
3178 else
3180 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3182 /* Operand 0 is the return value. We'll copy it out later. */
3183 if (i > 0)
3184 emit_move_insn (this_slot, this_arg);
3185 else
3186 ret_slot = this_slot;
3188 this_arg = XEXP (this_slot, 0);
3192 arg[i] = this_arg;
3195 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3197 if (GET_MODE (operands[0]) == TFmode)
3199 if (nargs == 2)
3200 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3201 arg[0], GET_MODE (arg[0]),
3202 arg[1], GET_MODE (arg[1]));
3203 else
3204 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3205 arg[0], GET_MODE (arg[0]),
3206 arg[1], GET_MODE (arg[1]),
3207 arg[2], GET_MODE (arg[2]));
3209 if (ret_slot)
3210 emit_move_insn (operands[0], ret_slot);
3212 else
3214 rtx ret;
3216 gcc_assert (nargs == 2);
3218 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3219 GET_MODE (operands[0]), 1,
3220 arg[1], GET_MODE (arg[1]));
3222 if (ret != operands[0])
3223 emit_move_insn (operands[0], ret);
3227 /* Expand soft-float TFmode calls to sparc abi routines. */
3229 static void
3230 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3232 const char *func;
3234 switch (code)
3236 case PLUS:
3237 func = "_Qp_add";
3238 break;
3239 case MINUS:
3240 func = "_Qp_sub";
3241 break;
3242 case MULT:
3243 func = "_Qp_mul";
3244 break;
3245 case DIV:
3246 func = "_Qp_div";
3247 break;
3248 default:
3249 gcc_unreachable ();
3252 emit_soft_tfmode_libcall (func, 3, operands);
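/* So a TFmode addition "r = x + y" ends up as roughly
   _Qp_add (&r, &x, &y), following the 64-bit ABI's quad-emulation
   convention in which all three TFmode values are passed by
   reference.  */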
3255 static void
3256 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3258 const char *func;
3260 gcc_assert (code == SQRT);
3261 func = "_Qp_sqrt";
3263 emit_soft_tfmode_libcall (func, 2, operands);
3266 static void
3267 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3269 const char *func;
3271 switch (code)
3273 case FLOAT_EXTEND:
3274 switch (GET_MODE (operands[1]))
3276 case SFmode:
3277 func = "_Qp_stoq";
3278 break;
3279 case DFmode:
3280 func = "_Qp_dtoq";
3281 break;
3282 default:
3283 gcc_unreachable ();
3285 break;
3287 case FLOAT_TRUNCATE:
3288 switch (GET_MODE (operands[0]))
3290 case SFmode:
3291 func = "_Qp_qtos";
3292 break;
3293 case DFmode:
3294 func = "_Qp_qtod";
3295 break;
3296 default:
3297 gcc_unreachable ();
3299 break;
3301 case FLOAT:
3302 switch (GET_MODE (operands[1]))
3304 case SImode:
3305 func = "_Qp_itoq";
3306 if (TARGET_ARCH64)
3307 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3308 break;
3309 case DImode:
3310 func = "_Qp_xtoq";
3311 break;
3312 default:
3313 gcc_unreachable ();
3315 break;
3317 case UNSIGNED_FLOAT:
3318 switch (GET_MODE (operands[1]))
3320 case SImode:
3321 func = "_Qp_uitoq";
3322 if (TARGET_ARCH64)
3323 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3324 break;
3325 case DImode:
3326 func = "_Qp_uxtoq";
3327 break;
3328 default:
3329 gcc_unreachable ();
3331 break;
3333 case FIX:
3334 switch (GET_MODE (operands[0]))
3336 case SImode:
3337 func = "_Qp_qtoi";
3338 break;
3339 case DImode:
3340 func = "_Qp_qtox";
3341 break;
3342 default:
3343 gcc_unreachable ();
3345 break;
3347 case UNSIGNED_FIX:
3348 switch (GET_MODE (operands[0]))
3350 case SImode:
3351 func = "_Qp_qtoui";
3352 break;
3353 case DImode:
3354 func = "_Qp_qtoux";
3355 break;
3356 default:
3357 gcc_unreachable ();
3359 break;
3361 default:
3362 gcc_unreachable ();
3365 emit_soft_tfmode_libcall (func, 2, operands);
3368 /* Expand a hard-float tfmode operation. All arguments must be in
3369 registers. */
3371 static void
3372 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3374 rtx op, dest;
3376 if (GET_RTX_CLASS (code) == RTX_UNARY)
3378 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3379 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3381 else
3383 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3384 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3385 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3386 operands[1], operands[2]);
3389 if (register_operand (operands[0], VOIDmode))
3390 dest = operands[0];
3391 else
3392 dest = gen_reg_rtx (GET_MODE (operands[0]));
3394 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3396 if (dest != operands[0])
3397 emit_move_insn (operands[0], dest);
3400 void
3401 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3403 if (TARGET_HARD_QUAD)
3404 emit_hard_tfmode_operation (code, operands);
3405 else
3406 emit_soft_tfmode_binop (code, operands);
3409 void
3410 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3412 if (TARGET_HARD_QUAD)
3413 emit_hard_tfmode_operation (code, operands);
3414 else
3415 emit_soft_tfmode_unop (code, operands);
3418 void
3419 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3421 if (TARGET_HARD_QUAD)
3422 emit_hard_tfmode_operation (code, operands);
3423 else
3424 emit_soft_tfmode_cvt (code, operands);
3427 /* Return nonzero if a branch/jump/call instruction will be emitting
3428 a nop into its delay slot. */
3431 empty_delay_slot (rtx insn)
3433 rtx seq;
3435 /* If no previous instruction (should not happen), return true. */
3436 if (PREV_INSN (insn) == NULL)
3437 return 1;
3439 seq = NEXT_INSN (PREV_INSN (insn));
3440 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3441 return 0;
3443 return 1;
3446 /* Return nonzero if we should emit a nop after a cbcond instruction.
3447 The cbcond instruction does not have a delay slot; however, there is
3448 a severe performance penalty if a control transfer appears right
3449 after a cbcond. Therefore we emit a nop when we detect this
3450 situation. */
3453 emit_cbcond_nop (rtx insn)
3455 rtx next = next_active_insn (insn);
3457 if (!next)
3458 return 1;
3460 if (NONJUMP_INSN_P (next)
3461 && GET_CODE (PATTERN (next)) == SEQUENCE)
3462 next = XVECEXP (PATTERN (next), 0, 0);
3463 else if (CALL_P (next)
3464 && GET_CODE (PATTERN (next)) == PARALLEL)
3466 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3468 if (GET_CODE (delay) == RETURN)
3470 /* It's a sibling call. Do not emit the nop if we're going
3471 to emit something other than the jump itself as the first
3472 instruction of the sibcall sequence. */
3473 if (sparc_leaf_function_p || TARGET_FLAT)
3474 return 0;
3478 if (NONJUMP_INSN_P (next))
3479 return 0;
3481 return 1;
3484 /* Return nonzero if TRIAL can go into the call delay slot. */
3487 eligible_for_call_delay (rtx trial)
3489 rtx pat;
3491 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3492 return 0;
3494 /* Binutils allows
3495 call __tls_get_addr, %tgd_call (foo)
3496 add %l7, %o0, %o0, %tgd_add (foo)
3497 while Sun as/ld does not. */
3498 if (TARGET_GNU_TLS || !TARGET_TLS)
3499 return 1;
3501 pat = PATTERN (trial);
3503 /* We must reject tgd_add{32|64}, i.e.
3504 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3505 and tldm_add{32|64}, i.e.
3506 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3507 for Sun as/ld. */
3508 if (GET_CODE (pat) == SET
3509 && GET_CODE (SET_SRC (pat)) == PLUS)
3511 rtx unspec = XEXP (SET_SRC (pat), 1);
3513 if (GET_CODE (unspec) == UNSPEC
3514 && (XINT (unspec, 1) == UNSPEC_TLSGD
3515 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3516 return 0;
3519 return 1;
3522 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3523 instruction. RETURN_P is true if the v9 variant 'return' is to be
3524 considered in the test too.
3526 TRIAL must be a SET whose destination is a REG appropriate for the
3527 'restore' instruction or, if RETURN_P is true, for the 'return'
3528 instruction. */
3530 static int
3531 eligible_for_restore_insn (rtx trial, bool return_p)
3533 rtx pat = PATTERN (trial);
3534 rtx src = SET_SRC (pat);
3535 bool src_is_freg = false;
3536 rtx src_reg;
3538 /* Since we now can do moves between float and integer registers when
3539 VIS3 is enabled, we have to catch this case. We can allow such
3540 moves when doing a 'return' however. */
3541 src_reg = src;
3542 if (GET_CODE (src_reg) == SUBREG)
3543 src_reg = SUBREG_REG (src_reg);
3544 if (GET_CODE (src_reg) == REG
3545 && SPARC_FP_REG_P (REGNO (src_reg)))
3546 src_is_freg = true;
3548 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3549 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3550 && arith_operand (src, GET_MODE (src))
3551 && ! src_is_freg)
3553 if (TARGET_ARCH64)
3554 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3555 else
3556 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3559 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3560 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3561 && arith_double_operand (src, GET_MODE (src))
3562 && ! src_is_freg)
3563 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3565 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3566 else if (! TARGET_FPU && register_operand (src, SFmode))
3567 return 1;
3569 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3570 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3571 return 1;
3573 /* If we have the 'return' instruction, anything that does not use
3574 local or output registers and can go into a delay slot wins. */
3575 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3576 return 1;
3578 /* The 'restore src1,src2,dest' pattern for SImode. */
3579 else if (GET_CODE (src) == PLUS
3580 && register_operand (XEXP (src, 0), SImode)
3581 && arith_operand (XEXP (src, 1), SImode))
3582 return 1;
3584 /* The 'restore src1,src2,dest' pattern for DImode. */
3585 else if (GET_CODE (src) == PLUS
3586 && register_operand (XEXP (src, 0), DImode)
3587 && arith_double_operand (XEXP (src, 1), DImode))
3588 return 1;
3590 /* The 'restore src1,%lo(src2),dest' pattern. */
3591 else if (GET_CODE (src) == LO_SUM
3592 && ! TARGET_CM_MEDMID
3593 && ((register_operand (XEXP (src, 0), SImode)
3594 && immediate_operand (XEXP (src, 1), SImode))
3595 || (TARGET_ARCH64
3596 && register_operand (XEXP (src, 0), DImode)
3597 && immediate_operand (XEXP (src, 1), DImode))))
3598 return 1;
3600 /* The 'restore src,src,dest' pattern. */
3601 else if (GET_CODE (src) == ASHIFT
3602 && (register_operand (XEXP (src, 0), SImode)
3603 || register_operand (XEXP (src, 0), DImode))
3604 && XEXP (src, 1) == const1_rtx)
3605 return 1;
3607 return 0;
3610 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3613 eligible_for_return_delay (rtx trial)
3615 int regno;
3616 rtx pat;
3618 /* If the function uses __builtin_eh_return, the eh_return machinery
3619 occupies the delay slot. */
3620 if (crtl->calls_eh_return)
3621 return 0;
3623 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3624 return 0;
3626 /* In the case of a leaf or flat function, anything can go into the slot. */
3627 if (sparc_leaf_function_p || TARGET_FLAT)
3628 return 1;
3630 if (!NONJUMP_INSN_P (trial))
3631 return 0;
3633 pat = PATTERN (trial);
3634 if (GET_CODE (pat) == PARALLEL)
3636 int i;
3638 if (! TARGET_V9)
3639 return 0;
3640 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3642 rtx expr = XVECEXP (pat, 0, i);
3643 if (GET_CODE (expr) != SET)
3644 return 0;
3645 if (GET_CODE (SET_DEST (expr)) != REG)
3646 return 0;
3647 regno = REGNO (SET_DEST (expr));
3648 if (regno >= 8 && regno < 24)
3649 return 0;
3651 return !epilogue_renumber (&pat, 1);
3654 if (GET_CODE (pat) != SET)
3655 return 0;
3657 if (GET_CODE (SET_DEST (pat)) != REG)
3658 return 0;
3660 regno = REGNO (SET_DEST (pat));
3662 /* Otherwise, only operations which can be done in tandem with
3663 a `restore' or `return' insn can go into the delay slot. */
3664 if (regno >= 8 && regno < 24)
3665 return 0;
3667 /* If this instruction sets up a floating-point register and we have a return
3668 instruction, it can probably go in. But restore will not work
3669 with FP_REGS. */
3670 if (! SPARC_INT_REG_P (regno))
3671 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3673 return eligible_for_restore_insn (trial, true);
3676 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3679 eligible_for_sibcall_delay (rtx trial)
3681 rtx pat;
3683 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3684 return 0;
3686 if (!NONJUMP_INSN_P (trial))
3687 return 0;
3689 pat = PATTERN (trial);
3691 if (sparc_leaf_function_p || TARGET_FLAT)
3693 /* If the tail call is done using the call instruction,
3694 we have to restore %o7 in the delay slot. */
3695 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3696 return 0;
3698 /* %g1 is used to build the function address. */
3699 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3700 return 0;
3702 return 1;
3705 if (GET_CODE (pat) != SET)
3706 return 0;
3708 /* Otherwise, only operations which can be done in tandem with
3709 a `restore' insn can go into the delay slot. */
3710 if (GET_CODE (SET_DEST (pat)) != REG
3711 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3712 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3713 return 0;
3715 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3716 in most cases. */
3717 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3718 return 0;
3720 return eligible_for_restore_insn (trial, false);
3723 /* Determine if it's legal to put X into the constant pool. This
3724 is not possible if X contains the address of a symbol that is
3725 not constant (TLS) or not known at final link time (PIC). */
3727 static bool
3728 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3730 switch (GET_CODE (x))
3732 case CONST_INT:
3733 case CONST_DOUBLE:
3734 case CONST_VECTOR:
3735 /* Accept all non-symbolic constants. */
3736 return false;
3738 case LABEL_REF:
3739 /* Labels are OK iff we are non-PIC. */
3740 return flag_pic != 0;
3742 case SYMBOL_REF:
3743 /* 'Naked' TLS symbol references are never OK,
3744 non-TLS symbols are OK iff we are non-PIC. */
3745 if (SYMBOL_REF_TLS_MODEL (x))
3746 return true;
3747 else
3748 return flag_pic != 0;
3750 case CONST:
3751 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3752 case PLUS:
3753 case MINUS:
3754 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3755 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3756 case UNSPEC:
3757 return true;
3758 default:
3759 gcc_unreachable ();
3763 /* Global Offset Table support. */
3764 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3765 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3767 /* Return the SYMBOL_REF for the Global Offset Table. */
3769 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3771 static rtx
3772 sparc_got (void)
3774 if (!sparc_got_symbol)
3775 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3777 return sparc_got_symbol;
3780 /* Ensure that we are not using patterns that are not OK with PIC. */
3783 check_pic (int i)
3785 rtx op;
3787 switch (flag_pic)
3789 case 1:
3790 op = recog_data.operand[i];
3791 gcc_assert (GET_CODE (op) != SYMBOL_REF
3792 && (GET_CODE (op) != CONST
3793 || (GET_CODE (XEXP (op, 0)) == MINUS
3794 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3795 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3796 case 2:
3797 default:
3798 return 1;
3802 /* Return true if X is an address which needs a temporary register when
3803 reloaded while generating PIC code. */
3806 pic_address_needs_scratch (rtx x)
3808 /* An address which is a symbol plus a non-SMALL_INT constant needs a temp reg. */
3809 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3810 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3811 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3812 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3813 return 1;
3815 return 0;
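/* For example, "sym + 4" can be used as-is, whereas "sym + 0x2000"
   needs a scratch register, 0x2000 being outside the 13-bit signed
   immediate range accepted by SMALL_INT.  */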
3818 /* Determine if a given RTX is a valid constant. We already know this
3819 satisfies CONSTANT_P. */
3821 static bool
3822 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3824 switch (GET_CODE (x))
3826 case CONST:
3827 case SYMBOL_REF:
3828 if (sparc_tls_referenced_p (x))
3829 return false;
3830 break;
3832 case CONST_DOUBLE:
3833 if (GET_MODE (x) == VOIDmode)
3834 return true;
3836 /* Floating point constants are generally not ok.
3837 The only exception is 0.0 and all-ones in VIS. */
3838 if (TARGET_VIS
3839 && SCALAR_FLOAT_MODE_P (mode)
3840 && (const_zero_operand (x, mode)
3841 || const_all_ones_operand (x, mode)))
3842 return true;
3844 return false;
3846 case CONST_VECTOR:
3847 /* Vector constants are generally not ok.
3848 The only exception is 0 or -1 in VIS. */
3849 if (TARGET_VIS
3850 && (const_zero_operand (x, mode)
3851 || const_all_ones_operand (x, mode)))
3852 return true;
3854 return false;
3856 default:
3857 break;
3860 return true;
3863 /* Determine if a given RTX is a valid constant address. */
3865 bool
3866 constant_address_p (rtx x)
3868 switch (GET_CODE (x))
3870 case LABEL_REF:
3871 case CONST_INT:
3872 case HIGH:
3873 return true;
3875 case CONST:
3876 if (flag_pic && pic_address_needs_scratch (x))
3877 return false;
3878 return sparc_legitimate_constant_p (Pmode, x);
3880 case SYMBOL_REF:
3881 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3883 default:
3884 return false;
3888 /* Nonzero if the constant value X is a legitimate general operand
3889 when generating PIC code. It is given that flag_pic is on and
3890 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3892 bool
3893 legitimate_pic_operand_p (rtx x)
3895 if (pic_address_needs_scratch (x))
3896 return false;
3897 if (sparc_tls_referenced_p (x))
3898 return false;
3899 return true;
3902 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3903 (CONST_INT_P (X) \
3904 && INTVAL (X) >= -0x1000 \
3905 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3907 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3908 (CONST_INT_P (X) \
3909 && INTVAL (X) >= -0x1000 \
3910 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
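/* For DImode (size 8), for instance, RTX_OK_FOR_OFFSET_P accepts
   offsets in [-4096, 4087] while RTX_OK_FOR_OLO10_P stops at 3063,
   presumably leaving room for the up-to-0x3ff addend of the
   accompanying %lo() term.  */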
3912 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3914 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3915 ordinarily. This changes a bit when generating PIC. */
3917 static bool
3918 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3920 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3922 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3923 rs1 = addr;
3924 else if (GET_CODE (addr) == PLUS)
3926 rs1 = XEXP (addr, 0);
3927 rs2 = XEXP (addr, 1);
3929 /* Canonicalize. REG comes first, if there are no regs,
3930 LO_SUM comes first. */
3931 if (!REG_P (rs1)
3932 && GET_CODE (rs1) != SUBREG
3933 && (REG_P (rs2)
3934 || GET_CODE (rs2) == SUBREG
3935 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3937 rs1 = XEXP (addr, 1);
3938 rs2 = XEXP (addr, 0);
3941 if ((flag_pic == 1
3942 && rs1 == pic_offset_table_rtx
3943 && !REG_P (rs2)
3944 && GET_CODE (rs2) != SUBREG
3945 && GET_CODE (rs2) != LO_SUM
3946 && GET_CODE (rs2) != MEM
3947 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3948 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3949 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3950 || ((REG_P (rs1)
3951 || GET_CODE (rs1) == SUBREG)
3952 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3954 imm1 = rs2;
3955 rs2 = NULL;
3957 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3958 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3960 /* We prohibit REG + REG for TFmode when there are no quad move insns
3961 and we consequently need to split. We do this because REG+REG
3962 is not an offsettable address. If we get the situation in reload
3963 where source and destination of a movtf pattern are both MEMs with
3964 REG+REG address, then only one of them gets converted to an
3965 offsettable address. */
3966 if (mode == TFmode
3967 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3968 return 0;
3970 /* Likewise for TImode, but in all cases. */
3971 if (mode == TImode)
3972 return 0;
3974 /* We prohibit REG + REG on ARCH32 if not optimizing for
3975 DFmode/DImode because then mem_min_alignment is likely to be zero
3976 after reload and the forced split would lack a matching splitter
3977 pattern. */
3978 if (TARGET_ARCH32 && !optimize
3979 && (mode == DFmode || mode == DImode))
3980 return 0;
3982 else if (USE_AS_OFFSETABLE_LO10
3983 && GET_CODE (rs1) == LO_SUM
3984 && TARGET_ARCH64
3985 && ! TARGET_CM_MEDMID
3986 && RTX_OK_FOR_OLO10_P (rs2, mode))
3988 rs2 = NULL;
3989 imm1 = XEXP (rs1, 1);
3990 rs1 = XEXP (rs1, 0);
3991 if (!CONSTANT_P (imm1)
3992 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3993 return 0;
3996 else if (GET_CODE (addr) == LO_SUM)
3998 rs1 = XEXP (addr, 0);
3999 imm1 = XEXP (addr, 1);
4001 if (!CONSTANT_P (imm1)
4002 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4003 return 0;
4005 /* We can't allow TFmode in 32-bit mode, because an offset greater
4006 than the alignment (8) may cause the LO_SUM to overflow. */
4007 if (mode == TFmode && TARGET_ARCH32)
4008 return 0;
4010 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4011 return 1;
4012 else
4013 return 0;
4015 if (GET_CODE (rs1) == SUBREG)
4016 rs1 = SUBREG_REG (rs1);
4017 if (!REG_P (rs1))
4018 return 0;
4020 if (rs2)
4022 if (GET_CODE (rs2) == SUBREG)
4023 rs2 = SUBREG_REG (rs2);
4024 if (!REG_P (rs2))
4025 return 0;
4028 if (strict)
4030 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4031 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4032 return 0;
4034 else
4036 if ((! SPARC_INT_REG_P (REGNO (rs1))
4037 && REGNO (rs1) != FRAME_POINTER_REGNUM
4038 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4039 || (rs2
4040 && (! SPARC_INT_REG_P (REGNO (rs2))
4041 && REGNO (rs2) != FRAME_POINTER_REGNUM
4042 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4043 return 0;
4045 return 1;
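/* To summarize the cases above, the forms ordinarily accepted are:

     (reg)                  e.g.  ld [%i0], ...
     (plus (reg) (reg))     e.g.  ld [%i0 + %i1], ...
     (plus (reg) (imm13))   e.g.  ld [%i0 + 64], ...
     (lo_sum (reg) (imm))   e.g.  ld [%i0 + %lo(sym)], ...
     small (const_int)      an absolute address in the simm13 range

   with the PIC, TFmode/TImode and OLO10 restrictions applied on top. */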
4048 /* Return the SYMBOL_REF for the tls_get_addr function. */
4050 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4052 static rtx
4053 sparc_tls_get_addr (void)
4055 if (!sparc_tls_symbol)
4056 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4058 return sparc_tls_symbol;
4061 /* Return the Global Offset Table to be used in TLS mode. */
4063 static rtx
4064 sparc_tls_got (void)
4066 /* In PIC mode, this is just the PIC offset table. */
4067 if (flag_pic)
4069 crtl->uses_pic_offset_table = 1;
4070 return pic_offset_table_rtx;
4073 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4074 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4075 if (TARGET_SUN_TLS && TARGET_ARCH32)
4077 load_got_register ();
4078 return global_offset_table_rtx;
4081 /* In all other cases, we load a new pseudo with the GOT symbol. */
4082 return copy_to_reg (sparc_got ());
4085 /* Return true if X contains a thread-local symbol. */
4087 static bool
4088 sparc_tls_referenced_p (rtx x)
4090 if (!TARGET_HAVE_TLS)
4091 return false;
4093 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4094 x = XEXP (XEXP (x, 0), 0);
4096 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4097 return true;
4099 /* That's all we handle in sparc_legitimize_tls_address for now. */
4100 return false;
4103 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4104 this (thread-local) address. */
4106 static rtx
4107 sparc_legitimize_tls_address (rtx addr)
4109 rtx temp1, temp2, temp3, ret, o0, got, insn;
4111 gcc_assert (can_create_pseudo_p ());
4113 if (GET_CODE (addr) == SYMBOL_REF)
4114 switch (SYMBOL_REF_TLS_MODEL (addr))
4116 case TLS_MODEL_GLOBAL_DYNAMIC:
4117 start_sequence ();
4118 temp1 = gen_reg_rtx (SImode);
4119 temp2 = gen_reg_rtx (SImode);
4120 ret = gen_reg_rtx (Pmode);
4121 o0 = gen_rtx_REG (Pmode, 8);
4122 got = sparc_tls_got ();
4123 emit_insn (gen_tgd_hi22 (temp1, addr));
4124 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4125 if (TARGET_ARCH32)
4127 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4128 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4129 addr, const1_rtx));
4131 else
4133 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4134 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4135 addr, const1_rtx));
4137 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4138 insn = get_insns ();
4139 end_sequence ();
4140 emit_libcall_block (insn, ret, o0, addr);
4141 break;
4143 case TLS_MODEL_LOCAL_DYNAMIC:
4144 start_sequence ();
4145 temp1 = gen_reg_rtx (SImode);
4146 temp2 = gen_reg_rtx (SImode);
4147 temp3 = gen_reg_rtx (Pmode);
4148 ret = gen_reg_rtx (Pmode);
4149 o0 = gen_rtx_REG (Pmode, 8);
4150 got = sparc_tls_got ();
4151 emit_insn (gen_tldm_hi22 (temp1));
4152 emit_insn (gen_tldm_lo10 (temp2, temp1));
4153 if (TARGET_ARCH32)
4155 emit_insn (gen_tldm_add32 (o0, got, temp2));
4156 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4157 const1_rtx));
4159 else
4161 emit_insn (gen_tldm_add64 (o0, got, temp2));
4162 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4163 const1_rtx));
4165 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4166 insn = get_insns ();
4167 end_sequence ();
4168 emit_libcall_block (insn, temp3, o0,
4169 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4170 UNSPEC_TLSLD_BASE));
4171 temp1 = gen_reg_rtx (SImode);
4172 temp2 = gen_reg_rtx (SImode);
4173 emit_insn (gen_tldo_hix22 (temp1, addr));
4174 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4175 if (TARGET_ARCH32)
4176 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4177 else
4178 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4179 break;
4181 case TLS_MODEL_INITIAL_EXEC:
4182 temp1 = gen_reg_rtx (SImode);
4183 temp2 = gen_reg_rtx (SImode);
4184 temp3 = gen_reg_rtx (Pmode);
4185 got = sparc_tls_got ();
4186 emit_insn (gen_tie_hi22 (temp1, addr));
4187 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4188 if (TARGET_ARCH32)
4189 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4190 else
4191 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4192 if (TARGET_SUN_TLS)
4194 ret = gen_reg_rtx (Pmode);
4195 if (TARGET_ARCH32)
4196 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4197 temp3, addr));
4198 else
4199 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4200 temp3, addr));
4202 else
4203 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4204 break;
4206 case TLS_MODEL_LOCAL_EXEC:
4207 temp1 = gen_reg_rtx (Pmode);
4208 temp2 = gen_reg_rtx (Pmode);
4209 if (TARGET_ARCH32)
4211 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4212 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4214 else
4216 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4217 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4219 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4220 break;
4222 default:
4223 gcc_unreachable ();
4226 else if (GET_CODE (addr) == CONST)
4228 rtx base, offset;
4230 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4232 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4233 offset = XEXP (XEXP (addr, 0), 1);
4235 base = force_operand (base, NULL_RTX);
4236 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4237 offset = force_reg (Pmode, offset);
4238 ret = gen_rtx_PLUS (Pmode, base, offset);
4241 else
4242 gcc_unreachable (); /* for now ... */
4244 return ret;
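/* As a rough illustration (the relocation operator spellings follow the
   SPARC TLS ABI and are not re-checked against the md patterns here),
   the 32-bit global-dynamic case above expands to something like:

	sethi	%tgd_hi22(sym), %t1
	add	%t1, %tgd_lo10(sym), %t2
	add	%l7, %t2, %o0, %tgd_add(sym)
	call	__tls_get_addr, %tgd_call(sym)
	 nop

   with %l7 standing for the GOT register and the result in %o0. */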
4247 /* Legitimize PIC addresses. If the address is already position-independent,
4248 we return ORIG. Newly generated position-independent addresses go into a
4249 reg. This is REG if nonzero, otherwise we allocate register(s) as
4250 necessary. */
4252 static rtx
4253 sparc_legitimize_pic_address (rtx orig, rtx reg)
4255 bool gotdata_op = false;
4257 if (GET_CODE (orig) == SYMBOL_REF
4258 /* See the comment in sparc_expand_move. */
4259 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4261 rtx pic_ref, address;
4262 rtx insn;
4264 if (reg == 0)
4266 gcc_assert (can_create_pseudo_p ());
4267 reg = gen_reg_rtx (Pmode);
4270 if (flag_pic == 2)
4272 /* If not during reload, allocate another temp reg here for loading
4273 in the address, so that these instructions can be optimized
4274 properly. */
4275 rtx temp_reg = (! can_create_pseudo_p ()
4276 ? reg : gen_reg_rtx (Pmode));
4278 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4279 won't get confused into thinking that these two instructions
4280 are loading in the true address of the symbol. If in the
4281 future a PIC rtx exists, that should be used instead. */
4282 if (TARGET_ARCH64)
4284 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4285 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4287 else
4289 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4290 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4292 address = temp_reg;
4293 gotdata_op = true;
4295 else
4296 address = orig;
4298 crtl->uses_pic_offset_table = 1;
4299 if (gotdata_op)
4301 if (TARGET_ARCH64)
4302 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4303 pic_offset_table_rtx,
4304 address, orig));
4305 else
4306 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4307 pic_offset_table_rtx,
4308 address, orig));
4310 else
4312 pic_ref
4313 = gen_const_mem (Pmode,
4314 gen_rtx_PLUS (Pmode,
4315 pic_offset_table_rtx, address));
4316 insn = emit_move_insn (reg, pic_ref);
4319 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4320 by the loop optimizer. */
4321 set_unique_reg_note (insn, REG_EQUAL, orig);
4322 return reg;
4324 else if (GET_CODE (orig) == CONST)
4326 rtx base, offset;
4328 if (GET_CODE (XEXP (orig, 0)) == PLUS
4329 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4330 return orig;
4332 if (reg == 0)
4334 gcc_assert (can_create_pseudo_p ());
4335 reg = gen_reg_rtx (Pmode);
4338 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4339 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4340 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4341 base == reg ? NULL_RTX : reg);
4343 if (GET_CODE (offset) == CONST_INT)
4345 if (SMALL_INT (offset))
4346 return plus_constant (Pmode, base, INTVAL (offset));
4347 else if (can_create_pseudo_p ())
4348 offset = force_reg (Pmode, offset);
4349 else
4350 /* If we reach here, then something is seriously wrong. */
4351 gcc_unreachable ();
4353 return gen_rtx_PLUS (Pmode, base, offset);
4355 else if (GET_CODE (orig) == LABEL_REF)
4356 /* ??? We ought to be checking that the register is live instead, in case
4357 it is eliminated. */
4358 crtl->uses_pic_offset_table = 1;
4360 return orig;
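/* Schematically, for -fPIC (flag_pic == 2) on 32-bit, the code above
   amounts to (ignoring the exact GOTDATA relocation operators used by
   the movsi_*_pic patterns):

	sethi	%hi(sym), %tmp
	or	%tmp, %lo(sym), %tmp
	ld	[%l7 + %tmp], %reg	! load of the GOT slot

   whereas -fpic (flag_pic == 1) folds the GOT offset directly into the
   13-bit displacement: ld [%l7 + sym], %reg. */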
4363 /* Try machine-dependent ways of modifying an illegitimate address X
4364 to be legitimate. If we find one, return the new, valid address.
4366 OLDX is the address as it was before break_out_memory_refs was called.
4367 In some cases it is useful to look at this to decide what needs to be done.
4369 MODE is the mode of the operand pointed to by X.
4371 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4373 static rtx
4374 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4375 enum machine_mode mode)
4377 rtx orig_x = x;
4379 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4380 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4381 force_operand (XEXP (x, 0), NULL_RTX));
4382 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4383 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4384 force_operand (XEXP (x, 1), NULL_RTX));
4385 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4386 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4387 XEXP (x, 1));
4388 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4389 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4390 force_operand (XEXP (x, 1), NULL_RTX));
4392 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4393 return x;
4395 if (sparc_tls_referenced_p (x))
4396 x = sparc_legitimize_tls_address (x);
4397 else if (flag_pic)
4398 x = sparc_legitimize_pic_address (x, NULL_RTX);
4399 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4400 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4401 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4402 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4403 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4404 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4405 else if (GET_CODE (x) == SYMBOL_REF
4406 || GET_CODE (x) == CONST
4407 || GET_CODE (x) == LABEL_REF)
4408 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4410 return x;
4413 /* Delegitimize an address that was legitimized by the above function. */
4415 static rtx
4416 sparc_delegitimize_address (rtx x)
4418 x = delegitimize_mem_from_attrs (x);
4420 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4421 switch (XINT (XEXP (x, 1), 1))
4423 case UNSPEC_MOVE_PIC:
4424 case UNSPEC_TLSLE:
4425 x = XVECEXP (XEXP (x, 1), 0, 0);
4426 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4427 break;
4428 default:
4429 break;
4432 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4433 if (GET_CODE (x) == MINUS
4434 && REG_P (XEXP (x, 0))
4435 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4436 && GET_CODE (XEXP (x, 1)) == LO_SUM
4437 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4438 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4440 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4441 gcc_assert (GET_CODE (x) == LABEL_REF);
4444 return x;
4447 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4448 replace the input X, or the original X if no replacement is called for.
4449 The output parameter *WIN is 1 if the calling macro should goto WIN,
4450 0 if it should not.
4452 For SPARC, we wish to handle addresses by splitting them into
4453 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4454 This cuts the number of extra insns by one.
4456 Do nothing when generating PIC code and the address is a symbolic
4457 operand or requires a scratch register. */
4459 rtx
4460 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4461 int opnum, int type,
4462 int ind_levels ATTRIBUTE_UNUSED, int *win)
4464 /* Decompose SImode constants into HIGH+LO_SUM. */
4465 if (CONSTANT_P (x)
4466 && (mode != TFmode || TARGET_ARCH64)
4467 && GET_MODE (x) == SImode
4468 && GET_CODE (x) != LO_SUM
4469 && GET_CODE (x) != HIGH
4470 && sparc_cmodel <= CM_MEDLOW
4471 && !(flag_pic
4472 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4474 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4475 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4476 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4477 opnum, (enum reload_type)type);
4478 *win = 1;
4479 return x;
4482 /* We have to recognize what we have already generated above. */
4483 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4485 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4486 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4487 opnum, (enum reload_type)type);
4488 *win = 1;
4489 return x;
4492 *win = 0;
4493 return x;
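/* Worked example of the decomposition above: the constant address of a
   static variable x is rewritten as (lo_sum (high x) x), so reload only
   materializes

	sethi	%hi(x), %g1

   and the memory reference itself becomes [%g1 + %lo(x)], saving the
   `or' that a full set of the address would need. */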
4496 /* Return true if ADDR (a legitimate address expression)
4497 has an effect that depends on the machine mode it is used for.
4499 In PIC mode,
4501 (mem:HI [%l7+a])
4503 is not equivalent to
4505 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4507 because [%l7+a+1] is interpreted as the address of (a+1). */
4510 static bool
4511 sparc_mode_dependent_address_p (const_rtx addr,
4512 addr_space_t as ATTRIBUTE_UNUSED)
4514 if (flag_pic && GET_CODE (addr) == PLUS)
4516 rtx op0 = XEXP (addr, 0);
4517 rtx op1 = XEXP (addr, 1);
4518 if (op0 == pic_offset_table_rtx
4519 && symbolic_operand (op1, VOIDmode))
4520 return true;
4523 return false;
4526 #ifdef HAVE_GAS_HIDDEN
4527 # define USE_HIDDEN_LINKONCE 1
4528 #else
4529 # define USE_HIDDEN_LINKONCE 0
4530 #endif
4532 static void
4533 get_pc_thunk_name (char name[32], unsigned int regno)
4535 const char *reg_name = reg_names[regno];
4537 /* Skip the leading '%' as that cannot be used in a
4538 symbol name. */
4539 reg_name += 1;
4541 if (USE_HIDDEN_LINKONCE)
4542 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4543 else
4544 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4547 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4549 static rtx
4550 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4552 int orig_flag_pic = flag_pic;
4553 rtx insn;
4555 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4556 flag_pic = 0;
4557 if (TARGET_ARCH64)
4558 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4559 else
4560 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4561 flag_pic = orig_flag_pic;
4563 return insn;
4566 /* Emit code to load the GOT register. */
4568 void
4569 load_got_register (void)
4571 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4572 if (!global_offset_table_rtx)
4573 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4575 if (TARGET_VXWORKS_RTP)
4576 emit_insn (gen_vxworks_load_got ());
4577 else
4579 /* The GOT symbol is subject to a PC-relative relocation so we need a
4580 helper function to add the PC value and thus get the final value. */
4581 if (!got_helper_rtx)
4583 char name[32];
4584 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4585 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4588 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4589 got_helper_rtx,
4590 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4593 /* Need to emit this whether or not we obey regdecls,
4594 since setjmp/longjmp can cause life info to screw up.
4595 ??? In the case where we don't obey regdecls, this is not sufficient
4596 since we may not fall out the bottom. */
4597 emit_use (global_offset_table_rtx);
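/* For reference, a sketch of the usual 32-bit output (the thunk name
   comes from get_pc_thunk_name above; exact spelling depends on the
   assembler):

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk body is just "jmp %o7+8; add %o7, %l7, %l7", i.e. it
   adds the PC of the call to the PC-relative GOT offset. */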
4600 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4601 address of the call target. */
4603 void
4604 sparc_emit_call_insn (rtx pat, rtx addr)
4606 rtx insn;
4608 insn = emit_call_insn (pat);
4610 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4611 if (TARGET_VXWORKS_RTP
4612 && flag_pic
4613 && GET_CODE (addr) == SYMBOL_REF
4614 && (SYMBOL_REF_DECL (addr)
4615 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4616 : !SYMBOL_REF_LOCAL_P (addr)))
4618 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4619 crtl->uses_pic_offset_table = 1;
4623 /* Return 1 if RTX is a MEM which is known to be aligned to at
4624 least a DESIRED byte boundary. */
4626 int
4627 mem_min_alignment (rtx mem, int desired)
4629 rtx addr, base, offset;
4631 /* If it's not a MEM we can't accept it. */
4632 if (GET_CODE (mem) != MEM)
4633 return 0;
4635 /* Obviously... */
4636 if (!TARGET_UNALIGNED_DOUBLES
4637 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4638 return 1;
4640 /* ??? The rest of the function predates MEM_ALIGN so
4641 there is probably a bit of redundancy. */
4642 addr = XEXP (mem, 0);
4643 base = offset = NULL_RTX;
4644 if (GET_CODE (addr) == PLUS)
4646 if (GET_CODE (XEXP (addr, 0)) == REG)
4648 base = XEXP (addr, 0);
4650 /* What we are saying here is that if the base
4651 REG is properly aligned, the compiler will make
4652 sure any REG-based index off of it will be
4653 aligned as well. */
4654 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4655 offset = XEXP (addr, 1);
4656 else
4657 offset = const0_rtx;
4660 else if (GET_CODE (addr) == REG)
4662 base = addr;
4663 offset = const0_rtx;
4666 if (base != NULL_RTX)
4668 int regno = REGNO (base);
4670 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4672 /* Check if the compiler has recorded some information
4673 about the alignment of the base REG. If reload has
4674 completed, we already matched with proper alignments.
4675 If not running global_alloc, though, reload might
4676 give us an unaligned pointer to the local stack. */
4677 if (((cfun != 0
4678 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4679 || (optimize && reload_completed))
4680 && (INTVAL (offset) & (desired - 1)) == 0)
4681 return 1;
4683 else
4685 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4686 return 1;
4689 else if (! TARGET_UNALIGNED_DOUBLES
4690 || CONSTANT_P (addr)
4691 || GET_CODE (addr) == LO_SUM)
4693 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4694 is true, in which case we can only assume that an access is aligned if
4695 it is to a constant address, or the address involves a LO_SUM. */
4696 return 1;
4699 /* An obviously unaligned address. */
4700 return 0;
4704 /* Vectors to keep interesting information about registers where it can easily
4705 be got. We used to use the actual mode value as the bit number, but there
4706 are more than 32 modes now. Instead we use two tables: one indexed by
4707 hard register number, and one indexed by mode. */
4709 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4710 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4711 mapped into one sparc_mode_class mode. */
4713 enum sparc_mode_class {
4714 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4715 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4716 CC_MODE, CCFP_MODE
4719 /* Modes for single-word and smaller quantities. */
4720 #define S_MODES \
4721 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4723 /* Modes for double-word and smaller quantities. */
4724 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4726 /* Modes for quad-word and smaller quantities. */
4727 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4729 /* Modes for 8-word and smaller quantities. */
4730 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4732 /* Modes for single-float quantities. */
4733 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4735 /* Modes for double-float and smaller quantities. */
4736 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4738 /* Modes for quad-float and smaller quantities. */
4739 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4741 /* Modes for quad-float pairs and smaller quantities. */
4742 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4744 /* Modes for double-float only quantities. */
4745 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4747 /* Modes for quad-float and double-float only quantities. */
4748 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4750 /* Modes for quad-float pairs and double-float only quantities. */
4751 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4753 /* Modes for condition codes. */
4754 #define CC_MODES (1 << (int) CC_MODE)
4755 #define CCFP_MODES (1 << (int) CCFP_MODE)
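/* With these masks, the HARD_REGNO_MODE_OK test reduces to a single
   bitwise AND (as wired up in sparc.h, not repeated here), roughly:

     (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0  */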
4757 /* Value is 1 if register/mode pair is acceptable on sparc.
4758 The funny mixture of D and T modes is because integer operations
4759 do not specially operate on tetra quantities, so non-quad-aligned
4760 registers can hold quadword quantities (except %o4 and %i4 because
4761 they cross fixed registers). */
4763 /* This points to either the 32-bit or the 64-bit version. */
4764 const int *hard_regno_mode_classes;
4766 static const int hard_32bit_mode_classes[] = {
4767 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4768 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4769 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4770 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4772 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4773 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4774 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4775 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4777 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4778 and none can hold SFmode/SImode values. */
4779 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4780 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4781 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4782 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4784 /* %fcc[0123] */
4785 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4787 /* %icc, %sfp, %gsr */
4788 CC_MODES, 0, D_MODES
4791 static const int hard_64bit_mode_classes[] = {
4792 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4793 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4794 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4795 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4797 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4798 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4799 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4800 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4802 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4803 and none can hold SFmode/SImode values. */
4804 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4805 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4806 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4807 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4809 /* %fcc[0123] */
4810 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4812 /* %icc, %sfp, %gsr */
4813 CC_MODES, 0, D_MODES
4816 int sparc_mode_class [NUM_MACHINE_MODES];
4818 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4820 static void
4821 sparc_init_modes (void)
4823 int i;
4825 for (i = 0; i < NUM_MACHINE_MODES; i++)
4827 switch (GET_MODE_CLASS (i))
4829 case MODE_INT:
4830 case MODE_PARTIAL_INT:
4831 case MODE_COMPLEX_INT:
4832 if (GET_MODE_SIZE (i) < 4)
4833 sparc_mode_class[i] = 1 << (int) H_MODE;
4834 else if (GET_MODE_SIZE (i) == 4)
4835 sparc_mode_class[i] = 1 << (int) S_MODE;
4836 else if (GET_MODE_SIZE (i) == 8)
4837 sparc_mode_class[i] = 1 << (int) D_MODE;
4838 else if (GET_MODE_SIZE (i) == 16)
4839 sparc_mode_class[i] = 1 << (int) T_MODE;
4840 else if (GET_MODE_SIZE (i) == 32)
4841 sparc_mode_class[i] = 1 << (int) O_MODE;
4842 else
4843 sparc_mode_class[i] = 0;
4844 break;
4845 case MODE_VECTOR_INT:
4846 if (GET_MODE_SIZE (i) == 4)
4847 sparc_mode_class[i] = 1 << (int) SF_MODE;
4848 else if (GET_MODE_SIZE (i) == 8)
4849 sparc_mode_class[i] = 1 << (int) DF_MODE;
4850 else
4851 sparc_mode_class[i] = 0;
4852 break;
4853 case MODE_FLOAT:
4854 case MODE_COMPLEX_FLOAT:
4855 if (GET_MODE_SIZE (i) == 4)
4856 sparc_mode_class[i] = 1 << (int) SF_MODE;
4857 else if (GET_MODE_SIZE (i) == 8)
4858 sparc_mode_class[i] = 1 << (int) DF_MODE;
4859 else if (GET_MODE_SIZE (i) == 16)
4860 sparc_mode_class[i] = 1 << (int) TF_MODE;
4861 else if (GET_MODE_SIZE (i) == 32)
4862 sparc_mode_class[i] = 1 << (int) OF_MODE;
4863 else
4864 sparc_mode_class[i] = 0;
4865 break;
4866 case MODE_CC:
4867 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4868 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4869 else
4870 sparc_mode_class[i] = 1 << (int) CC_MODE;
4871 break;
4872 default:
4873 sparc_mode_class[i] = 0;
4874 break;
4878 if (TARGET_ARCH64)
4879 hard_regno_mode_classes = hard_64bit_mode_classes;
4880 else
4881 hard_regno_mode_classes = hard_32bit_mode_classes;
4883 /* Initialize the array used by REGNO_REG_CLASS. */
4884 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4886 if (i < 16 && TARGET_V8PLUS)
4887 sparc_regno_reg_class[i] = I64_REGS;
4888 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4889 sparc_regno_reg_class[i] = GENERAL_REGS;
4890 else if (i < 64)
4891 sparc_regno_reg_class[i] = FP_REGS;
4892 else if (i < 96)
4893 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4894 else if (i < 100)
4895 sparc_regno_reg_class[i] = FPCC_REGS;
4896 else
4897 sparc_regno_reg_class[i] = NO_REGS;
4901 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4903 static inline bool
4904 save_global_or_fp_reg_p (unsigned int regno,
4905 int leaf_function ATTRIBUTE_UNUSED)
4907 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4910 /* Return whether the return address register (%i7) is needed. */
4912 static inline bool
4913 return_addr_reg_needed_p (int leaf_function)
4915 /* If it is live, for example because of __builtin_return_address (0). */
4916 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4917 return true;
4919 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4920 if (!leaf_function
4921 /* Loading the GOT register clobbers %o7. */
4922 || crtl->uses_pic_offset_table
4923 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4924 return true;
4926 return false;
4929 /* Return whether REGNO, a local or in register, must be saved/restored. */
4931 static bool
4932 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4934 /* General case: call-saved registers live at some point. */
4935 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4936 return true;
4938 /* Frame pointer register (%fp) if needed. */
4939 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4940 return true;
4942 /* Return address register (%i7) if needed. */
4943 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4944 return true;
4946 /* GOT register (%l7) if needed. */
4947 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4948 return true;
4950 /* If the function accesses prior frames, the frame pointer and the return
4951 address of the previous frame must be saved on the stack. */
4952 if (crtl->accesses_prior_frames
4953 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4954 return true;
4956 return false;
4959 /* Compute the frame size required by the function. This function is called
4960 during the reload pass and also by sparc_expand_prologue. */
4962 HOST_WIDE_INT
4963 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4965 HOST_WIDE_INT frame_size, apparent_frame_size;
4966 int args_size, n_global_fp_regs = 0;
4967 bool save_local_in_regs_p = false;
4968 unsigned int i;
4970 /* If the function allocates dynamic stack space, the dynamic offset is
4971 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4972 if (leaf_function && !cfun->calls_alloca)
4973 args_size = 0;
4974 else
4975 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4977 /* Calculate space needed for global registers. */
4978 if (TARGET_ARCH64)
4979 for (i = 0; i < 8; i++)
4980 if (save_global_or_fp_reg_p (i, 0))
4981 n_global_fp_regs += 2;
4982 else
4983 for (i = 0; i < 8; i += 2)
4984 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4985 n_global_fp_regs += 2;
4987 /* In the flat window model, find out which local and in registers need to
4988 be saved. We don't reserve space in the current frame for them as they
4989 will be spilled into the register window save area of the caller's frame.
4990 However, as soon as we use this register window save area, we must create
4991 that of the current frame to make it the live one. */
4992 if (TARGET_FLAT)
4993 for (i = 16; i < 32; i++)
4994 if (save_local_or_in_reg_p (i, leaf_function))
4996 save_local_in_regs_p = true;
4997 break;
5000 /* Calculate space needed for FP registers. */
5001 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5002 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5003 n_global_fp_regs += 2;
5005 if (size == 0
5006 && n_global_fp_regs == 0
5007 && args_size == 0
5008 && !save_local_in_regs_p)
5009 frame_size = apparent_frame_size = 0;
5010 else
5012 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5013 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
5014 apparent_frame_size += n_global_fp_regs * 4;
5016 /* We need to add the size of the outgoing argument area. */
5017 frame_size = apparent_frame_size + ((args_size + 7) & -8);
5019 /* And that of the register window save area. */
5020 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5022 /* Finally, bump to the appropriate alignment. */
5023 frame_size = SPARC_STACK_ALIGN (frame_size);
5026 /* Set up values for use in prologue and epilogue. */
5027 sparc_frame_size = frame_size;
5028 sparc_apparent_frame_size = apparent_frame_size;
5029 sparc_n_global_fp_regs = n_global_fp_regs;
5030 sparc_save_local_in_regs_p = save_local_in_regs_p;
5032 return frame_size;
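/* Illustrative numbers for 32-bit, non-flat code (taking
   STARTING_FRAME_OFFSET as 0 for simplicity): with 20 bytes of locals
   and no global/FP saves, apparent_frame_size = (20 + 7) & -8 = 24;
   a non-leaf function then adds the rounded outgoing-argument area and
   the register window save area via FIRST_PARM_OFFSET before the final
   SPARC_STACK_ALIGN rounding. */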
5035 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5037 int
5038 sparc_initial_elimination_offset (int to)
5040 int offset;
5042 if (to == STACK_POINTER_REGNUM)
5043 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5044 else
5045 offset = 0;
5047 offset += SPARC_STACK_BIAS;
5048 return offset;
5051 /* Output any necessary .register pseudo-ops. */
5053 void
5054 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5056 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5057 int i;
5059 if (TARGET_ARCH32)
5060 return;
5062 /* Check if %g[2367] were used without
5063 .register being printed for them already. */
5064 for (i = 2; i < 8; i++)
5066 if (df_regs_ever_live_p (i)
5067 && ! sparc_hard_reg_printed [i])
5069 sparc_hard_reg_printed [i] = 1;
5070 /* %g7 is used as TLS base register, use #ignore
5071 for it instead of #scratch. */
5072 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5073 i == 7 ? "ignore" : "scratch");
5075 if (i == 3) i = 5;
5077 #endif
5080 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5082 #if PROBE_INTERVAL > 4096
5083 #error Cannot use indexed addressing mode for stack probing
5084 #endif
5086 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5087 inclusive. These are offsets from the current stack pointer.
5089 Note that we don't use the REG+REG addressing mode for the probes because
5090 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5091 so the advantages of having a single code path win here. */
5093 static void
5094 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5096 rtx g1 = gen_rtx_REG (Pmode, 1);
5098 /* See if we have a constant small number of probes to generate. If so,
5099 that's the easy case. */
5100 if (size <= PROBE_INTERVAL)
5102 emit_move_insn (g1, GEN_INT (first));
5103 emit_insn (gen_rtx_SET (VOIDmode, g1,
5104 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5105 emit_stack_probe (plus_constant (Pmode, g1, -size));
5108 /* The run-time loop is made up of 10 insns in the generic case while the
5109 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
5110 else if (size <= 5 * PROBE_INTERVAL)
5112 HOST_WIDE_INT i;
5114 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5115 emit_insn (gen_rtx_SET (VOIDmode, g1,
5116 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5117 emit_stack_probe (g1);
5119 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5120 it exceeds SIZE. If only two probes are needed, this will not
5121 generate any code. Then probe at FIRST + SIZE. */
5122 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5124 emit_insn (gen_rtx_SET (VOIDmode, g1,
5125 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5126 emit_stack_probe (g1);
5129 emit_stack_probe (plus_constant (Pmode, g1,
5130 (i - PROBE_INTERVAL) - size));
5133 /* Otherwise, do the same as above, but in a loop. Note that we must be
5134 extra careful with variables wrapping around because we might be at
5135 the very top (or the very bottom) of the address space and we have
5136 to be able to handle this case properly; in particular, we use an
5137 equality test for the loop condition. */
5138 else
5140 HOST_WIDE_INT rounded_size;
5141 rtx g4 = gen_rtx_REG (Pmode, 4);
5143 emit_move_insn (g1, GEN_INT (first));
5146 /* Step 1: round SIZE to the previous multiple of the interval. */
5148 rounded_size = size & -PROBE_INTERVAL;
5149 emit_move_insn (g4, GEN_INT (rounded_size));
5152 /* Step 2: compute initial and final value of the loop counter. */
5154 /* TEST_ADDR = SP + FIRST. */
5155 emit_insn (gen_rtx_SET (VOIDmode, g1,
5156 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5158 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5159 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
5162 /* Step 3: the loop
5164 while (TEST_ADDR != LAST_ADDR)
5166 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5167 probe at TEST_ADDR
5170 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5171 until it is equal to ROUNDED_SIZE. */
5173 if (TARGET_ARCH64)
5174 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5175 else
5176 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5179 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5180 that SIZE is equal to ROUNDED_SIZE. */
5182 if (size != rounded_size)
5183 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5186 /* Make sure nothing is scheduled before we are done. */
5187 emit_insn (gen_blockage ());
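/* Worked example with PROBE_INTERVAL == 4096: for SIZE == 20480
   (exactly 5 intervals) the unrolled branch above probes at
   FIRST+4096, FIRST+8192, FIRST+12288, FIRST+16384 and finally at
   FIRST+SIZE.  For SIZE == 30000 the loop branch rounds down to
   28672, probes every 4096 bytes up to that point, and step 4 emits
   one extra probe at FIRST+SIZE since 30000 != 28672. */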
5190 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5191 absolute addresses. */
5193 const char *
5194 output_probe_stack_range (rtx reg1, rtx reg2)
5196 static int labelno = 0;
5197 char loop_lab[32], end_lab[32];
5198 rtx xops[2];
5200 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
5201 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
5203 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5205 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
5206 xops[0] = reg1;
5207 xops[1] = reg2;
5208 output_asm_insn ("cmp\t%0, %1", xops);
5209 if (TARGET_ARCH64)
5210 fputs ("\tbe,pn\t%xcc,", asm_out_file);
5211 else
5212 fputs ("\tbe\t", asm_out_file);
5213 assemble_name_raw (asm_out_file, end_lab);
5214 fputc ('\n', asm_out_file);
5216 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5217 xops[1] = GEN_INT (-PROBE_INTERVAL);
5218 output_asm_insn (" add\t%0, %1, %0", xops);
5220 /* Probe at TEST_ADDR and branch. */
5221 if (TARGET_ARCH64)
5222 fputs ("\tba,pt\t%xcc,", asm_out_file);
5223 else
5224 fputs ("\tba\t", asm_out_file);
5225 assemble_name_raw (asm_out_file, loop_lab);
5226 fputc ('\n', asm_out_file);
5227 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5228 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5230 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
5232 return "";
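/* With reg1 = %g1 and reg2 = %g4 (as passed by sparc_emit_probe_stack_range)
   and PROBE_INTERVAL == 4096, the emitted 32-bit loop reads:

   .LPSRL0:
	cmp	%g1, %g4
	be	.LPSRE0
	 add	%g1, -4096, %g1
	ba	.LPSRL0
	 st	%g0, [%g1+0]
   .LPSRE0:

   (on 64-bit the branches are be,pn/ba,pt on %xcc and the store offset
   is the stack bias). */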
5235 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5236 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5237 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5238 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5239 the action to be performed if it returns false. Return the new offset. */
5241 typedef bool (*sorr_pred_t) (unsigned int, int);
5242 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5244 static int
5245 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5246 int offset, int leaf_function, sorr_pred_t save_p,
5247 sorr_act_t action_true, sorr_act_t action_false)
5249 unsigned int i;
5250 rtx mem, insn;
5252 if (TARGET_ARCH64 && high <= 32)
5254 int fp_offset = -1;
5256 for (i = low; i < high; i++)
5258 if (save_p (i, leaf_function))
5260 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5261 base, offset));
5262 if (action_true == SORR_SAVE)
5264 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5265 RTX_FRAME_RELATED_P (insn) = 1;
5267 else /* action_true == SORR_RESTORE */
5269 /* The frame pointer must be restored last since its old
5270 value may be used as base address for the frame. This
5271 is problematic in 64-bit mode only because of the lack
5272 of double-word load instruction. */
5273 if (i == HARD_FRAME_POINTER_REGNUM)
5274 fp_offset = offset;
5275 else
5276 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5278 offset += 8;
5280 else if (action_false == SORR_ADVANCE)
5281 offset += 8;
5284 if (fp_offset >= 0)
5286 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5287 emit_move_insn (hard_frame_pointer_rtx, mem);
5290 else
5292 for (i = low; i < high; i += 2)
5294 bool reg0 = save_p (i, leaf_function);
5295 bool reg1 = save_p (i + 1, leaf_function);
5296 enum machine_mode mode;
5297 int regno;
5299 if (reg0 && reg1)
5301 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5302 regno = i;
5304 else if (reg0)
5306 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5307 regno = i;
5309 else if (reg1)
5311 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5312 regno = i + 1;
5313 offset += 4;
5315 else
5317 if (action_false == SORR_ADVANCE)
5318 offset += 8;
5319 continue;
5322 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5323 if (action_true == SORR_SAVE)
5325 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5326 RTX_FRAME_RELATED_P (insn) = 1;
5327 if (mode == DImode)
5329 rtx set1, set2;
5330 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5331 offset));
5332 set1 = gen_rtx_SET (VOIDmode, mem,
5333 gen_rtx_REG (SImode, regno));
5334 RTX_FRAME_RELATED_P (set1) = 1;
5335 mem
5336 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5337 offset + 4));
5338 set2 = gen_rtx_SET (VOIDmode, mem,
5339 gen_rtx_REG (SImode, regno + 1));
5340 RTX_FRAME_RELATED_P (set2) = 1;
5341 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5342 gen_rtx_PARALLEL (VOIDmode,
5343 gen_rtvec (2, set1, set2)));
5346 else /* action_true == SORR_RESTORE */
5347 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5349 /* Always preserve double-word alignment. */
5350 offset = (offset + 8) & -8;
5354 return offset;
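/* In the 32-bit (or high-register) path above, registers are scanned
   in aligned pairs: if both members are live they move as one DImode
   or DFmode access (std/ldd-style), if only one is live it moves in
   SImode/SFmode at the proper half of the slot, and the running offset
   is re-rounded to 8 after every pair so double-word alignment is
   preserved. */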
5357 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5359 static rtx
5360 emit_adjust_base_to_offset (rtx base, int offset)
5362 /* ??? This might be optimized a little as %g1 might already have a
5363 value close enough that a single add insn will do. */
5364 /* ??? Although, all of this is probably only a temporary fix because
5365 if %g1 can hold a function result, then sparc_expand_epilogue will
5366 lose (the result will be clobbered). */
5367 rtx new_base = gen_rtx_REG (Pmode, 1);
5368 emit_move_insn (new_base, GEN_INT (offset));
5369 emit_insn (gen_rtx_SET (VOIDmode,
5370 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5371 return new_base;
5374 /* Emit code to save/restore call-saved global and FP registers. */
5376 static void
5377 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5379 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5381 base = emit_adjust_base_to_offset (base, offset);
5382 offset = 0;
5385 offset
5386 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5387 save_global_or_fp_reg_p, action, SORR_NONE);
5388 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5389 save_global_or_fp_reg_p, action, SORR_NONE);
5392 /* Emit code to save/restore call-saved local and in registers. */
5394 static void
5395 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5397 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5399 base = emit_adjust_base_to_offset (base, offset);
5400 offset = 0;
5403 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5404 save_local_or_in_reg_p, action, SORR_ADVANCE);
5407 /* Emit a window_save insn. */
5409 static rtx
5410 emit_window_save (rtx increment)
5412 rtx insn = emit_insn (gen_window_save (increment));
5413 RTX_FRAME_RELATED_P (insn) = 1;
5415 /* The incoming return address (%o7) is saved in %i7. */
5416 add_reg_note (insn, REG_CFA_REGISTER,
5417 gen_rtx_SET (VOIDmode,
5418 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5419 gen_rtx_REG (Pmode,
5420 INCOMING_RETURN_ADDR_REGNUM)));
5422 /* The window save event. */
5423 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5425 /* The CFA is %fp, the hard frame pointer. */
5426 add_reg_note (insn, REG_CFA_DEF_CFA,
5427 plus_constant (Pmode, hard_frame_pointer_rtx,
5428 INCOMING_FRAME_SP_OFFSET));
5430 return insn;
5433 /* Generate an increment for the stack pointer. */
5435 static rtx
5436 gen_stack_pointer_inc (rtx increment)
5438 return gen_rtx_SET (VOIDmode,
5439 stack_pointer_rtx,
5440 gen_rtx_PLUS (Pmode,
5441 stack_pointer_rtx,
5442 increment));
5445 /* Expand the function prologue. The prologue is responsible for reserving
5446 storage for the frame, saving the call-saved registers and loading the
5447 GOT register if needed. */
5449 void
5450 sparc_expand_prologue (void)
5452 HOST_WIDE_INT size;
5453 rtx insn;
5455 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5456 on the final value of the flag means deferring the prologue/epilogue
5457 expansion until just before the second scheduling pass, which is too
5458 late to emit multiple epilogues or return insns.
5460 Of course we are making the assumption that the value of the flag
5461 will not change between now and its final value. Of the three parts
5462 of the formula, only the last one can reasonably vary. Let's take a
5463 closer look, assuming that the first two are true (otherwise the
5464 last value is effectively moot).
5466 If only_leaf_regs_used returns false, the global predicate will also
5467 be false so the actual frame size calculated below will be positive.
5468 As a consequence, the save_register_window insn will be emitted in
5469 the instruction stream; now this insn explicitly references %fp
5470 which is not a leaf register so only_leaf_regs_used will always
5471 return false subsequently.
5473 If only_leaf_regs_used returns true, we hope that the subsequent
5474 optimization passes won't cause non-leaf registers to pop up. For
5475 example, the regrename pass has special provisions to not rename to
5476 non-leaf registers in a leaf function. */
5477 sparc_leaf_function_p
5478 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5480 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5482 if (flag_stack_usage_info)
5483 current_function_static_stack_size = size;
5485 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5487 if (crtl->is_leaf && !cfun->calls_alloca)
5489 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5490 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5491 size - STACK_CHECK_PROTECT);
5493 else if (size > 0)
5494 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5497 if (size == 0)
5498 ; /* do nothing. */
5499 else if (sparc_leaf_function_p)
5501 rtx size_int_rtx = GEN_INT (-size);
5503 if (size <= 4096)
5504 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5505 else if (size <= 8192)
5507 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5508 RTX_FRAME_RELATED_P (insn) = 1;
5510 /* %sp is still the CFA register. */
5511 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5513 else
5515 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5516 emit_move_insn (size_rtx, size_int_rtx);
5517 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5518 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5519 gen_stack_pointer_inc (size_int_rtx));
5522 RTX_FRAME_RELATED_P (insn) = 1;
5524 else
5526 rtx size_int_rtx = GEN_INT (-size);
5528 if (size <= 4096)
5529 emit_window_save (size_int_rtx);
5530 else if (size <= 8192)
5532 emit_window_save (GEN_INT (-4096));
5534 /* %sp is not the CFA register anymore. */
5535 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5537 /* Make sure no %fp-based store is issued until after the frame is
5538 established. The offset between the frame pointer and the stack
5539 pointer is calculated relative to the value of the stack pointer
5540 at the end of the function prologue, and moving instructions that
5541 access the stack via the frame pointer between the instructions
5542 that decrement the stack pointer could result in accessing the
5543 register window save area, which is volatile. */
5544 emit_insn (gen_frame_blockage ());
5546 else
5548 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5549 emit_move_insn (size_rtx, size_int_rtx);
5550 emit_window_save (size_rtx);
5554 if (sparc_leaf_function_p)
5556 sparc_frame_base_reg = stack_pointer_rtx;
5557 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5559 else
5561 sparc_frame_base_reg = hard_frame_pointer_rtx;
5562 sparc_frame_base_offset = SPARC_STACK_BIAS;
5565 if (sparc_n_global_fp_regs > 0)
5566 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5567 sparc_frame_base_offset
5568 - sparc_apparent_frame_size,
5569 SORR_SAVE);
5571 /* Load the GOT register if needed. */
5572 if (crtl->uses_pic_offset_table)
5573 load_got_register ();
5575 /* Advertise that the data calculated just above are now valid. */
5576 sparc_prologue_data_valid_p = true;
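/* Schematically, for a small frame this boils down to a single
   "add %sp, -SIZE, %sp" in the leaf case and a single
   "save %sp, -SIZE, %sp" (the window_save insn) otherwise, with the
   4096/8192 split above keeping each immediate within simm13 and
   larger sizes going through %g1. */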
5579 /* Expand the function prologue in the flat window model. The prologue is
5580 responsible for reserving storage for the frame, saving the call-saved
5581 registers and loading the GOT register if needed. */
5583 void
5584 sparc_flat_expand_prologue (void)
5586 HOST_WIDE_INT size;
5587 rtx insn;
5589 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5591 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5593 if (flag_stack_usage_info)
5594 current_function_static_stack_size = size;
5596 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5598 if (crtl->is_leaf && !cfun->calls_alloca)
5600 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5601 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5602 size - STACK_CHECK_PROTECT);
5604 else if (size > 0)
5605 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5608 if (sparc_save_local_in_regs_p)
5609 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5610 SORR_SAVE);
5612 if (size == 0)
5613 ; /* do nothing. */
5614 else
5616 rtx size_int_rtx, size_rtx;
5618 size_rtx = size_int_rtx = GEN_INT (-size);
5620 /* We establish the frame (i.e. decrement the stack pointer) first, even
5621 if we use a frame pointer, because we cannot clobber any call-saved
5622 registers, including the frame pointer, if we haven't created a new
5623 register save area, for the sake of compatibility with the ABI. */
5624 if (size <= 4096)
5625 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5626 else if (size <= 8192 && !frame_pointer_needed)
5628 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5629 RTX_FRAME_RELATED_P (insn) = 1;
5630 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5632 else
5634 size_rtx = gen_rtx_REG (Pmode, 1);
5635 emit_move_insn (size_rtx, size_int_rtx);
5636 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5637 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5638 gen_stack_pointer_inc (size_int_rtx));
5640 RTX_FRAME_RELATED_P (insn) = 1;
5642 /* Ensure nothing is scheduled until after the frame is established. */
5643 emit_insn (gen_blockage ());
5645 if (frame_pointer_needed)
5647 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5648 gen_rtx_MINUS (Pmode,
5649 stack_pointer_rtx,
5650 size_rtx)));
5651 RTX_FRAME_RELATED_P (insn) = 1;
5653 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5654 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5655 plus_constant (Pmode, stack_pointer_rtx,
5656 size)));
5659 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5661 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5662 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5664 insn = emit_move_insn (i7, o7);
5665 RTX_FRAME_RELATED_P (insn) = 1;
5667 add_reg_note (insn, REG_CFA_REGISTER,
5668 gen_rtx_SET (VOIDmode, i7, o7));
5670 /* Prevent this instruction from ever being considered dead,
5671 even if this function has no epilogue. */
5672 emit_use (i7);
5676 if (frame_pointer_needed)
5678 sparc_frame_base_reg = hard_frame_pointer_rtx;
5679 sparc_frame_base_offset = SPARC_STACK_BIAS;
5681 else
5683 sparc_frame_base_reg = stack_pointer_rtx;
5684 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5687 if (sparc_n_global_fp_regs > 0)
5688 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5689 sparc_frame_base_offset
5690 - sparc_apparent_frame_size,
5691 SORR_SAVE);
5693 /* Load the GOT register if needed. */
5694 if (crtl->uses_pic_offset_table)
5695 load_got_register ();
5697 /* Advertise that the data calculated just above are now valid. */
5698 sparc_prologue_data_valid_p = true;
5701 /* This function generates the assembly code for function entry, which boils
5702 down to emitting the necessary .register directives. */
5704 static void
5705 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5707 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5708 if (!TARGET_FLAT)
5709 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5711 sparc_output_scratch_registers (file);
5714 /* Expand the function epilogue, either normal or part of a sibcall.
5715 We emit all the instructions except the return or the call. */
5717 void
5718 sparc_expand_epilogue (bool for_eh)
5720 HOST_WIDE_INT size = sparc_frame_size;
5722 if (sparc_n_global_fp_regs > 0)
5723 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5724 sparc_frame_base_offset
5725 - sparc_apparent_frame_size,
5726 SORR_RESTORE);
5728 if (size == 0 || for_eh)
5729 ; /* do nothing. */
5730 else if (sparc_leaf_function_p)
5732 if (size <= 4096)
5733 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5734 else if (size <= 8192)
5736 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5737 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5739 else
5741 rtx reg = gen_rtx_REG (Pmode, 1);
5742 emit_move_insn (reg, GEN_INT (size));
5743 emit_insn (gen_stack_pointer_inc (reg));
5748 /* Expand the function epilogue in the flat window model, either normal or
5749 part of a sibcall. We emit all the instructions except the return or the call. */
5751 void
5752 sparc_flat_expand_epilogue (bool for_eh)
5754 HOST_WIDE_INT size = sparc_frame_size;
5756 if (sparc_n_global_fp_regs > 0)
5757 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5758 sparc_frame_base_offset
5759 - sparc_apparent_frame_size,
5760 SORR_RESTORE);
5762 /* If we have a frame pointer, we'll need both to restore it before the
5763 frame is destroyed and use its current value in destroying the frame.
5764 Since we don't have an atomic way to do that in the flat window model,
5765 we save the current value into a temporary register (%g1). */
5766 if (frame_pointer_needed && !for_eh)
5767 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5769 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5770 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5771 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5773 if (sparc_save_local_in_regs_p)
5774 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5775 sparc_frame_base_offset,
5776 SORR_RESTORE);
5778 if (size == 0 || for_eh)
5779 ; /* do nothing. */
5780 else if (frame_pointer_needed)
5782 /* Make sure the frame is destroyed after everything else is done. */
5783 emit_insn (gen_blockage ());
5785 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5787 else
5789 /* Likewise. */
5790 emit_insn (gen_blockage ());
5792 if (size <= 4096)
5793 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5794 else if (size <= 8192)
5796 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5797 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5799 else
5801 rtx reg = gen_rtx_REG (Pmode, 1);
5802 emit_move_insn (reg, GEN_INT (size));
5803 emit_insn (gen_stack_pointer_inc (reg));
5808 /* Return true if it is appropriate to emit `return' instructions in the
5809 body of a function. */
5811 bool
5812 sparc_can_use_return_insn_p (void)
5814 return sparc_prologue_data_valid_p
5815 && sparc_n_global_fp_regs == 0
5816 && TARGET_FLAT
5817 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5818 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5821 /* This function generates the assembly code for function exit. */
5823 static void
5824 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5826 /* If the last two instructions of a function are "call foo; dslot;"
5827 the return address might point to the first instruction in the next
5828 function and we have to output a dummy nop for the sake of sane
5829 backtraces in such cases. This is pointless for sibling calls since
5830 the return address is explicitly adjusted. */
5832 rtx insn, last_real_insn;
5834 insn = get_last_insn ();
5836 last_real_insn = prev_real_insn (insn);
5837 if (last_real_insn
5838 && NONJUMP_INSN_P (last_real_insn)
5839 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5840 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5842 if (last_real_insn
5843 && CALL_P (last_real_insn)
5844 && !SIBLING_CALL_P (last_real_insn))
5845 fputs ("\tnop\n", file);
5847 sparc_output_deferred_case_vectors ();
5850 /* Output a 'restore' instruction. */
5852 static void
5853 output_restore (rtx pat)
5855 rtx operands[3];
5857 if (! pat)
5859 fputs ("\t restore\n", asm_out_file);
5860 return;
5863 gcc_assert (GET_CODE (pat) == SET);
5865 operands[0] = SET_DEST (pat);
5866 pat = SET_SRC (pat);
5868 switch (GET_CODE (pat))
5870 case PLUS:
5871 operands[1] = XEXP (pat, 0);
5872 operands[2] = XEXP (pat, 1);
5873 output_asm_insn (" restore %r1, %2, %Y0", operands);
5874 break;
5875 case LO_SUM:
5876 operands[1] = XEXP (pat, 0);
5877 operands[2] = XEXP (pat, 1);
5878 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5879 break;
5880 case ASHIFT:
5881 operands[1] = XEXP (pat, 0);
5882 gcc_assert (XEXP (pat, 1) == const1_rtx);
5883 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5884 break;
5885 default:
5886 operands[1] = pat;
5887 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5888 break;
5892 /* Output a return. */
5894 const char *
5895 output_return (rtx insn)
5897 if (crtl->calls_eh_return)
5899 /* If the function uses __builtin_eh_return, the eh_return
5900 machinery occupies the delay slot. */
5901 gcc_assert (!final_sequence);
5903 if (flag_delayed_branch)
5905 if (!TARGET_FLAT && TARGET_V9)
5906 fputs ("\treturn\t%i7+8\n", asm_out_file);
5907 else
5909 if (!TARGET_FLAT)
5910 fputs ("\trestore\n", asm_out_file);
5912 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5915 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5917 else
5919 if (!TARGET_FLAT)
5920 fputs ("\trestore\n", asm_out_file);
5922 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5923 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5926 else if (sparc_leaf_function_p || TARGET_FLAT)
5928 /* This is a leaf or flat function so we don't have to bother restoring
5929 the register window, which frees us from dealing with the convoluted
5930 semantics of restore/return. We simply output the jump to the
5931 return address and the insn in the delay slot (if any). */
5933 return "jmp\t%%o7+%)%#";
5935 else
5937 /* This is a regular function so we have to restore the register window.
5938 We may have a pending insn for the delay slot, which will be either
5939 combined with the 'restore' instruction or put in the delay slot of
5940 the 'return' instruction. */
5942 if (final_sequence)
5944 rtx delay, pat;
5946 delay = NEXT_INSN (insn);
5947 gcc_assert (delay);
5949 pat = PATTERN (delay);
5951 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5953 epilogue_renumber (&pat, 0);
5954 return "return\t%%i7+%)%#";
5956 else
5958 output_asm_insn ("jmp\t%%i7+%)", NULL);
5959 output_restore (pat);
5960 PATTERN (delay) = gen_blockage ();
5961 INSN_CODE (delay) = -1;
5964 else
5966 /* The delay slot is empty. */
5967 if (TARGET_V9)
5968 return "return\t%%i7+%)\n\t nop";
5969 else if (flag_delayed_branch)
5970 return "jmp\t%%i7+%)\n\t restore";
5971 else
5972 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5976 return "";
5979 /* Output a sibling call. */
5981 const char *
5982 output_sibcall (rtx insn, rtx call_operand)
5984 rtx operands[1];
5986 gcc_assert (flag_delayed_branch);
5988 operands[0] = call_operand;
5990 if (sparc_leaf_function_p || TARGET_FLAT)
5992 /* This is a leaf or flat function so we don't have to bother restoring
5993 the register window. We simply output the jump to the function and
5994 the insn in the delay slot (if any). */
5996 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5998 if (final_sequence)
5999 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6000 operands);
6001 else
6002 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6003 it into a branch if possible. */
6004 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6005 operands);
6007 else
6009 /* This is a regular function so we have to restore the register window.
6010 We may have a pending insn for the delay slot, which will be combined
6011 with the 'restore' instruction. */
6013 output_asm_insn ("call\t%a0, 0", operands);
6015 if (final_sequence)
6017 rtx delay = NEXT_INSN (insn);
6018 gcc_assert (delay);
6020 output_restore (PATTERN (delay));
6022 PATTERN (delay) = gen_blockage ();
6023 INSN_CODE (delay) = -1;
6025 else
6026 output_restore (NULL_RTX);
6029 return "";
6032 /* Functions for handling argument passing.
6034 For 32-bit, the first 6 args are normally in registers and the rest are
6035 pushed. Any arg that starts within the first 6 words is at least
6036 partially passed in a register unless its data type forbids it.
6038 For 64-bit, the argument registers are laid out as an array of 16 elements
6039 and arguments are added sequentially. The first 6 int args and up to the
6040 first 16 fp args (depending on size) are passed in regs.
6042 Slot Stack Integral Float Float in structure Double Long Double
6043 ---- ----- -------- ----- ------------------ ------ -----------
6044 15 [SP+248] %f31 %f30,%f31 %d30
6045 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6046 13 [SP+232] %f27 %f26,%f27 %d26
6047 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6048 11 [SP+216] %f23 %f22,%f23 %d22
6049 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6050 9 [SP+200] %f19 %f18,%f19 %d18
6051 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6052 7 [SP+184] %f15 %f14,%f15 %d14
6053 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6054 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6055 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6056 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6057 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6058 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6059 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6061 Here SP = %sp with -mno-stack-bias, and %sp + stack_bias otherwise.
6063 Integral arguments are always passed as 64-bit quantities appropriately
6064 extended.
6066 Passing of floating point values is handled as follows.
6067 If a prototype is in scope:
6068 If the value is in a named argument (i.e. the function is not stdarg,
6069 or the value is not part of the `...'), then the value is passed in the
6070 appropriate fp reg.
6071 If the value is part of the `...' and is passed in one of the first 6
6072 slots then the value is passed in the appropriate int reg.
6073 If the value is part of the `...' and is not passed in one of the first 6
6074 slots then the value is passed in memory.
6075 If a prototype is not in scope:
6076 If the value is one of the first 6 arguments the value is passed in the
6077 appropriate integer reg and the appropriate fp reg.
6078 If the value is not one of the first 6 arguments the value is passed in
6079 the appropriate fp reg and in memory.
6082 Summary of the calling conventions implemented by GCC on the SPARC:
6084 32-bit ABI:
6085 size argument return value
6087 small integer <4 int. reg. int. reg.
6088 word 4 int. reg. int. reg.
6089 double word 8 int. reg. int. reg.
6091 _Complex small integer <8 int. reg. int. reg.
6092 _Complex word 8 int. reg. int. reg.
6093 _Complex double word 16 memory int. reg.
6095 vector integer <=8 int. reg. FP reg.
6096 vector integer >8 memory memory
6098 float 4 int. reg. FP reg.
6099 double 8 int. reg. FP reg.
6100 long double 16 memory memory
6102 _Complex float 8 memory FP reg.
6103 _Complex double 16 memory FP reg.
6104 _Complex long double 32 memory FP reg.
6106 vector float any memory memory
6108 aggregate any memory memory
6112 64-bit ABI:
6113 size argument return value
6115 small integer <8 int. reg. int. reg.
6116 word 8 int. reg. int. reg.
6117 double word 16 int. reg. int. reg.
6119 _Complex small integer <16 int. reg. int. reg.
6120 _Complex word 16 int. reg. int. reg.
6121 _Complex double word 32 memory int. reg.
6123 vector integer <=16 FP reg. FP reg.
6124 vector integer 16<s<=32 memory FP reg.
6125 vector integer >32 memory memory
6127 float 4 FP reg. FP reg.
6128 double 8 FP reg. FP reg.
6129 long double 16 FP reg. FP reg.
6131 _Complex float 8 FP reg. FP reg.
6132 _Complex double 16 FP reg. FP reg.
6133 _Complex long double 32 memory FP reg.
6135 vector float <=16 FP reg. FP reg.
6136 vector float 16<s<=32 memory FP reg.
6137 vector float >32 memory memory
6139 aggregate <=16 reg. reg.
6140 aggregate 16<s<=32 memory reg.
6141 aggregate >32 memory memory
6145 Note #1: complex floating-point types follow the extended SPARC ABIs as
6146 implemented by the Sun compiler.
6148 Note #2: integral vector types follow the conventions for scalar
6149 floating-point types to match what is implemented by the Sun VIS SDK.
6151 Note #3: floating-point vector types follow the conventions for
6152 aggregate types. */
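/* As an illustration of the 64-bit slot table above: each slot is 8 bytes
   wide and slot 0 lives at [SP+128], SP being the biased stack pointer.
   A minimal sketch of that arithmetic, using the hypothetical helper name
   sparc64_arg_slot_offset (not part of this file):  */
#if 0
static HOST_WIDE_INT
sparc64_arg_slot_offset (int slot)
{
  /* Slot 0 -> SP+128, slot 1 -> SP+136, ... slot 15 -> SP+248.  */
  return 128 + 8 * slot;
}
#endif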
6155 /* Maximum number of int regs for args. */
6156 #define SPARC_INT_ARG_MAX 6
6157 /* Maximum number of fp regs for args. */
6158 #define SPARC_FP_ARG_MAX 16
6160 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
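/* For example, with UNITS_PER_WORD == 8 (64-bit), ROUND_ADVANCE (12) == 2
   and ROUND_ADVANCE (8) == 1; with UNITS_PER_WORD == 4 (32-bit),
   ROUND_ADVANCE (12) == 3.  */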
6162 /* Handle the INIT_CUMULATIVE_ARGS macro.
6163 Initialize a variable CUM of type CUMULATIVE_ARGS
6164 for a call to a function whose data type is FNTYPE.
6165 For a library call, FNTYPE is 0. */
6167 void
6168 init_cumulative_args (struct sparc_args *cum, tree fntype,
6169 rtx libname ATTRIBUTE_UNUSED,
6170 tree fndecl ATTRIBUTE_UNUSED)
6172 cum->words = 0;
6173 cum->prototype_p = fntype && prototype_p (fntype);
6174 cum->libcall_p = fntype == 0;
6177 /* Handle promotion of pointer and integer arguments. */
6179 static enum machine_mode
6180 sparc_promote_function_mode (const_tree type,
6181 enum machine_mode mode,
6182 int *punsignedp,
6183 const_tree fntype ATTRIBUTE_UNUSED,
6184 int for_return ATTRIBUTE_UNUSED)
6186 if (type != NULL_TREE && POINTER_TYPE_P (type))
6188 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6189 return Pmode;
6192 /* Integral arguments are passed as full words, as per the ABI. */
6193 if (GET_MODE_CLASS (mode) == MODE_INT
6194 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6195 return word_mode;
6197 return mode;
6200 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6202 static bool
6203 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6205 return TARGET_ARCH64;
6208 /* Scan the record type TYPE and return the following predicates:
6209 - INTREGS_P: the record contains at least one field or sub-field
6210 that is eligible for promotion in integer registers.
6211 - FPREGS_P: the record contains at least one field or sub-field
6212 that is eligible for promotion in floating-point registers.
6213 - PACKED_P: the record contains at least one field that is packed.
6215 Sub-fields are not taken into account for the PACKED_P predicate. */
6217 static void
6218 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6219 int *packed_p)
6221 tree field;
6223 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6225 if (TREE_CODE (field) == FIELD_DECL)
6227 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6228 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6229 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6230 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6231 && TARGET_FPU)
6232 *fpregs_p = 1;
6233 else
6234 *intregs_p = 1;
6236 if (packed_p && DECL_PACKED (field))
6237 *packed_p = 1;
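/* Illustrative use of the predicates above, assuming TARGET_FPU: for a
   hypothetical

     struct s { int i; double d; };

   scan_record_type sets *intregs_p (for I) and *fpregs_p (for D) but not
   *packed_p; marking either field with __attribute__ ((packed)) would set
   *packed_p as well.  */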
6242 /* Compute the slot number to pass an argument in.
6243 Return the slot number or -1 if passing on the stack.
6245 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6246 the preceding args and about the function being called.
6247 MODE is the argument's machine mode.
6248 TYPE is the data type of the argument (as a tree).
6249 This is null for libcalls where that information may
6250 not be available.
6251 NAMED is nonzero if this argument is a named parameter
6252 (otherwise it is an extra parameter matching an ellipsis).
6253 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6254 *PREGNO records the register number to use if scalar type.
6255 *PPADDING records the amount of padding needed in words. */
6257 static int
6258 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
6259 const_tree type, bool named, bool incoming_p,
6260 int *pregno, int *ppadding)
6262 int regbase = (incoming_p
6263 ? SPARC_INCOMING_INT_ARG_FIRST
6264 : SPARC_OUTGOING_INT_ARG_FIRST);
6265 int slotno = cum->words;
6266 enum mode_class mclass;
6267 int regno;
6269 *ppadding = 0;
6271 if (type && TREE_ADDRESSABLE (type))
6272 return -1;
6274 if (TARGET_ARCH32
6275 && mode == BLKmode
6276 && type
6277 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6278 return -1;
6280 /* For SPARC64, objects requiring 16-byte alignment get it. */
6281 if (TARGET_ARCH64
6282 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6283 && (slotno & 1) != 0)
6284 slotno++, *ppadding = 1;
6286 mclass = GET_MODE_CLASS (mode);
6287 if (type && TREE_CODE (type) == VECTOR_TYPE)
6289 /* Vector types deserve special treatment because they are
6290 polymorphic wrt their mode, depending upon whether VIS
6291 instructions are enabled. */
6292 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6294 /* The SPARC port defines no floating-point vector modes. */
6295 gcc_assert (mode == BLKmode);
6297 else
6299 /* Integral vector types should either have a vector
6300 mode or an integral mode, because we are guaranteed
6301 by pass_by_reference that their size is not greater
6302 than 16 bytes and TImode is 16-byte wide. */
6303 gcc_assert (mode != BLKmode);
6305 /* Vector integers are handled like floats according to
6306 the Sun VIS SDK. */
6307 mclass = MODE_FLOAT;
6311 switch (mclass)
6313 case MODE_FLOAT:
6314 case MODE_COMPLEX_FLOAT:
6315 case MODE_VECTOR_INT:
6316 if (TARGET_ARCH64 && TARGET_FPU && named)
6318 if (slotno >= SPARC_FP_ARG_MAX)
6319 return -1;
6320 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6321 /* Arguments filling only one single FP register are
6322 right-justified in the outer double FP register. */
6323 if (GET_MODE_SIZE (mode) <= 4)
6324 regno++;
6325 break;
6327 /* fallthrough */
6329 case MODE_INT:
6330 case MODE_COMPLEX_INT:
6331 if (slotno >= SPARC_INT_ARG_MAX)
6332 return -1;
6333 regno = regbase + slotno;
6334 break;
6336 case MODE_RANDOM:
6337 if (mode == VOIDmode)
6338 /* MODE is VOIDmode when generating the actual call. */
6339 return -1;
6341 gcc_assert (mode == BLKmode);
6343 if (TARGET_ARCH32
6344 || !type
6345 || (TREE_CODE (type) != VECTOR_TYPE
6346 && TREE_CODE (type) != RECORD_TYPE))
6348 if (slotno >= SPARC_INT_ARG_MAX)
6349 return -1;
6350 regno = regbase + slotno;
6352 else /* TARGET_ARCH64 && type */
6354 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6356 /* First see what kinds of registers we would need. */
6357 if (TREE_CODE (type) == VECTOR_TYPE)
6358 fpregs_p = 1;
6359 else
6360 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6362 /* The ABI obviously doesn't specify how packed structures
6363 are passed. These are defined to be passed in int regs
6364 if possible, otherwise memory. */
6365 if (packed_p || !named)
6366 fpregs_p = 0, intregs_p = 1;
6368 /* If all arg slots are filled, then must pass on stack. */
6369 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6370 return -1;
6372 /* If there are only int args and all int arg slots are filled,
6373 then must pass on stack. */
6374 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6375 return -1;
6377 /* Note that even if all int arg slots are filled, fp members may
6378 still be passed in regs if such regs are available.
6379 *PREGNO isn't set because there may be more than one; it's up
6380 to the caller to compute them. */
6381 return slotno;
6383 break;
6385 default :
6386 gcc_unreachable ();
6389 *pregno = regno;
6390 return slotno;
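/* Worked example of the slot assignment above, under TARGET_ARCH64 with
   TARGET_FPU and a named argument: a double in slot 0 gets
   regno == SPARC_FP_ARG_FIRST, i.e. %d0, while a float in slot 0 has
   GET_MODE_SIZE (mode) <= 4 and is bumped to the odd register %f1 --
   matching the slot table earlier in this file.  */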
6393 /* Handle recursive register counting for structure field layout. */
6395 struct function_arg_record_value_parms
6397 rtx ret; /* return expression being built. */
6398 int slotno; /* slot number of the argument. */
6399 int named; /* whether the argument is named. */
6400 int regbase; /* regno of the base register. */
6401 int stack; /* 1 if part of the argument is on the stack. */
6402 int intoffset; /* offset of the first pending integer field. */
6403 unsigned int nregs; /* number of words passed in registers. */
6406 static void function_arg_record_value_3
6407 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6408 static void function_arg_record_value_2
6409 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6410 static void function_arg_record_value_1
6411 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6412 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6413 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6415 /* A subroutine of function_arg_record_value. Traverse the structure
6416 recursively and determine how many registers will be required. */
6418 static void
6419 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6420 struct function_arg_record_value_parms *parms,
6421 bool packed_p)
6423 tree field;
6425 /* We need to compute how many registers are needed so we can
6426 allocate the PARALLEL, but before we can do that we need to know
6427 whether there are any packed fields. The ABI obviously doesn't
6428 specify how structures are passed in this case, so they are
6429 defined to be passed in int regs if possible, otherwise memory,
6430 regardless of whether there are fp values present. */
6432 if (! packed_p)
6433 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6435 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6437 packed_p = true;
6438 break;
6442 /* Compute how many registers we need. */
6443 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6445 if (TREE_CODE (field) == FIELD_DECL)
6447 HOST_WIDE_INT bitpos = startbitpos;
6449 if (DECL_SIZE (field) != 0)
6451 if (integer_zerop (DECL_SIZE (field)))
6452 continue;
6454 if (tree_fits_uhwi_p (bit_position (field)))
6455 bitpos += int_bit_position (field);
6458 /* ??? FIXME: else assume zero offset. */
6460 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6461 function_arg_record_value_1 (TREE_TYPE (field),
6462 bitpos,
6463 parms,
6464 packed_p);
6465 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6466 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6467 && TARGET_FPU
6468 && parms->named
6469 && ! packed_p)
6471 if (parms->intoffset != -1)
6473 unsigned int startbit, endbit;
6474 int intslots, this_slotno;
6476 startbit = parms->intoffset & -BITS_PER_WORD;
6477 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6479 intslots = (endbit - startbit) / BITS_PER_WORD;
6480 this_slotno = parms->slotno + parms->intoffset
6481 / BITS_PER_WORD;
6483 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6485 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6486 /* We need to pass this field on the stack. */
6487 parms->stack = 1;
6490 parms->nregs += intslots;
6491 parms->intoffset = -1;
6494 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
6495 If it wasn't true we wouldn't be here. */
6496 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6497 && DECL_MODE (field) == BLKmode)
6498 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6499 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6500 parms->nregs += 2;
6501 else
6502 parms->nregs += 1;
6504 else
6506 if (parms->intoffset == -1)
6507 parms->intoffset = bitpos;
6513 /* A subroutine of function_arg_record_value. Assign the bits of the
6514 structure between parms->intoffset and bitpos to integer registers. */
6516 static void
6517 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6518 struct function_arg_record_value_parms *parms)
6520 enum machine_mode mode;
6521 unsigned int regno;
6522 unsigned int startbit, endbit;
6523 int this_slotno, intslots, intoffset;
6524 rtx reg;
6526 if (parms->intoffset == -1)
6527 return;
6529 intoffset = parms->intoffset;
6530 parms->intoffset = -1;
6532 startbit = intoffset & -BITS_PER_WORD;
6533 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6534 intslots = (endbit - startbit) / BITS_PER_WORD;
6535 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6537 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6538 if (intslots <= 0)
6539 return;
6541 /* If this is the trailing part of a word, only load that much into
6542 the register. Otherwise load the whole register. Note that in
6543 the latter case we may pick up unwanted bits. It's not a problem
6544 at the moment, but we may wish to revisit this. */
6546 if (intoffset % BITS_PER_WORD != 0)
6547 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6548 MODE_INT);
6549 else
6550 mode = word_mode;
6552 intoffset /= BITS_PER_UNIT;
6553 do
6555 regno = parms->regbase + this_slotno;
6556 reg = gen_rtx_REG (mode, regno);
6557 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6558 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6560 this_slotno += 1;
6561 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6562 mode = word_mode;
6563 parms->nregs += 1;
6564 intslots -= 1;
6566 while (intslots > 0);
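/* Worked example of the masking arithmetic above, with BITS_PER_WORD == 64:
   for intoffset == 32 and bitpos == 128, startbit == (32 & -64) == 0,
   endbit == ((128 + 63) & -64) == 128, so intslots == 2.  Since
   intoffset % 64 == 32, the first register uses
   smallest_mode_for_size (32, MODE_INT), i.e. SImode, and the second one
   falls back to word_mode.  */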
6569 /* A subroutine of function_arg_record_value. Traverse the structure
6570 recursively and assign bits to floating point registers. Track which
6571 bits in between need integer registers; invoke function_arg_record_value_3
6572 to make that happen. */
6574 static void
6575 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6576 struct function_arg_record_value_parms *parms,
6577 bool packed_p)
6579 tree field;
6581 if (! packed_p)
6582 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6584 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6586 packed_p = true;
6587 break;
6591 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6593 if (TREE_CODE (field) == FIELD_DECL)
6595 HOST_WIDE_INT bitpos = startbitpos;
6597 if (DECL_SIZE (field) != 0)
6599 if (integer_zerop (DECL_SIZE (field)))
6600 continue;
6602 if (tree_fits_uhwi_p (bit_position (field)))
6603 bitpos += int_bit_position (field);
6606 /* ??? FIXME: else assume zero offset. */
6608 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6609 function_arg_record_value_2 (TREE_TYPE (field),
6610 bitpos,
6611 parms,
6612 packed_p);
6613 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6614 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6615 && TARGET_FPU
6616 && parms->named
6617 && ! packed_p)
6619 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6620 int regno, nregs, pos;
6621 enum machine_mode mode = DECL_MODE (field);
6622 rtx reg;
6624 function_arg_record_value_3 (bitpos, parms);
6626 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6627 && mode == BLKmode)
6629 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6630 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6632 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6634 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6635 nregs = 2;
6637 else
6638 nregs = 1;
6640 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6641 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6642 regno++;
6643 reg = gen_rtx_REG (mode, regno);
6644 pos = bitpos / BITS_PER_UNIT;
6645 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6646 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6647 parms->nregs += 1;
6648 while (--nregs > 0)
6650 regno += GET_MODE_SIZE (mode) / 4;
6651 reg = gen_rtx_REG (mode, regno);
6652 pos += GET_MODE_SIZE (mode);
6653 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6654 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6655 parms->nregs += 1;
6658 else
6660 if (parms->intoffset == -1)
6661 parms->intoffset = bitpos;
6667 /* Used by function_arg and sparc_function_value_1 to implement the complex
6668 conventions of the 64-bit ABI for passing and returning structures.
6669 Return an expression valid as a return value for the FUNCTION_ARG
6670 and TARGET_FUNCTION_VALUE.
6672 TYPE is the data type of the argument (as a tree).
6673 This is null for libcalls where that information may
6674 not be available.
6675 MODE is the argument's machine mode.
6676 SLOTNO is the index number of the argument's slot in the parameter array.
6677 NAMED is nonzero if this argument is a named parameter
6678 (otherwise it is an extra parameter matching an ellipsis).
6679 REGBASE is the regno of the base register for the parameter array. */
6681 static rtx
6682 function_arg_record_value (const_tree type, enum machine_mode mode,
6683 int slotno, int named, int regbase)
6685 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6686 struct function_arg_record_value_parms parms;
6687 unsigned int nregs;
6689 parms.ret = NULL_RTX;
6690 parms.slotno = slotno;
6691 parms.named = named;
6692 parms.regbase = regbase;
6693 parms.stack = 0;
6695 /* Compute how many registers we need. */
6696 parms.nregs = 0;
6697 parms.intoffset = 0;
6698 function_arg_record_value_1 (type, 0, &parms, false);
6700 /* Take into account pending integer fields. */
6701 if (parms.intoffset != -1)
6703 unsigned int startbit, endbit;
6704 int intslots, this_slotno;
6706 startbit = parms.intoffset & -BITS_PER_WORD;
6707 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6708 intslots = (endbit - startbit) / BITS_PER_WORD;
6709 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6711 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6713 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6714 /* We need to pass this field on the stack. */
6715 parms.stack = 1;
6718 parms.nregs += intslots;
6720 nregs = parms.nregs;
6722 /* Allocate the vector and handle some annoying special cases. */
6723 if (nregs == 0)
6725 /* ??? Empty structure has no value? Duh? */
6726 if (typesize <= 0)
6728 /* Though there's nothing really to store, return a word register
6729 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6730 leads to breakage due to the fact that there are zero bytes to
6731 load. */
6732 return gen_rtx_REG (mode, regbase);
6734 else
6736 /* ??? C++ has structures with no fields, and yet a size. Give up
6737 for now and pass everything back in integer registers. */
6738 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6740 if (nregs + slotno > SPARC_INT_ARG_MAX)
6741 nregs = SPARC_INT_ARG_MAX - slotno;
6743 gcc_assert (nregs != 0);
6745 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6747 /* If at least one field must be passed on the stack, generate
6748 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6749 also be passed on the stack. We can't do much better because the
6750 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6751 of structures for which the fields passed exclusively in registers
6752 are not at the beginning of the structure. */
6753 if (parms.stack)
6754 XVECEXP (parms.ret, 0, 0)
6755 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6757 /* Fill in the entries. */
6758 parms.nregs = 0;
6759 parms.intoffset = 0;
6760 function_arg_record_value_2 (type, 0, &parms, false);
6761 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6763 gcc_assert (parms.nregs == nregs);
6765 return parms.ret;
6768 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6769 of the 64-bit ABI for passing and returning unions.
6770 Return an expression valid as a return value for the FUNCTION_ARG
6771 and TARGET_FUNCTION_VALUE.
6773 SIZE is the size in bytes of the union.
6774 MODE is the argument's machine mode.
6775 REGNO is the hard register the union will be passed in. */
6777 static rtx
6778 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6779 int regno)
6781 int nwords = ROUND_ADVANCE (size), i;
6782 rtx regs;
6784 /* See comment in previous function for empty structures. */
6785 if (nwords == 0)
6786 return gen_rtx_REG (mode, regno);
6788 if (slotno == SPARC_INT_ARG_MAX - 1)
6789 nwords = 1;
6791 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6793 for (i = 0; i < nwords; i++)
6795 /* Unions are passed left-justified. */
6796 XVECEXP (regs, 0, i)
6797 = gen_rtx_EXPR_LIST (VOIDmode,
6798 gen_rtx_REG (word_mode, regno),
6799 GEN_INT (UNITS_PER_WORD * i));
6800 regno++;
6803 return regs;
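/* For instance, a 12-byte union gives nwords == 2, so the PARALLEL built
   above holds two word_mode registers at byte offsets 0 and 8 -- unions are
   left-justified, unlike small scalar floats, which are right-justified in
   their FP register.  */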
6806 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6807 for passing and returning BLKmode vectors.
6808 Return an expression valid as a return value for the FUNCTION_ARG
6809 and TARGET_FUNCTION_VALUE.
6811 SIZE is the size in bytes of the vector.
6812 REGNO is the FP hard register the vector will be passed in. */
6814 static rtx
6815 function_arg_vector_value (int size, int regno)
6817 const int nregs = MAX (1, size / 8);
6818 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6820 if (size < 8)
6821 XVECEXP (regs, 0, 0)
6822 = gen_rtx_EXPR_LIST (VOIDmode,
6823 gen_rtx_REG (SImode, regno),
6824 const0_rtx);
6825 else
6826 for (int i = 0; i < nregs; i++)
6827 XVECEXP (regs, 0, i)
6828 = gen_rtx_EXPR_LIST (VOIDmode,
6829 gen_rtx_REG (DImode, regno + 2*i),
6830 GEN_INT (i*8));
6832 return regs;
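/* For example, a 16-byte vector passed at FP register REGNO yields a
   PARALLEL of two DImode registers, (REGNO, offset 0) and (REGNO + 2,
   offset 8), while a 4-byte vector yields a single SImode register at
   offset 0.  */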
6835 /* Determine where to put an argument to a function.
6836 Value is zero to push the argument on the stack,
6837 or a hard register in which to store the argument.
6839 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6840 the preceding args and about the function being called.
6841 MODE is the argument's machine mode.
6842 TYPE is the data type of the argument (as a tree).
6843 This is null for libcalls where that information may
6844 not be available.
6845 NAMED is true if this argument is a named parameter
6846 (otherwise it is an extra parameter matching an ellipsis).
6847 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6848 TARGET_FUNCTION_INCOMING_ARG. */
6850 static rtx
6851 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6852 const_tree type, bool named, bool incoming_p)
6854 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6856 int regbase = (incoming_p
6857 ? SPARC_INCOMING_INT_ARG_FIRST
6858 : SPARC_OUTGOING_INT_ARG_FIRST);
6859 int slotno, regno, padding;
6860 enum mode_class mclass = GET_MODE_CLASS (mode);
6862 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6863 &regno, &padding);
6864 if (slotno == -1)
6865 return 0;
6867 /* Vector types deserve special treatment because they are polymorphic wrt
6868 their mode, depending upon whether VIS instructions are enabled. */
6869 if (type && TREE_CODE (type) == VECTOR_TYPE)
6871 HOST_WIDE_INT size = int_size_in_bytes (type);
6872 gcc_assert ((TARGET_ARCH32 && size <= 8)
6873 || (TARGET_ARCH64 && size <= 16));
6875 if (mode == BLKmode)
6876 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6878 mclass = MODE_FLOAT;
6881 if (TARGET_ARCH32)
6882 return gen_rtx_REG (mode, regno);
6884 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6885 and are promoted to registers if possible. */
6886 if (type && TREE_CODE (type) == RECORD_TYPE)
6888 HOST_WIDE_INT size = int_size_in_bytes (type);
6889 gcc_assert (size <= 16);
6891 return function_arg_record_value (type, mode, slotno, named, regbase);
6894 /* Unions up to 16 bytes in size are passed in integer registers. */
6895 else if (type && TREE_CODE (type) == UNION_TYPE)
6897 HOST_WIDE_INT size = int_size_in_bytes (type);
6898 gcc_assert (size <= 16);
6900 return function_arg_union_value (size, mode, slotno, regno);
6903 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6904 but also have the slot allocated for them.
6905 If no prototype is in scope fp values in register slots get passed
6906 in two places, either fp regs and int regs or fp regs and memory. */
6907 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6908 && SPARC_FP_REG_P (regno))
6910 rtx reg = gen_rtx_REG (mode, regno);
6911 if (cum->prototype_p || cum->libcall_p)
6913 /* "* 2" because fp reg numbers are recorded in 4 byte
6914 quantities. */
6915 #if 0
6916 /* ??? This will cause the value to be passed in the fp reg and
6917 in the stack. When a prototype exists we want to pass the
6918 value in the reg but reserve space on the stack. That's an
6919 optimization, and is deferred [for a bit]. */
6920 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6921 return gen_rtx_PARALLEL (mode,
6922 gen_rtvec (2,
6923 gen_rtx_EXPR_LIST (VOIDmode,
6924 NULL_RTX, const0_rtx),
6925 gen_rtx_EXPR_LIST (VOIDmode,
6926 reg, const0_rtx)));
6927 else
6928 #else
6929 /* ??? It seems that passing back a register even when past
6930 the area declared by REG_PARM_STACK_SPACE will allocate
6931 space appropriately, and will not copy the data onto the
6932 stack, exactly as we desire.
6934 This is due to locate_and_pad_parm being called in
6935 expand_call whenever reg_parm_stack_space > 0, which
6936 while beneficial to our example here, would seem to be
6937 in error from what had been intended. Ho hum... -- r~ */
6938 #endif
6939 return reg;
6941 else
6943 rtx v0, v1;
6945 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6947 int intreg;
6949 /* On incoming, we don't need to know that the value
6950 is passed in %f0 and %i0, and it confuses other parts
6951 causing needless spillage even on the simplest cases. */
6952 if (incoming_p)
6953 return reg;
6955 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6956 + (regno - SPARC_FP_ARG_FIRST) / 2);
6958 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6959 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6960 const0_rtx);
6961 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6963 else
6965 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6966 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6967 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6972 /* All other aggregate types are passed in an integer register in a mode
6973 corresponding to the size of the type. */
6974 else if (type && AGGREGATE_TYPE_P (type))
6976 HOST_WIDE_INT size = int_size_in_bytes (type);
6977 gcc_assert (size <= 16);
6979 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6982 return gen_rtx_REG (mode, regno);
6985 /* Handle the TARGET_FUNCTION_ARG target hook. */
6987 static rtx
6988 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6989 const_tree type, bool named)
6991 return sparc_function_arg_1 (cum, mode, type, named, false);
6994 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6996 static rtx
6997 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6998 const_tree type, bool named)
7000 return sparc_function_arg_1 (cum, mode, type, named, true);
7003 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
7005 static unsigned int
7006 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
7008 return ((TARGET_ARCH64
7009 && (GET_MODE_ALIGNMENT (mode) == 128
7010 || (type && TYPE_ALIGN (type) == 128)))
7011 ? 128
7012 : PARM_BOUNDARY);
7015 /* For an arg passed partly in registers and partly in memory,
7016 this is the number of bytes of registers used.
7017 For args passed entirely in registers or entirely in memory, zero.
7019 Any arg that starts in the first 6 regs but won't entirely fit in them
7020 needs partial registers on v8. On v9, structures with integer
7021 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7022 values that begin in the last fp reg [where "last fp reg" varies with the
7023 mode] will be split between that reg and memory. */
7025 static int
7026 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
7027 tree type, bool named)
7029 int slotno, regno, padding;
7031 /* We pass false for incoming_p here; it doesn't matter. */
7032 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7033 false, &regno, &padding);
7035 if (slotno == -1)
7036 return 0;
7038 if (TARGET_ARCH32)
7040 if ((slotno + (mode == BLKmode
7041 ? ROUND_ADVANCE (int_size_in_bytes (type))
7042 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
7043 > SPARC_INT_ARG_MAX)
7044 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7046 else
7048 /* We are guaranteed by pass_by_reference that the size of the
7049 argument is not greater than 16 bytes, so we only need to return
7050 one word if the argument is partially passed in registers. */
7052 if (type && AGGREGATE_TYPE_P (type))
7054 int size = int_size_in_bytes (type);
7056 if (size > UNITS_PER_WORD
7057 && slotno == SPARC_INT_ARG_MAX - 1)
7058 return UNITS_PER_WORD;
7060 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7061 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7062 && ! (TARGET_FPU && named)))
7064 /* The complex types are passed as packed types. */
7065 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7066 && slotno == SPARC_INT_ARG_MAX - 1)
7067 return UNITS_PER_WORD;
7069 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7071 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7072 > SPARC_FP_ARG_MAX)
7073 return UNITS_PER_WORD;
7077 return 0;
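/* Worked example for TARGET_ARCH64: a 16-byte aggregate whose first word
   lands in the last int slot (slotno == SPARC_INT_ARG_MAX - 1, i.e. %o5)
   is split, and this hook returns UNITS_PER_WORD -- 8 bytes go in the
   register and the remaining 8 on the stack, as described above.  */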
7080 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7081 Specify whether to pass the argument by reference. */
7083 static bool
7084 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7085 enum machine_mode mode, const_tree type,
7086 bool named ATTRIBUTE_UNUSED)
7088 if (TARGET_ARCH32)
7089 /* Original SPARC 32-bit ABI says that structures and unions,
7090 and quad-precision floats are passed by reference. For Pascal,
7091 also pass arrays by reference. All other base types are passed
7092 in registers.
7094 Extended ABI (as implemented by the Sun compiler) says that all
7095 complex floats are passed by reference. Pass complex integers
7096 in registers up to 8 bytes. More generally, enforce the 2-word
7097 cap for passing arguments in registers.
7099 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7100 integers are passed like floats of the same size, that is in
7101 registers up to 8 bytes. Pass all vector floats by reference
7102 like structure and unions. */
7103 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7104 || mode == SCmode
7105 /* Catch CDImode, TFmode, DCmode and TCmode. */
7106 || GET_MODE_SIZE (mode) > 8
7107 || (type
7108 && TREE_CODE (type) == VECTOR_TYPE
7109 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7110 else
7111 /* Original SPARC 64-bit ABI says that structures and unions
7112 smaller than 16 bytes are passed in registers, as well as
7113 all other base types.
7115 Extended ABI (as implemented by the Sun compiler) says that
7116 complex floats are passed in registers up to 16 bytes. Pass
7117 all complex integers in registers up to 16 bytes. More generally,
7118 enforce the 2-word cap for passing arguments in registers.
7120 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7121 integers are passed like floats of the same size, that is in
7122 registers (up to 16 bytes). Pass all vector floats like structure
7123 and unions. */
7124 return ((type
7125 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7126 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7127 /* Catch CTImode and TCmode. */
7128 || GET_MODE_SIZE (mode) > 16);
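/* Concrete instances of the rules above:

     32-bit: any struct or union  -> by reference (aggregate)
             _Complex float       -> by reference (SCmode)
             long double          -> by reference (TFmode, 16 bytes > 8)
             _Complex int         -> in registers (8 bytes)

     64-bit: 16-byte structure    -> in registers (size <= 16)
             32-byte structure    -> by reference (size > 16)
             _Complex long double -> by reference (TCmode, 32 bytes > 16)  */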
7131 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7132 Update the data in CUM to advance over an argument
7133 of mode MODE and data type TYPE.
7134 TYPE is null for libcalls where that information may not be available. */
7136 static void
7137 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7138 const_tree type, bool named)
7140 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7141 int regno, padding;
7143 /* We pass false for incoming_p here; it doesn't matter. */
7144 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7146 /* If argument requires leading padding, add it. */
7147 cum->words += padding;
7149 if (TARGET_ARCH32)
7151 cum->words += (mode != BLKmode
7152 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7153 : ROUND_ADVANCE (int_size_in_bytes (type)));
7155 else
7157 if (type && AGGREGATE_TYPE_P (type))
7159 int size = int_size_in_bytes (type);
7161 if (size <= 8)
7162 ++cum->words;
7163 else if (size <= 16)
7164 cum->words += 2;
7165 else /* passed by reference */
7166 ++cum->words;
7168 else
7170 cum->words += (mode != BLKmode
7171 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7172 : ROUND_ADVANCE (int_size_in_bytes (type)));
7177 /* Handle the FUNCTION_ARG_PADDING macro.
7178 For the 64-bit ABI, structs are always stored left-justified in their
7179 argument slot. */
7181 enum direction
7182 function_arg_padding (enum machine_mode mode, const_tree type)
7184 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7185 return upward;
7187 /* Fall back to the default. */
7188 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7191 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7192 Specify whether to return the return value in memory. */
7194 static bool
7195 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7197 if (TARGET_ARCH32)
7198 /* Original SPARC 32-bit ABI says that structures and unions,
7199 and quad-precision floats are returned in memory. All other
7200 base types are returned in registers.
7202 Extended ABI (as implemented by the Sun compiler) says that
7203 all complex floats are returned in registers (8 FP registers
7204 at most for '_Complex long double'). Return all complex integers
7205 in registers (4 at most for '_Complex long long').
7207 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7208 integers are returned like floats of the same size, that is in
7209 registers up to 8 bytes and in memory otherwise. Return all
7210 vector floats in memory like structure and unions; note that
7211 they always have BLKmode like the latter. */
7212 return (TYPE_MODE (type) == BLKmode
7213 || TYPE_MODE (type) == TFmode
7214 || (TREE_CODE (type) == VECTOR_TYPE
7215 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7216 else
7217 /* Original SPARC 64-bit ABI says that structures and unions
7218 smaller than 32 bytes are returned in registers, as well as
7219 all other base types.
7221 Extended ABI (as implemented by the Sun compiler) says that all
7222 complex floats are returned in registers (8 FP registers at most
7223 for '_Complex long double'). Return all complex integers in
7224 registers (4 at most for '_Complex TItype').
7226 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7227 integers are returned like floats of the same size, that is in
7228 registers. Return all vector floats like structure and unions;
7229 note that they always have BLKmode like the latter. */
7230 return (TYPE_MODE (type) == BLKmode
7231 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7234 /* Handle the TARGET_STRUCT_VALUE target hook.
7235 Return where to find the structure return value address. */
7237 static rtx
7238 sparc_struct_value_rtx (tree fndecl, int incoming)
7240 if (TARGET_ARCH64)
7241 return 0;
7242 else
7244 rtx mem;
7246 if (incoming)
7247 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7248 STRUCT_VALUE_OFFSET));
7249 else
7250 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7251 STRUCT_VALUE_OFFSET));
7253 /* Only follow the SPARC ABI for fixed-size structure returns.
7254 Variable-size structure returns are handled per the normal
7255 procedures in GCC. This is enabled by -mstd-struct-return. */
7256 if (incoming == 2
7257 && sparc_std_struct_return
7258 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7259 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7261 /* We must check and adjust the return address, since it is
7262 optional whether the caller actually provides the return
7263 object. */
7264 rtx ret_reg = gen_rtx_REG (Pmode, 31);
7265 rtx scratch = gen_reg_rtx (SImode);
7266 rtx endlab = gen_label_rtx ();
7268 /* Calculate the return object size */
7269 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7270 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7271 /* Construct a temporary return value */
7272 rtx temp_val
7273 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7275 /* Implement SPARC 32-bit psABI callee return struct checking:
7277 Fetch the instruction where we will return to and see if
7278 it's an unimp instruction (the most significant 10 bits
7279 will be zero). */
7280 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7281 plus_constant (Pmode,
7282 ret_reg, 8)));
7283 /* Assume the size is valid and pre-adjust */
7284 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7285 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7286 0, endlab);
7287 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7288 /* Write the address of the memory pointed to by temp_val into
7289 the memory pointed to by mem */
7290 emit_move_insn (mem, XEXP (temp_val, 0));
7291 emit_label (endlab);
7294 return mem;
7298 /* Handle the TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE target hooks.
7299 For v9, function return values are subject to the same rules as arguments,
7300 except that up to 32 bytes may be returned in registers. */
7302 static rtx
7303 sparc_function_value_1 (const_tree type, enum machine_mode mode,
7304 bool outgoing)
7306 /* Beware that the two values are swapped here wrt function_arg. */
7307 int regbase = (outgoing
7308 ? SPARC_INCOMING_INT_ARG_FIRST
7309 : SPARC_OUTGOING_INT_ARG_FIRST);
7310 enum mode_class mclass = GET_MODE_CLASS (mode);
7311 int regno;
7313 /* Vector types deserve special treatment because they are polymorphic wrt
7314 their mode, depending upon whether VIS instructions are enabled. */
7315 if (type && TREE_CODE (type) == VECTOR_TYPE)
7317 HOST_WIDE_INT size = int_size_in_bytes (type);
7318 gcc_assert ((TARGET_ARCH32 && size <= 8)
7319 || (TARGET_ARCH64 && size <= 32));
7321 if (mode == BLKmode)
7322 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7324 mclass = MODE_FLOAT;
7327 if (TARGET_ARCH64 && type)
7329 /* Structures up to 32 bytes in size are returned in registers. */
7330 if (TREE_CODE (type) == RECORD_TYPE)
7332 HOST_WIDE_INT size = int_size_in_bytes (type);
7333 gcc_assert (size <= 32);
7335 return function_arg_record_value (type, mode, 0, 1, regbase);
7338 /* Unions up to 32 bytes in size are returned in integer registers. */
7339 else if (TREE_CODE (type) == UNION_TYPE)
7341 HOST_WIDE_INT size = int_size_in_bytes (type);
7342 gcc_assert (size <= 32);
7344 return function_arg_union_value (size, mode, 0, regbase);
7347 /* Objects that require it are returned in FP registers. */
7348 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7349 ;
7351 /* All other aggregate types are returned in an integer register in a
7352 mode corresponding to the size of the type. */
7353 else if (AGGREGATE_TYPE_P (type))
7357 HOST_WIDE_INT size = int_size_in_bytes (type);
7358 gcc_assert (size <= 32);
7360 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7362 /* ??? We probably should have made the same ABI change in
7363 3.4.0 as the one we made for unions. The latter was
7364 required by the SCD though, while the former is not
7365 specified, so we favored compatibility and efficiency.
7367 Now we're stuck for aggregates larger than 16 bytes,
7368 because OImode vanished in the meantime. Let's not
7369 try to be unduly clever, and simply follow the ABI
7370 for unions in that case. */
7371 if (mode == BLKmode)
7372 return function_arg_union_value (size, mode, 0, regbase);
7373 else
7374 mclass = MODE_INT;
7377 /* We should only have pointer and integer types at this point. This
7378 must match sparc_promote_function_mode. */
7379 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7380 mode = word_mode;
7383 /* We should only have pointer and integer types at this point. This must
7384 match sparc_promote_function_mode. */
7385 else if (TARGET_ARCH32
7386 && mclass == MODE_INT
7387 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7388 mode = word_mode;
7390 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7391 regno = SPARC_FP_ARG_FIRST;
7392 else
7393 regno = regbase;
7395 return gen_rtx_REG (mode, regno);
7398 /* Handle TARGET_FUNCTION_VALUE.
7399 On the SPARC, the value is found in the first "output" register, but the
7400 called function leaves it in the first "input" register. */
7402 static rtx
7403 sparc_function_value (const_tree valtype,
7404 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7405 bool outgoing)
7407 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7410 /* Handle TARGET_LIBCALL_VALUE. */
7412 static rtx
7413 sparc_libcall_value (enum machine_mode mode,
7414 const_rtx fun ATTRIBUTE_UNUSED)
7416 return sparc_function_value_1 (NULL_TREE, mode, false);
7419 /* Handle FUNCTION_VALUE_REGNO_P.
7420 On the SPARC, the first "output" reg is used for integer values, and the
7421 first floating point register is used for floating point values. */
7423 static bool
7424 sparc_function_value_regno_p (const unsigned int regno)
7426 return (regno == 8 || regno == 32);
7429 /* Do what is necessary for `va_start'. We look at the current function
7430 to determine if stdarg or varargs is used and return the address of
7431 the first unnamed parameter. */
7433 static rtx
7434 sparc_builtin_saveregs (void)
7436 int first_reg = crtl->args.info.words;
7437 rtx address;
7438 int regno;
7440 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7441 emit_move_insn (gen_rtx_MEM (word_mode,
7442 gen_rtx_PLUS (Pmode,
7443 frame_pointer_rtx,
7444 GEN_INT (FIRST_PARM_OFFSET (0)
7445 + (UNITS_PER_WORD
7446 * regno)))),
7447 gen_rtx_REG (word_mode,
7448 SPARC_INCOMING_INT_ARG_FIRST + regno));
7450 address = gen_rtx_PLUS (Pmode,
7451 frame_pointer_rtx,
7452 GEN_INT (FIRST_PARM_OFFSET (0)
7453 + UNITS_PER_WORD * first_reg));
7455 return address;
7458 /* Implement `va_start' for stdarg. */
7460 static void
7461 sparc_va_start (tree valist, rtx nextarg)
7463 nextarg = expand_builtin_saveregs ();
7464 std_expand_builtin_va_start (valist, nextarg);
7467 /* Implement `va_arg' for stdarg. */
7469 static tree
7470 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7471 gimple_seq *post_p)
7473 HOST_WIDE_INT size, rsize, align;
7474 tree addr, incr;
7475 bool indirect;
7476 tree ptrtype = build_pointer_type (type);
7478 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7480 indirect = true;
7481 size = rsize = UNITS_PER_WORD;
7482 align = 0;
7484 else
7486 indirect = false;
7487 size = int_size_in_bytes (type);
7488 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7489 align = 0;
7491 if (TARGET_ARCH64)
7493 /* For SPARC64, objects requiring 16-byte alignment get it. */
7494 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7495 align = 2 * UNITS_PER_WORD;
7497 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7498 are left-justified in their slots. */
7499 if (AGGREGATE_TYPE_P (type))
7501 if (size == 0)
7502 size = rsize = UNITS_PER_WORD;
7503 else
7504 size = rsize;
7509 incr = valist;
7510 if (align)
7512 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7513 incr = fold_convert (sizetype, incr);
7514 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7515 size_int (-align));
7516 incr = fold_convert (ptr_type_node, incr);
7519 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7520 addr = incr;
7522 if (BYTES_BIG_ENDIAN && size < rsize)
7523 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7525 if (indirect)
7527 addr = fold_convert (build_pointer_type (ptrtype), addr);
7528 addr = build_va_arg_indirect_ref (addr);
7531 /* If the address isn't aligned properly for the type, we need a temporary.
7532 FIXME: This is inefficient; usually we can do this in registers. */
7533 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7535 tree tmp = create_tmp_var (type, "va_arg_tmp");
7536 tree dest_addr = build_fold_addr_expr (tmp);
7537 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7538 3, dest_addr, addr, size_int (rsize));
7539 TREE_ADDRESSABLE (tmp) = 1;
7540 gimplify_and_add (copy, pre_p);
7541 addr = dest_addr;
7544 else
7545 addr = fold_convert (ptrtype, addr);
7547 incr = fold_build_pointer_plus_hwi (incr, rsize);
7548 gimplify_assign (valist, incr, post_p);
7550 return build_va_arg_indirect_ref (addr);
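/* Tracing the va_arg logic above on 64-bit big-endian SPARC: for a 4-byte
   value, size == 4 and rsize == 8, so the BYTES_BIG_ENDIAN branch reads at
   incr + 4 -- the value sits in the high-address half of its 8-byte slot --
   and VALIST then advances by rsize.  A 16-byte-aligned type first rounds
   incr up to a multiple of 2 * UNITS_PER_WORD.  */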
7553 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7554 Specify whether the vector mode is supported by the hardware. */
7556 static bool
7557 sparc_vector_mode_supported_p (enum machine_mode mode)
7559 return TARGET_VIS && VECTOR_MODE_P (mode);
7562 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7564 static enum machine_mode
7565 sparc_preferred_simd_mode (enum machine_mode mode)
7567 if (TARGET_VIS)
7568 switch (mode)
7570 case SImode:
7571 return V2SImode;
7572 case HImode:
7573 return V4HImode;
7574 case QImode:
7575 return V8QImode;
7577 default:;
7580 return word_mode;
7583 /* Return the string to output an unconditional branch to LABEL, which is
7584 the operand number of the label.
7586 DEST is the destination insn (i.e. the label), INSN is the source. */
7588 const char *
7589 output_ubranch (rtx dest, rtx insn)
7591 static char string[64];
7592 bool v9_form = false;
7593 int delta;
7594 char *p;
7596 /* Even if we are trying to use cbcond for this, evaluate
7597 whether we can use V9 branches as our backup plan. */
7599 delta = 5000000;
7600 if (INSN_ADDRESSES_SET_P ())
7601 delta = (INSN_ADDRESSES (INSN_UID (dest))
7602 - INSN_ADDRESSES (INSN_UID (insn)));
7604 /* Leave some instructions for "slop". */
7605 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7606 v9_form = true;
7608 if (TARGET_CBCOND)
7610 bool emit_nop = emit_cbcond_nop (insn);
7611 bool far = false;
7612 const char *rval;
7614 if (delta < -500 || delta > 500)
7615 far = true;
7617 if (far)
7619 if (v9_form)
7620 rval = "ba,a,pt\t%%xcc, %l0";
7621 else
7622 rval = "b,a\t%l0";
7624 else
7626 if (emit_nop)
7627 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7628 else
7629 rval = "cwbe\t%%g0, %%g0, %l0";
7631 return rval;
7634 if (v9_form)
7635 strcpy (string, "ba%*,pt\t%%xcc, ");
7636 else
7637 strcpy (string, "b%*\t");
7639 p = strchr (string, '\0');
7640 *p++ = '%';
7641 *p++ = 'l';
7642 *p++ = '0';
7643 *p++ = '%';
7644 *p++ = '(';
7645 *p = '\0';
7647 return string;
7650 /* Return the string to output a conditional branch to LABEL, which is
7651 the operand number of the label. OP is the conditional expression.
7652 XEXP (OP, 0) is assumed to be a condition code register (integer or
7653 floating point) and its mode specifies what kind of comparison we made.
7655 DEST is the destination insn (i.e. the label), INSN is the source.
7657 REVERSED is nonzero if we should reverse the sense of the comparison.
7659 ANNUL is nonzero if we should generate an annulling branch. */
7661 const char *
7662 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7663 rtx insn)
7665 static char string[64];
7666 enum rtx_code code = GET_CODE (op);
7667 rtx cc_reg = XEXP (op, 0);
7668 enum machine_mode mode = GET_MODE (cc_reg);
7669 const char *labelno, *branch;
7670 int spaces = 8, far;
7671 char *p;
7673 /* v9 branches are limited to +-1MB.  If it is too far away,
7674 change
7676 bne,pt %xcc, .LC30
7678 to
7680 be,pn %xcc, .+12
7681 nop
7682 ba .LC30
7684 and
7686 fbne,a,pn %fcc2, .LC29
7688 to
7690 fbe,pt %fcc2, .+16
7691 nop
7692 ba .LC29 */
7694 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7695 if (reversed ^ far)
7697 /* Reversal of FP compares needs care -- an ordered compare
7698 becomes an unordered compare and vice versa. */
7699 if (mode == CCFPmode || mode == CCFPEmode)
7700 code = reverse_condition_maybe_unordered (code);
7701 else
7702 code = reverse_condition (code);
7705 /* Start by writing the branch condition. */
7706 if (mode == CCFPmode || mode == CCFPEmode)
7708 switch (code)
7710 case NE:
7711 branch = "fbne";
7712 break;
7713 case EQ:
7714 branch = "fbe";
7715 break;
7716 case GE:
7717 branch = "fbge";
7718 break;
7719 case GT:
7720 branch = "fbg";
7721 break;
7722 case LE:
7723 branch = "fble";
7724 break;
7725 case LT:
7726 branch = "fbl";
7727 break;
7728 case UNORDERED:
7729 branch = "fbu";
7730 break;
7731 case ORDERED:
7732 branch = "fbo";
7733 break;
7734 case UNGT:
7735 branch = "fbug";
7736 break;
7737 case UNLT:
7738 branch = "fbul";
7739 break;
7740 case UNEQ:
7741 branch = "fbue";
7742 break;
7743 case UNGE:
7744 branch = "fbuge";
7745 break;
7746 case UNLE:
7747 branch = "fbule";
7748 break;
7749 case LTGT:
7750 branch = "fblg";
7751 break;
7753 default:
7754 gcc_unreachable ();
7757 /* ??? !v9: FP branches cannot be preceded by another floating point
7758 insn. Because there is currently no concept of pre-delay slots,
7759 we can fix this only by always emitting a nop before a floating
7760 point branch. */
7762 string[0] = '\0';
7763 if (! TARGET_V9)
7764 strcpy (string, "nop\n\t");
7765 strcat (string, branch);
7767 else
7769 switch (code)
7771 case NE:
7772 branch = "bne";
7773 break;
7774 case EQ:
7775 branch = "be";
7776 break;
7777 case GE:
7778 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7779 branch = "bpos";
7780 else
7781 branch = "bge";
7782 break;
7783 case GT:
7784 branch = "bg";
7785 break;
7786 case LE:
7787 branch = "ble";
7788 break;
7789 case LT:
7790 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7791 branch = "bneg";
7792 else
7793 branch = "bl";
7794 break;
7795 case GEU:
7796 branch = "bgeu";
7797 break;
7798 case GTU:
7799 branch = "bgu";
7800 break;
7801 case LEU:
7802 branch = "bleu";
7803 break;
7804 case LTU:
7805 branch = "blu";
7806 break;
7808 default:
7809 gcc_unreachable ();
7811 strcpy (string, branch);
7813 spaces -= strlen (branch);
7814 p = strchr (string, '\0');
7816 /* Now add the annulling, the label, and a possible nop. */
7817 if (annul && ! far)
7819 strcpy (p, ",a");
7820 p += 2;
7821 spaces -= 2;
7824 if (TARGET_V9)
7826 rtx note;
7827 int v8 = 0;
7829 if (! far && insn && INSN_ADDRESSES_SET_P ())
7831 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7832 - INSN_ADDRESSES (INSN_UID (insn)));
7833 /* Leave some instructions for "slop". */
7834 if (delta < -260000 || delta >= 260000)
7835 v8 = 1;
7838 if (mode == CCFPmode || mode == CCFPEmode)
7840 static char v9_fcc_labelno[] = "%%fccX, ";
7841 /* Set the char indicating the number of the fcc reg to use. */
7842 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7843 labelno = v9_fcc_labelno;
7844 if (v8)
7846 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7847 labelno = "";
7850 else if (mode == CCXmode || mode == CCX_NOOVmode)
7852 labelno = "%%xcc, ";
7853 gcc_assert (! v8);
7855 else
7857 labelno = "%%icc, ";
7858 if (v8)
7859 labelno = "";
7862 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7864 strcpy (p,
7865 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7866 ? ",pt" : ",pn");
7867 p += 3;
7868 spaces -= 3;
7871 else
7872 labelno = "";
7874 if (spaces > 0)
7875 *p++ = '\t';
7876 else
7877 *p++ = ' ';
7878 strcpy (p, labelno);
7879 p = strchr (p, '\0');
7880 if (far)
7882 strcpy (p, ".+12\n\t nop\n\tb\t");
7883 /* Skip the next insn if requested or if we know that it will be a nop:
7884 ".+12" becomes ".+16", jumping over the delay slot as well. */
7885 if (annul || ! final_sequence)
7886 p[3] = '6';
7887 p += 14;
7889 *p++ = '%';
7890 *p++ = 'l';
7891 *p++ = label + '0';
7892 *p++ = '%';
7893 *p++ = '#';
7894 *p = '\0';
7896 return string;
7899 /* Emit a library call comparison between floating point X and Y.
7900 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7901 Return the new operator to be used in the comparison sequence.
7903 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7904 values as arguments instead of the TFmode registers themselves;
7905 that is why we cannot call emit_float_lib_cmp. */
7908 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7910 const char *qpfunc;
7911 rtx slot0, slot1, result, tem, tem2, libfunc;
7912 enum machine_mode mode;
7913 enum rtx_code new_comparison;
7915 switch (comparison)
7917 case EQ:
7918 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7919 break;
7921 case NE:
7922 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7923 break;
7925 case GT:
7926 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7927 break;
7929 case GE:
7930 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7931 break;
7933 case LT:
7934 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7935 break;
7937 case LE:
7938 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7939 break;
7941 case ORDERED:
7942 case UNORDERED:
7943 case UNGT:
7944 case UNLT:
7945 case UNEQ:
7946 case UNGE:
7947 case UNLE:
7948 case LTGT:
7949 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7950 break;
7952 default:
7953 gcc_unreachable ();
7956 if (TARGET_ARCH64)
7958 if (MEM_P (x))
7960 tree expr = MEM_EXPR (x);
7961 if (expr)
7962 mark_addressable (expr);
7963 slot0 = x;
7965 else
7967 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7968 emit_move_insn (slot0, x);
7971 if (MEM_P (y))
7973 tree expr = MEM_EXPR (y);
7974 if (expr)
7975 mark_addressable (expr);
7976 slot1 = y;
7978 else
7980 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7981 emit_move_insn (slot1, y);
7984 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7985 emit_library_call (libfunc, LCT_NORMAL,
7986 DImode, 2,
7987 XEXP (slot0, 0), Pmode,
7988 XEXP (slot1, 0), Pmode);
7989 mode = DImode;
7991 else
7993 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7994 emit_library_call (libfunc, LCT_NORMAL,
7995 SImode, 2,
7996 x, TFmode, y, TFmode);
7997 mode = SImode;
8001 /* Immediately move the result of the libcall into a pseudo
8002 register so reload doesn't clobber the value if it needs
8003 the return register for a spill reg. */
8004 result = gen_reg_rtx (mode);
8005 emit_move_insn (result, hard_libcall_value (mode, libfunc));
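/* Note (inferred from the decoding below): _Qp_cmp/_Q_cmp encode their
   result as 0 (equal), 1 (less), 2 (greater) or 3 (unordered).  For
   instance, UNLT holds iff the low bit is set (result 1 or 3), and
   UNEQ iff the result is 0 or 3, which the add/and trick computes.  */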
8007 switch (comparison)
8009 default:
8010 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8011 case ORDERED:
8012 case UNORDERED:
8013 new_comparison = (comparison == UNORDERED ? EQ : NE);
8014 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8015 case UNGT:
8016 case UNGE:
8017 new_comparison = (comparison == UNGT ? GT : NE);
8018 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8019 case UNLE:
8020 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8021 case UNLT:
8022 tem = gen_reg_rtx (mode);
8023 if (TARGET_ARCH32)
8024 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8025 else
8026 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8027 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8028 case UNEQ:
8029 case LTGT:
8030 tem = gen_reg_rtx (mode);
8031 if (TARGET_ARCH32)
8032 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8033 else
8034 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8035 tem2 = gen_reg_rtx (mode);
8036 if (TARGET_ARCH32)
8037 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8038 else
8039 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8040 new_comparison = (comparison == UNEQ ? EQ : NE);
8041 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8044 gcc_unreachable ();
8047 /* Generate an unsigned DImode to FP conversion. This is the same code
8048 optabs would emit if we didn't have TFmode patterns. */
8050 void
8051 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
8053 rtx neglab, donelab, i0, i1, f0, in, out;
8055 out = operands[0];
8056 in = force_reg (DImode, operands[1]);
8057 neglab = gen_label_rtx ();
8058 donelab = gen_label_rtx ();
8059 i0 = gen_reg_rtx (DImode);
8060 i1 = gen_reg_rtx (DImode);
8061 f0 = gen_reg_rtx (mode);
8063 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8065 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
8066 emit_jump_insn (gen_jump (donelab));
8067 emit_barrier ();
8069 emit_label (neglab);
8071 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8072 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8073 emit_insn (gen_iordi3 (i0, i0, i1));
8074 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
8075 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
8077 emit_label (donelab);
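/* A rough C sketch of the expansion above (illustrative only):

     if ((long long) x >= 0)
       return (FP) x;                    -- plain signed conversion
     i0 = (x >> 1) | (x & 1);            -- halve, keeping a sticky bit
     return (FP) i0 + (FP) i0;           -- double the result

   The sticky low bit preserves correct rounding across the halving.  */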
8080 /* Generate an FP to unsigned DImode conversion. This is the same code
8081 optabs would emit if we didn't have TFmode patterns. */
8083 void
8084 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
8086 rtx neglab, donelab, i0, i1, f0, in, out, limit;
8088 out = operands[0];
8089 in = force_reg (mode, operands[1]);
8090 neglab = gen_label_rtx ();
8091 donelab = gen_label_rtx ();
8092 i0 = gen_reg_rtx (DImode);
8093 i1 = gen_reg_rtx (DImode);
8094 limit = gen_reg_rtx (mode);
8095 f0 = gen_reg_rtx (mode);
8097 emit_move_insn (limit,
8098 CONST_DOUBLE_FROM_REAL_VALUE (
8099 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8100 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8102 emit_insn (gen_rtx_SET (VOIDmode,
8103 out,
8104 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8105 emit_jump_insn (gen_jump (donelab));
8106 emit_barrier ();
8108 emit_label (neglab);
8110 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
8111 emit_insn (gen_rtx_SET (VOIDmode,
8113 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8114 emit_insn (gen_movdi (i1, const1_rtx));
8115 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8116 emit_insn (gen_xordi3 (out, i0, i1));
8118 emit_label (donelab);
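/* Roughly, in C terms (illustrative only):

     if (x < 0x1p63)
       return (unsigned long long) (long long) x;
     i0 = (long long) (x - 0x1p63);      -- now in signed range
     return i0 ^ (1ULL << 63);           -- xor adds 2^63 back

   The xor is safe because bit 63 of i0 is clear after the subtraction.  */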
8121 /* Return the string to output a compare and branch instruction to DEST.
8122 DEST is the destination insn (i.e. the label), INSN is the source,
8123 and OP is the conditional expression. */
8125 const char *
8126 output_cbcond (rtx op, rtx dest, rtx insn)
8128 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8129 enum rtx_code code = GET_CODE (op);
8130 const char *cond_str, *tmpl;
8131 int far, emit_nop, len;
8132 static char string[64];
8133 char size_char;
8135 /* Compare and Branch is limited to +-2KB.  If it is too far away,
8136 change
8138 cxbne X, Y, .LC30
8140 to
8142 cxbe X, Y, .+16
8143 nop
8144 ba,pt xcc, .LC30
8145 nop */
8147 len = get_attr_length (insn);
8149 far = len == 4;
8150 emit_nop = len == 2;
8152 if (far)
8153 code = reverse_condition (code);
8155 size_char = ((mode == SImode) ? 'w' : 'x');
8157 switch (code)
8159 case NE:
8160 cond_str = "ne";
8161 break;
8163 case EQ:
8164 cond_str = "e";
8165 break;
8167 case GE:
8168 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8169 cond_str = "pos";
8170 else
8171 cond_str = "ge";
8172 break;
8174 case GT:
8175 cond_str = "g";
8176 break;
8178 case LE:
8179 cond_str = "le";
8180 break;
8182 case LT:
8183 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8184 cond_str = "neg";
8185 else
8186 cond_str = "l";
8187 break;
8189 case GEU:
8190 cond_str = "cc";
8191 break;
8193 case GTU:
8194 cond_str = "gu";
8195 break;
8197 case LEU:
8198 cond_str = "leu";
8199 break;
8201 case LTU:
8202 cond_str = "cs";
8203 break;
8205 default:
8206 gcc_unreachable ();
8209 if (far)
8211 int veryfar = 1, delta;
8213 if (INSN_ADDRESSES_SET_P ())
8215 delta = (INSN_ADDRESSES (INSN_UID (dest))
8216 - INSN_ADDRESSES (INSN_UID (insn)));
8217 /* Leave some instructions for "slop". */
8218 if (delta >= -260000 && delta < 260000)
8219 veryfar = 0;
8222 if (veryfar)
8223 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8224 else
8225 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8227 else
8229 if (emit_nop)
8230 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8231 else
8232 tmpl = "c%cb%s\t%%1, %%2, %%3";
8235 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8237 return string;
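/* For example (illustrative), with SImode operands, CODE == EQ and a
   nearby target, the template expands to "cwbe\t%1, %2, %3", a 32-bit
   compare-and-branch-on-equal.  */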
8240 /* Return the string to output a conditional branch to LABEL, testing
8241 register REG. LABEL is the operand number of the label; REG is the
8242 operand number of the reg. OP is the conditional expression. The mode
8243 of REG says what kind of comparison we made.
8245 DEST is the destination insn (i.e. the label), INSN is the source.
8247 REVERSED is nonzero if we should reverse the sense of the comparison.
8249 ANNUL is nonzero if we should generate an annulling branch. */
8251 const char *
8252 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8253 int annul, rtx insn)
8255 static char string[64];
8256 enum rtx_code code = GET_CODE (op);
8257 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8258 rtx note;
8259 int far;
8260 char *p;
8262 /* Branches on a register are limited to +-128KB.  If it is too far away,
8263 change
8265 brnz,pt %g1, .LC30
8267 to
8269 brz,pn %g1, .+12
8270 nop
8271 ba,pt %xcc, .LC30
8273 and
8275 brgez,a,pn %o1, .LC29
8277 to
8279 brlz,pt %o1, .+16
8280 nop
8281 ba,pt %xcc, .LC29 */
8283 far = get_attr_length (insn) >= 3;
8285 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8286 if (reversed ^ far)
8287 code = reverse_condition (code);
8289 /* Only 64 bit versions of these instructions exist. */
8290 gcc_assert (mode == DImode);
8292 /* Start by writing the branch condition. */
8294 switch (code)
8296 case NE:
8297 strcpy (string, "brnz");
8298 break;
8300 case EQ:
8301 strcpy (string, "brz");
8302 break;
8304 case GE:
8305 strcpy (string, "brgez");
8306 break;
8308 case LT:
8309 strcpy (string, "brlz");
8310 break;
8312 case LE:
8313 strcpy (string, "brlez");
8314 break;
8316 case GT:
8317 strcpy (string, "brgz");
8318 break;
8320 default:
8321 gcc_unreachable ();
8324 p = strchr (string, '\0');
8326 /* Now add the annulling, reg, label, and nop. */
8327 if (annul && ! far)
8329 strcpy (p, ",a");
8330 p += 2;
8333 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8335 strcpy (p,
8336 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8337 ? ",pt" : ",pn");
8338 p += 3;
8341 *p = p < string + 8 ? '\t' : ' ';
8342 p++;
8343 *p++ = '%';
8344 *p++ = '0' + reg;
8345 *p++ = ',';
8346 *p++ = ' ';
8347 if (far)
8349 int veryfar = 1, delta;
8351 if (INSN_ADDRESSES_SET_P ())
8353 delta = (INSN_ADDRESSES (INSN_UID (dest))
8354 - INSN_ADDRESSES (INSN_UID (insn)));
8355 /* Leave some instructions for "slop". */
8356 if (delta >= -260000 && delta < 260000)
8357 veryfar = 0;
8360 strcpy (p, ".+12\n\t nop\n\t");
8361 /* Skip the next insn if requested or if we know that it will be a nop:
8362 ".+12" becomes ".+16", jumping over the delay slot as well. */
8363 if (annul || ! final_sequence)
8364 p[3] = '6';
8365 p += 12;
8366 if (veryfar)
8368 strcpy (p, "b\t");
8369 p += 2;
8371 else
8373 strcpy (p, "ba,pt\t%%xcc, ");
8374 p += 13;
8377 *p++ = '%';
8378 *p++ = 'l';
8379 *p++ = '0' + label;
8380 *p++ = '%';
8381 *p++ = '#';
8382 *p = '\0';
8384 return string;
8387 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8388 Such instructions cannot be used in the delay slot of a return insn on v9.
8389 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8390 */
8392 static int
8393 epilogue_renumber (register rtx *where, int test)
8395 register const char *fmt;
8396 register int i;
8397 register enum rtx_code code;
8399 if (*where == 0)
8400 return 0;
8402 code = GET_CODE (*where);
8404 switch (code)
8406 case REG:
8407 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8408 return 1;
8409 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8410 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8411 case SCRATCH:
8412 case CC0:
8413 case PC:
8414 case CONST_INT:
8415 case CONST_DOUBLE:
8416 return 0;
8418 /* Do not replace the frame pointer with the stack pointer because
8419 it can cause the delayed instruction to load below the stack.
8420 This occurs when instructions like:
8422 (set (reg/i:SI 24 %i0)
8423 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8424 (const_int -20 [0xffffffec])) 0))
8426 are in the return delayed slot. */
8427 case PLUS:
8428 if (GET_CODE (XEXP (*where, 0)) == REG
8429 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8430 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8431 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8432 return 1;
8433 break;
8435 case MEM:
8436 if (SPARC_STACK_BIAS
8437 && GET_CODE (XEXP (*where, 0)) == REG
8438 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8439 return 1;
8440 break;
8442 default:
8443 break;
8446 fmt = GET_RTX_FORMAT (code);
8448 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8450 if (fmt[i] == 'E')
8452 register int j;
8453 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8454 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8455 return 1;
8457 else if (fmt[i] == 'e'
8458 && epilogue_renumber (&(XEXP (*where, i)), test))
8459 return 1;
8461 return 0;
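/* Illustrative: with TEST == 0, an insn such as

     (set (reg:SI 24 %i0) (const_int 0))

   in the delay slot of a v9 "return" is rewritten to use %o0, since
   OUTGOING_REGNO maps the incoming registers 24-31 (%i0-%i7) onto
   8-15 (%o0-%o7), where the values live once the window has rotated.  */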
8464 /* Leaf functions and non-leaf functions have different needs. */
8466 static const int
8467 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8469 static const int
8470 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8472 static const int *const reg_alloc_orders[] = {
8473 reg_leaf_alloc_order,
8474 reg_nonleaf_alloc_order};
8476 void
8477 order_regs_for_local_alloc (void)
8479 static int last_order_nonleaf = 1;
8481 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8483 last_order_nonleaf = !last_order_nonleaf;
8484 memcpy ((char *) reg_alloc_order,
8485 (const char *) reg_alloc_orders[last_order_nonleaf],
8486 FIRST_PSEUDO_REGISTER * sizeof (int));
8490 /* Return 1 if REG and MEM are legitimate enough to allow the various
8491 mem<-->reg splits to be run. */
8494 sparc_splitdi_legitimate (rtx reg, rtx mem)
8496 /* Punt if we are here by mistake. */
8497 gcc_assert (reload_completed);
8499 /* We must have an offsettable memory reference. */
8500 if (! offsettable_memref_p (mem))
8501 return 0;
8503 /* If we have legitimate args for ldd/std, we do not want
8504 the split to happen. */
8505 if ((REGNO (reg) % 2) == 0
8506 && mem_min_alignment (mem, 8))
8507 return 0;
8509 /* Success. */
8510 return 1;
8513 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8516 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8518 int regno1, regno2;
8520 if (GET_CODE (reg1) == SUBREG)
8521 reg1 = SUBREG_REG (reg1);
8522 if (GET_CODE (reg1) != REG)
8523 return 0;
8524 regno1 = REGNO (reg1);
8526 if (GET_CODE (reg2) == SUBREG)
8527 reg2 = SUBREG_REG (reg2);
8528 if (GET_CODE (reg2) != REG)
8529 return 0;
8530 regno2 = REGNO (reg2);
8532 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8533 return 1;
8535 if (TARGET_VIS3)
8537 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8538 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8539 return 1;
8542 return 0;
8545 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8546 This makes them candidates for using ldd and std insns.
8548 Note reg1 and reg2 *must* be hard registers. */
8551 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8553 /* We might have been passed a SUBREG. */
8554 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8555 return 0;
8557 if (REGNO (reg1) % 2 != 0)
8558 return 0;
8560 /* Integer ldd is deprecated in SPARC V9. */
8561 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8562 return 0;
8564 return (REGNO (reg1) == REGNO (reg2) - 1);
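/* For example (illustrative), %f0/%f1 qualify, and so do %o0/%o1 on
   pre-V9 targets, whereas %o1/%o2 fail the even-register test and
   %o2/%o1 fail the adjacency test.  */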
8567 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8568 an ldd or std insn.
8570 This can only happen when addr1 and addr2, the addresses in mem1
8571 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8572 addr1 must also be aligned on a 64-bit boundary.
8574 Also, if dependent_reg_rtx is not null, it should not be used to
8575 compute the address for mem1, i.e. we cannot optimize a sequence
8576 like:
8577 ld [%o0], %o0
8578 ld [%o0 + 4], %o1
8579 to
8580 ldd [%o0], %o0
8581 nor:
8582 ld [%g3 + 4], %g3
8583 ld [%g3], %g2
8584 to
8585 ldd [%g3], %g2
8587 But, note that the transformation from:
8588 ld [%g2 + 4], %g3
8589 ld [%g2], %g2
8590 to
8591 ldd [%g2], %g2
8592 is perfectly fine. Thus, the peephole2 patterns always pass us
8593 the destination register of the first load, never the second one.
8595 For stores we don't have a similar problem, so dependent_reg_rtx is
8596 NULL_RTX. */
8599 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8601 rtx addr1, addr2;
8602 unsigned int reg1;
8603 HOST_WIDE_INT offset1;
8605 /* The mems cannot be volatile. */
8606 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8607 return 0;
8609 /* MEM1 should be aligned on a 64-bit boundary. */
8610 if (MEM_ALIGN (mem1) < 64)
8611 return 0;
8613 addr1 = XEXP (mem1, 0);
8614 addr2 = XEXP (mem2, 0);
8616 /* Extract a register number and offset (if used) from the first addr. */
8617 if (GET_CODE (addr1) == PLUS)
8619 /* If not a REG, return zero. */
8620 if (GET_CODE (XEXP (addr1, 0)) != REG)
8621 return 0;
8622 else
8624 reg1 = REGNO (XEXP (addr1, 0));
8625 /* The offset must be constant! */
8626 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8627 return 0;
8628 offset1 = INTVAL (XEXP (addr1, 1));
8631 else if (GET_CODE (addr1) != REG)
8632 return 0;
8633 else
8635 reg1 = REGNO (addr1);
8636 /* This was a simple (mem (reg)) expression. Offset is 0. */
8637 offset1 = 0;
8640 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
8641 if (GET_CODE (addr2) != PLUS)
8642 return 0;
8644 if (GET_CODE (XEXP (addr2, 0)) != REG
8645 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8646 return 0;
8648 if (reg1 != REGNO (XEXP (addr2, 0)))
8649 return 0;
8651 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8652 return 0;
8654 /* The first offset must be evenly divisible by 8 to ensure the
8655 address is 64-bit aligned. */
8656 if (offset1 % 8 != 0)
8657 return 0;
8659 /* The offset for the second addr must be 4 more than the first addr. */
8660 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8661 return 0;
8663 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8664 instructions. */
8665 return 1;
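/* Illustrative: the pair

     ld  [%o0 + 8],  %o2
     ld  [%o0 + 12], %o3

   passes all the tests above (same base register, constant offsets 8
   and 12, first offset a multiple of 8) and may become

     ldd [%o0 + 8],  %o2  */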
8668 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8671 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, enum machine_mode mode)
8673 rtx x = widen_memory_access (mem1, mode, 0);
8674 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8675 return x;
8678 /* Return 1 if reg is a pseudo, or is the first register in
8679 a hard register pair. This makes it suitable for use in
8680 ldd and std insns. */
8683 register_ok_for_ldd (rtx reg)
8685 /* We might have been passed a SUBREG. */
8686 if (!REG_P (reg))
8687 return 0;
8689 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8690 return (REGNO (reg) % 2 == 0);
8692 return 1;
8695 /* Return 1 if OP, a MEM, has an address which is known to be
8696 aligned to an 8-byte boundary. */
8699 memory_ok_for_ldd (rtx op)
8701 /* In 64-bit mode, we assume that the address is word-aligned. */
8702 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8703 return 0;
8705 if (! can_create_pseudo_p ()
8706 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8707 return 0;
8709 return 1;
8712 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8714 static bool
8715 sparc_print_operand_punct_valid_p (unsigned char code)
8717 if (code == '#'
8718 || code == '*'
8719 || code == '('
8720 || code == ')'
8721 || code == '_'
8722 || code == '&')
8723 return true;
8725 return false;
8728 /* Implement TARGET_PRINT_OPERAND.
8729 Print operand X (an rtx) in assembler syntax to file FILE.
8730 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8731 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8733 static void
8734 sparc_print_operand (FILE *file, rtx x, int code)
8736 switch (code)
8738 case '#':
8739 /* Output an insn in a delay slot. */
8740 if (final_sequence)
8741 sparc_indent_opcode = 1;
8742 else
8743 fputs ("\n\t nop", file);
8744 return;
8745 case '*':
8746 /* Output an annul flag if there's nothing for the delay slot and we
8747 are optimizing. This is always used with '(' below.
8748 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8749 this is a dbx bug. So, we only do this when optimizing.
8750 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8751 Always emit a nop in case the next instruction is a branch. */
8752 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8753 fputs (",a", file);
8754 return;
8755 case '(':
8756 /* Output a 'nop' if there's nothing for the delay slot and we are
8757 not optimizing. This is always used with '*' above. */
8758 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8759 fputs ("\n\t nop", file);
8760 else if (final_sequence)
8761 sparc_indent_opcode = 1;
8762 return;
8763 case ')':
8764 /* Output the right displacement from the saved PC on function return.
8765 The caller may have placed an "unimp" insn immediately after the call
8766 so we have to account for it. This insn is used in the 32-bit ABI
8767 when calling a function that returns a non zero-sized structure. The
8768 64-bit ABI doesn't have it. Be careful to have this test be the same
8769 as that for the call. The exception is when sparc_std_struct_return
8770 is enabled, the psABI is followed exactly and the adjustment is made
8771 by the code in sparc_struct_value_rtx. The call emitted is the same
8772 when sparc_std_struct_return is enabled. */
8773 if (!TARGET_ARCH64
8774 && cfun->returns_struct
8775 && !sparc_std_struct_return
8776 && DECL_SIZE (DECL_RESULT (current_function_decl))
8777 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8778 == INTEGER_CST
8779 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8780 fputs ("12", file);
8781 else
8782 fputc ('8', file);
8783 return;
8784 case '_':
8785 /* Output the Embedded Medium/Anywhere code model base register. */
8786 fputs (EMBMEDANY_BASE_REG, file);
8787 return;
8788 case '&':
8789 /* Print some local dynamic TLS name. */
8790 assemble_name (file, get_some_local_dynamic_name ());
8791 return;
8793 case 'Y':
8794 /* Adjust the operand to take into account a RESTORE operation. */
8795 if (GET_CODE (x) == CONST_INT)
8796 break;
8797 else if (GET_CODE (x) != REG)
8798 output_operand_lossage ("invalid %%Y operand");
8799 else if (REGNO (x) < 8)
8800 fputs (reg_names[REGNO (x)], file);
8801 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8802 fputs (reg_names[REGNO (x)-16], file);
8803 else
8804 output_operand_lossage ("invalid %%Y operand");
8805 return;
8806 case 'L':
8807 /* Print out the low order register name of a register pair. */
8808 if (WORDS_BIG_ENDIAN)
8809 fputs (reg_names[REGNO (x)+1], file);
8810 else
8811 fputs (reg_names[REGNO (x)], file);
8812 return;
8813 case 'H':
8814 /* Print out the high order register name of a register pair. */
8815 if (WORDS_BIG_ENDIAN)
8816 fputs (reg_names[REGNO (x)], file);
8817 else
8818 fputs (reg_names[REGNO (x)+1], file);
8819 return;
8820 case 'R':
8821 /* Print out the second register name of a register pair or quad.
8822 I.e., R (%o0) => %o1. */
8823 fputs (reg_names[REGNO (x)+1], file);
8824 return;
8825 case 'S':
8826 /* Print out the third register name of a register quad.
8827 I.e., S (%o0) => %o2. */
8828 fputs (reg_names[REGNO (x)+2], file);
8829 return;
8830 case 'T':
8831 /* Print out the fourth register name of a register quad.
8832 I.e., T (%o0) => %o3. */
8833 fputs (reg_names[REGNO (x)+3], file);
8834 return;
8835 case 'x':
8836 /* Print a condition code register. */
8837 if (REGNO (x) == SPARC_ICC_REG)
8839 /* We don't handle CC[X]_NOOVmode because they're not supposed
8840 to occur here. */
8841 if (GET_MODE (x) == CCmode)
8842 fputs ("%icc", file);
8843 else if (GET_MODE (x) == CCXmode)
8844 fputs ("%xcc", file);
8845 else
8846 gcc_unreachable ();
8848 else
8849 /* %fccN register */
8850 fputs (reg_names[REGNO (x)], file);
8851 return;
8852 case 'm':
8853 /* Print the operand's address only. */
8854 output_address (XEXP (x, 0));
8855 return;
8856 case 'r':
8857 /* In this case we need a register. Use %g0 if the
8858 operand is const0_rtx. */
8859 if (x == const0_rtx
8860 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8862 fputs ("%g0", file);
8863 return;
8865 else
8866 break;
8868 case 'A':
8869 switch (GET_CODE (x))
8871 case IOR: fputs ("or", file); break;
8872 case AND: fputs ("and", file); break;
8873 case XOR: fputs ("xor", file); break;
8874 default: output_operand_lossage ("invalid %%A operand");
8876 return;
8878 case 'B':
8879 switch (GET_CODE (x))
8881 case IOR: fputs ("orn", file); break;
8882 case AND: fputs ("andn", file); break;
8883 case XOR: fputs ("xnor", file); break;
8884 default: output_operand_lossage ("invalid %%B operand");
8886 return;
8888 /* This is used by the conditional move instructions. */
8889 case 'C':
8891 enum rtx_code rc = GET_CODE (x);
8893 switch (rc)
8895 case NE: fputs ("ne", file); break;
8896 case EQ: fputs ("e", file); break;
8897 case GE: fputs ("ge", file); break;
8898 case GT: fputs ("g", file); break;
8899 case LE: fputs ("le", file); break;
8900 case LT: fputs ("l", file); break;
8901 case GEU: fputs ("geu", file); break;
8902 case GTU: fputs ("gu", file); break;
8903 case LEU: fputs ("leu", file); break;
8904 case LTU: fputs ("lu", file); break;
8905 case LTGT: fputs ("lg", file); break;
8906 case UNORDERED: fputs ("u", file); break;
8907 case ORDERED: fputs ("o", file); break;
8908 case UNLT: fputs ("ul", file); break;
8909 case UNLE: fputs ("ule", file); break;
8910 case UNGT: fputs ("ug", file); break;
8911 case UNGE: fputs ("uge", file); break;
8912 case UNEQ: fputs ("ue", file); break;
8913 default: output_operand_lossage ("invalid %%C operand");
8915 return;
8918 /* This is used by the movr instruction pattern. */
8919 case 'D':
8921 enum rtx_code rc = GET_CODE (x);
8922 switch (rc)
8924 case NE: fputs ("ne", file); break;
8925 case EQ: fputs ("e", file); break;
8926 case GE: fputs ("gez", file); break;
8927 case LT: fputs ("lz", file); break;
8928 case LE: fputs ("lez", file); break;
8929 case GT: fputs ("gz", file); break;
8930 default: output_operand_lossage ("invalid %%D operand");
8932 return;
8935 case 'b':
8937 /* Print a sign-extended character. */
8938 int i = trunc_int_for_mode (INTVAL (x), QImode);
8939 fprintf (file, "%d", i);
8940 return;
8943 case 'f':
8944 /* Operand must be a MEM; write its address. */
8945 if (GET_CODE (x) != MEM)
8946 output_operand_lossage ("invalid %%f operand");
8947 output_address (XEXP (x, 0));
8948 return;
8950 case 's':
8952 /* Print a sign-extended 32-bit value. */
8953 HOST_WIDE_INT i;
8954 if (GET_CODE (x) == CONST_INT)
8955 i = INTVAL (x);
8956 else if (GET_CODE (x) == CONST_DOUBLE)
8957 i = CONST_DOUBLE_LOW (x);
8958 else
8960 output_operand_lossage ("invalid %%s operand");
8961 return;
8963 i = trunc_int_for_mode (i, SImode);
8964 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8965 return;
8968 case 0:
8969 /* Do nothing special. */
8970 break;
8972 default:
8973 /* Undocumented flag. */
8974 output_operand_lossage ("invalid operand output code");
8977 if (GET_CODE (x) == REG)
8978 fputs (reg_names[REGNO (x)], file);
8979 else if (GET_CODE (x) == MEM)
8981 fputc ('[', file);
8982 /* Poor Sun assembler doesn't understand absolute addressing. */
8983 if (CONSTANT_P (XEXP (x, 0)))
8984 fputs ("%g0+", file);
8985 output_address (XEXP (x, 0));
8986 fputc (']', file);
8988 else if (GET_CODE (x) == HIGH)
8990 fputs ("%hi(", file);
8991 output_addr_const (file, XEXP (x, 0));
8992 fputc (')', file);
8994 else if (GET_CODE (x) == LO_SUM)
8996 sparc_print_operand (file, XEXP (x, 0), 0);
8997 if (TARGET_CM_MEDMID)
8998 fputs ("+%l44(", file);
8999 else
9000 fputs ("+%lo(", file);
9001 output_addr_const (file, XEXP (x, 1));
9002 fputc (')', file);
9004 else if (GET_CODE (x) == CONST_DOUBLE
9005 && (GET_MODE (x) == VOIDmode
9006 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
9008 if (CONST_DOUBLE_HIGH (x) == 0)
9009 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
9010 else if (CONST_DOUBLE_HIGH (x) == -1
9011 && CONST_DOUBLE_LOW (x) < 0)
9012 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
9013 else
9014 output_operand_lossage ("long long constant not a valid immediate operand");
9016 else if (GET_CODE (x) == CONST_DOUBLE)
9017 output_operand_lossage ("floating point constant not a valid immediate operand");
9018 else { output_addr_const (file, x); }
9021 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9023 static void
9024 sparc_print_operand_address (FILE *file, rtx x)
9026 register rtx base, index = 0;
9027 int offset = 0;
9028 register rtx addr = x;
9030 if (REG_P (addr))
9031 fputs (reg_names[REGNO (addr)], file);
9032 else if (GET_CODE (addr) == PLUS)
9034 if (CONST_INT_P (XEXP (addr, 0)))
9035 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9036 else if (CONST_INT_P (XEXP (addr, 1)))
9037 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9038 else
9039 base = XEXP (addr, 0), index = XEXP (addr, 1);
9040 if (GET_CODE (base) == LO_SUM)
9042 gcc_assert (USE_AS_OFFSETABLE_LO10
9043 && TARGET_ARCH64
9044 && ! TARGET_CM_MEDMID);
9045 output_operand (XEXP (base, 0), 0);
9046 fputs ("+%lo(", file);
9047 output_address (XEXP (base, 1));
9048 fprintf (file, ")+%d", offset);
9050 else
9052 fputs (reg_names[REGNO (base)], file);
9053 if (index == 0)
9054 fprintf (file, "%+d", offset);
9055 else if (REG_P (index))
9056 fprintf (file, "+%s", reg_names[REGNO (index)]);
9057 else if (GET_CODE (index) == SYMBOL_REF
9058 || GET_CODE (index) == LABEL_REF
9059 || GET_CODE (index) == CONST)
9060 fputc ('+', file), output_addr_const (file, index);
9061 else gcc_unreachable ();
9064 else if (GET_CODE (addr) == MINUS
9065 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9067 output_addr_const (file, XEXP (addr, 0));
9068 fputs ("-(", file);
9069 output_addr_const (file, XEXP (addr, 1));
9070 fputs ("-.)", file);
9072 else if (GET_CODE (addr) == LO_SUM)
9074 output_operand (XEXP (addr, 0), 0);
9075 if (TARGET_CM_MEDMID)
9076 fputs ("+%l44(", file);
9077 else
9078 fputs ("+%lo(", file);
9079 output_address (XEXP (addr, 1));
9080 fputc (')', file);
9082 else if (flag_pic
9083 && GET_CODE (addr) == CONST
9084 && GET_CODE (XEXP (addr, 0)) == MINUS
9085 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9086 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9087 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9089 addr = XEXP (addr, 0);
9090 output_addr_const (file, XEXP (addr, 0));
9091 /* Group the args of the second CONST in parenthesis. */
9092 fputs ("-(", file);
9093 /* Skip past the second CONST--it does nothing for us. */
9094 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9095 /* Close the parenthesis. */
9096 fputc (')', file);
9098 else
9100 output_addr_const (file, addr);
9104 /* Target hook for assembling integer objects. The sparc version has
9105 special handling for aligned DI-mode objects. */
9107 static bool
9108 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9110 /* ??? We only output .xword's for symbols and only then in environments
9111 where the assembler can handle them. */
9112 if (aligned_p && size == 8
9113 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
9115 if (TARGET_V9)
9117 assemble_integer_with_op ("\t.xword\t", x);
9118 return true;
9120 else
9122 assemble_aligned_integer (4, const0_rtx);
9123 assemble_aligned_integer (4, x);
9124 return true;
9127 return default_assemble_integer (x, size, aligned_p);
9130 /* Return the value of a code used in the .proc pseudo-op that says
9131 what kind of result this function returns. For non-C types, we pick
9132 the closest C type. */
9134 #ifndef SHORT_TYPE_SIZE
9135 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9136 #endif
9138 #ifndef INT_TYPE_SIZE
9139 #define INT_TYPE_SIZE BITS_PER_WORD
9140 #endif
9142 #ifndef LONG_TYPE_SIZE
9143 #define LONG_TYPE_SIZE BITS_PER_WORD
9144 #endif
9146 #ifndef LONG_LONG_TYPE_SIZE
9147 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9148 #endif
9150 #ifndef FLOAT_TYPE_SIZE
9151 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9152 #endif
9154 #ifndef DOUBLE_TYPE_SIZE
9155 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9156 #endif
9158 #ifndef LONG_DOUBLE_TYPE_SIZE
9159 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9160 #endif
9162 unsigned long
9163 sparc_type_code (register tree type)
9165 register unsigned long qualifiers = 0;
9166 register unsigned shift;
9168 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9169 setting more, since some assemblers will give an error for this. Also,
9170 we must be careful to avoid shifts of 32 bits or more to avoid getting
9171 unpredictable results. */
9173 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9175 switch (TREE_CODE (type))
9177 case ERROR_MARK:
9178 return qualifiers;
9180 case ARRAY_TYPE:
9181 qualifiers |= (3 << shift);
9182 break;
9184 case FUNCTION_TYPE:
9185 case METHOD_TYPE:
9186 qualifiers |= (2 << shift);
9187 break;
9189 case POINTER_TYPE:
9190 case REFERENCE_TYPE:
9191 case OFFSET_TYPE:
9192 qualifiers |= (1 << shift);
9193 break;
9195 case RECORD_TYPE:
9196 return (qualifiers | 8);
9198 case UNION_TYPE:
9199 case QUAL_UNION_TYPE:
9200 return (qualifiers | 9);
9202 case ENUMERAL_TYPE:
9203 return (qualifiers | 10);
9205 case VOID_TYPE:
9206 return (qualifiers | 16);
9208 case INTEGER_TYPE:
9209 /* If this is a range type, consider it to be the underlying
9210 type. */
9211 if (TREE_TYPE (type) != 0)
9212 break;
9214 /* Carefully distinguish all the standard types of C,
9215 without messing up if the language is not C. We do this by
9216 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9217 look at both the names and the above fields, but that's redundant.
9218 Any type whose size is between two C types will be considered
9219 to be the wider of the two types. Also, we do not have a
9220 special code to use for "long long", so anything wider than
9221 long is treated the same. Note that we can't distinguish
9222 between "int" and "long" in this code if they are the same
9223 size, but that's fine, since neither can the assembler. */
9225 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9226 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9228 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9229 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9231 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9232 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9234 else
9235 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9237 case REAL_TYPE:
9238 /* If this is a range type, consider it to be the underlying
9239 type. */
9240 if (TREE_TYPE (type) != 0)
9241 break;
9243 /* Carefully distinguish all the standard types of C,
9244 without messing up if the language is not C. */
9246 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9247 return (qualifiers | 6);
9249 else
9250 return (qualifiers | 7);
9252 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9253 /* ??? We need to distinguish between double and float complex types,
9254 but I don't know how yet because I can't reach this code from
9255 existing front-ends. */
9256 return (qualifiers | 7); /* Who knows? */
9258 case VECTOR_TYPE:
9259 case BOOLEAN_TYPE: /* Boolean truth value type. */
9260 case LANG_TYPE:
9261 case NULLPTR_TYPE:
9262 return qualifiers;
9264 default:
9265 gcc_unreachable (); /* Not a type! */
9269 return qualifiers;
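/* A worked example (illustrative): for "unsigned char **" the loop
   records a pointer code (1) at shift 6, another at shift 8, and then
   returns with the base code 12 for unsigned char, i.e.
   (1 << 6) | (1 << 8) | 12 == 0x14c.  */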
9272 /* Nested function support. */
9274 /* Emit RTL insns to initialize the variable parts of a trampoline.
9275 FNADDR is an RTX for the address of the function's pure code.
9276 CXT is an RTX for the static chain value for the function.
9278 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9279 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9280 (to store insns). This is a bit excessive. Perhaps a different
9281 mechanism would be better here.
9283 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9285 static void
9286 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9288 /* SPARC 32-bit trampoline:
9290 sethi %hi(fn), %g1
9291 sethi %hi(static), %g2
9292 jmp %g1+%lo(fn)
9293 or %g2, %lo(static), %g2
9295 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9296 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9299 emit_move_insn
9300 (adjust_address (m_tramp, SImode, 0),
9301 expand_binop (SImode, ior_optab,
9302 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9303 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9304 NULL_RTX, 1, OPTAB_DIRECT));
9306 emit_move_insn
9307 (adjust_address (m_tramp, SImode, 4),
9308 expand_binop (SImode, ior_optab,
9309 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9310 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9311 NULL_RTX, 1, OPTAB_DIRECT));
9313 emit_move_insn
9314 (adjust_address (m_tramp, SImode, 8),
9315 expand_binop (SImode, ior_optab,
9316 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9317 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9318 NULL_RTX, 1, OPTAB_DIRECT));
9320 emit_move_insn
9321 (adjust_address (m_tramp, SImode, 12),
9322 expand_binop (SImode, ior_optab,
9323 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9324 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9325 NULL_RTX, 1, OPTAB_DIRECT));
9327 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9328 aligned on a 16 byte boundary so one flush clears it all. */
9329 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9330 if (sparc_cpu != PROCESSOR_ULTRASPARC
9331 && sparc_cpu != PROCESSOR_ULTRASPARC3
9332 && sparc_cpu != PROCESSOR_NIAGARA
9333 && sparc_cpu != PROCESSOR_NIAGARA2
9334 && sparc_cpu != PROCESSOR_NIAGARA3
9335 && sparc_cpu != PROCESSOR_NIAGARA4)
9336 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9338 /* Call __enable_execute_stack after writing onto the stack to make sure
9339 the stack address is accessible. */
9340 #ifdef HAVE_ENABLE_EXECUTE_STACK
9341 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9342 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9343 #endif
9347 /* The 64-bit version is simpler because it makes more sense to load the
9348 values as "immediate" data out of the trampoline. It's also easier since
9349 we can read the PC without clobbering a register. */
9351 static void
9352 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9354 /* SPARC 64-bit trampoline:
9356 rd %pc, %g1
9357 ldx [%g1+24], %g5
9358 jmp %g5
9359 ldx [%g1+16], %g5
9360 +16 bytes data
9363 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9364 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9365 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9366 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9367 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9368 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9369 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9370 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9371 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9372 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9373 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9375 if (sparc_cpu != PROCESSOR_ULTRASPARC
9376 && sparc_cpu != PROCESSOR_ULTRASPARC3
9377 && sparc_cpu != PROCESSOR_NIAGARA
9378 && sparc_cpu != PROCESSOR_NIAGARA2
9379 && sparc_cpu != PROCESSOR_NIAGARA3
9380 && sparc_cpu != PROCESSOR_NIAGARA4)
9381 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9383 /* Call __enable_execute_stack after writing onto the stack to make sure
9384 the stack address is accessible. */
9385 #ifdef HAVE_ENABLE_EXECUTE_STACK
9386 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9387 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9388 #endif
9391 /* Worker for TARGET_TRAMPOLINE_INIT. */
9393 static void
9394 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9396 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9397 cxt = force_reg (Pmode, cxt);
9398 if (TARGET_ARCH64)
9399 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9400 else
9401 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9404 /* Adjust the cost of a scheduling dependency. Return the new cost of
9405 a dependency LINK of INSN on DEP_INSN. COST is the current cost. */
9407 static int
9408 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9410 enum attr_type insn_type;
9412 if (! recog_memoized (insn))
9413 return 0;
9415 insn_type = get_attr_type (insn);
9417 if (REG_NOTE_KIND (link) == 0)
9419 /* Data dependency; DEP_INSN writes a register that INSN reads some
9420 cycles later. */
9422 /* If a load, then the dependence must be on the memory address;
9423 add an extra "cycle". Note that the cost could be two cycles
9424 if the reg was written late in an instruction group; we cannot tell
9425 here. */
9426 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9427 return cost + 3;
9429 /* Get the delay only if the address of the store is the dependence. */
9430 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9432 rtx pat = PATTERN (insn);
9433 rtx dep_pat = PATTERN (dep_insn);
9435 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9436 return cost; /* This should not happen! */
9438 /* The dependency between the two instructions was on the data that
9439 is being stored. Assume that this implies that the address of the
9440 store is not dependent. */
9441 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9442 return cost;
9444 return cost + 3; /* An approximation. */
9447 /* A shift instruction cannot receive its data from an instruction
9448 in the same cycle; add a one cycle penalty. */
9449 if (insn_type == TYPE_SHIFT)
9450 return cost + 3; /* Split before cascade into shift. */
9452 else
9454 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9455 INSN writes some cycles later. */
9457 /* These are only significant for the fpu unit; writing a fp reg before
9458 the fpu has finished with it stalls the processor. */
9460 /* Reusing an integer register causes no problems. */
9461 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9462 return 0;
9465 return cost;
9468 static int
9469 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9471 enum attr_type insn_type, dep_type;
9472 rtx pat = PATTERN (insn);
9473 rtx dep_pat = PATTERN (dep_insn);
9475 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9476 return cost;
9478 insn_type = get_attr_type (insn);
9479 dep_type = get_attr_type (dep_insn);
9481 switch (REG_NOTE_KIND (link))
9483 case 0:
9484 /* Data dependency; DEP_INSN writes a register that INSN reads some
9485 cycles later. */
9487 switch (insn_type)
9489 case TYPE_STORE:
9490 case TYPE_FPSTORE:
9491 /* Get the delay iff the address of the store is the dependence. */
9492 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9493 return cost;
9495 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9496 return cost;
9497 return cost + 3;
9499 case TYPE_LOAD:
9500 case TYPE_SLOAD:
9501 case TYPE_FPLOAD:
9502 /* If a load, then the dependence must be on the memory address. If
9503 the addresses aren't equal, then it might be a false dependency. */
9504 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9506 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9507 || GET_CODE (SET_DEST (dep_pat)) != MEM
9508 || GET_CODE (SET_SRC (pat)) != MEM
9509 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9510 XEXP (SET_SRC (pat), 0)))
9511 return cost + 2;
9513 return cost + 8;
9515 break;
9517 case TYPE_BRANCH:
9518 /* Compare to branch latency is 0. There is no benefit from
9519 separating compare and branch. */
9520 if (dep_type == TYPE_COMPARE)
9521 return 0;
9522 /* Floating point compare to branch latency is less than
9523 compare to conditional move. */
9524 if (dep_type == TYPE_FPCMP)
9525 return cost - 1;
9526 break;
9527 default:
9528 break;
9530 break;
9532 case REG_DEP_ANTI:
9533 /* Anti-dependencies only penalize the fpu unit. */
9534 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9535 return 0;
9536 break;
9538 default:
9539 break;
9542 return cost;
9545 static int
9546 sparc_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9548 switch (sparc_cpu)
9550 case PROCESSOR_SUPERSPARC:
9551 cost = supersparc_adjust_cost (insn, link, dep, cost);
9552 break;
9553 case PROCESSOR_HYPERSPARC:
9554 case PROCESSOR_SPARCLITE86X:
9555 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9556 break;
9557 default:
9558 break;
9560 return cost;
9563 static void
9564 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9565 int sched_verbose ATTRIBUTE_UNUSED,
9566 int max_ready ATTRIBUTE_UNUSED)
9569 static int
9570 sparc_use_sched_lookahead (void)
9572 if (sparc_cpu == PROCESSOR_NIAGARA
9573 || sparc_cpu == PROCESSOR_NIAGARA2
9574 || sparc_cpu == PROCESSOR_NIAGARA3)
9575 return 0;
9576 if (sparc_cpu == PROCESSOR_NIAGARA4)
9577 return 2;
9578 if (sparc_cpu == PROCESSOR_ULTRASPARC
9579 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9580 return 4;
9581 if ((1 << sparc_cpu) &
9582 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9583 (1 << PROCESSOR_SPARCLITE86X)))
9584 return 3;
9585 return 0;
9588 static int
9589 sparc_issue_rate (void)
9591 switch (sparc_cpu)
9593 case PROCESSOR_NIAGARA:
9594 case PROCESSOR_NIAGARA2:
9595 case PROCESSOR_NIAGARA3:
9596 default:
9597 return 1;
9598 case PROCESSOR_NIAGARA4:
9599 case PROCESSOR_V9:
9600 /* Assume V9 processors are capable of at least dual-issue. */
9601 return 2;
9602 case PROCESSOR_SUPERSPARC:
9603 return 3;
9604 case PROCESSOR_HYPERSPARC:
9605 case PROCESSOR_SPARCLITE86X:
9606 return 2;
9607 case PROCESSOR_ULTRASPARC:
9608 case PROCESSOR_ULTRASPARC3:
9609 return 4;
9613 static int
9614 set_extends (rtx insn)
9616 register rtx pat = PATTERN (insn);
9618 switch (GET_CODE (SET_SRC (pat)))
9620 /* Load and some shift instructions zero extend. */
9621 case MEM:
9622 case ZERO_EXTEND:
9623 /* sethi clears the high bits */
9624 case HIGH:
9625 /* LO_SUM is used with sethi. sethi cleared the high
9626 bits and the values used with lo_sum are positive */
9627 case LO_SUM:
9628 /* Store flag stores 0 or 1 */
9629 case LT: case LTU:
9630 case GT: case GTU:
9631 case LE: case LEU:
9632 case GE: case GEU:
9633 case EQ:
9634 case NE:
9635 return 1;
9636 case AND:
9638 rtx op0 = XEXP (SET_SRC (pat), 0);
9639 rtx op1 = XEXP (SET_SRC (pat), 1);
9640 if (GET_CODE (op1) == CONST_INT)
9641 return INTVAL (op1) >= 0;
9642 if (GET_CODE (op0) != REG)
9643 return 0;
9644 if (sparc_check_64 (op0, insn) == 1)
9645 return 1;
9646 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9648 case IOR:
9649 case XOR:
9651 rtx op0 = XEXP (SET_SRC (pat), 0);
9652 rtx op1 = XEXP (SET_SRC (pat), 1);
9653 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9654 return 0;
9655 if (GET_CODE (op1) == CONST_INT)
9656 return INTVAL (op1) >= 0;
9657 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9659 case LSHIFTRT:
9660 return GET_MODE (SET_SRC (pat)) == SImode;
9661 /* Positive integers leave the high bits zero. */
9662 case CONST_DOUBLE:
9663 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9664 case CONST_INT:
9665 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9666 case ASHIFTRT:
9667 case SIGN_EXTEND:
9668 return - (GET_MODE (SET_SRC (pat)) == SImode);
9669 case REG:
9670 return sparc_check_64 (SET_SRC (pat), insn);
9671 default:
9672 return 0;
9676 /* We _ought_ to have only one kind per function, but... */
9677 static GTY(()) rtx sparc_addr_diff_list;
9678 static GTY(()) rtx sparc_addr_list;
9680 void
9681 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9683 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9684 if (diff)
9685 sparc_addr_diff_list
9686 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9687 else
9688 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9691 static void
9692 sparc_output_addr_vec (rtx vec)
9694 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9695 int idx, vlen = XVECLEN (body, 0);
9697 #ifdef ASM_OUTPUT_ADDR_VEC_START
9698 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9699 #endif
9701 #ifdef ASM_OUTPUT_CASE_LABEL
9702 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9703 NEXT_INSN (lab));
9704 #else
9705 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9706 #endif
9708 for (idx = 0; idx < vlen; idx++)
9710 ASM_OUTPUT_ADDR_VEC_ELT
9711 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9714 #ifdef ASM_OUTPUT_ADDR_VEC_END
9715 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9716 #endif
9719 static void
9720 sparc_output_addr_diff_vec (rtx vec)
9722 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9723 rtx base = XEXP (XEXP (body, 0), 0);
9724 int idx, vlen = XVECLEN (body, 1);
9726 #ifdef ASM_OUTPUT_ADDR_VEC_START
9727 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9728 #endif
9730 #ifdef ASM_OUTPUT_CASE_LABEL
9731 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9732 NEXT_INSN (lab));
9733 #else
9734 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9735 #endif
9737 for (idx = 0; idx < vlen; idx++)
9739 ASM_OUTPUT_ADDR_DIFF_ELT
9740 (asm_out_file,
9741 body,
9742 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9743 CODE_LABEL_NUMBER (base));
9746 #ifdef ASM_OUTPUT_ADDR_VEC_END
9747 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9748 #endif
9751 static void
9752 sparc_output_deferred_case_vectors (void)
9754 rtx t;
9755 int align;
9757 if (sparc_addr_list == NULL_RTX
9758 && sparc_addr_diff_list == NULL_RTX)
9759 return;
9761 /* Align to cache line in the function's code section. */
9762 switch_to_section (current_function_section ());
9764 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9765 if (align > 0)
9766 ASM_OUTPUT_ALIGN (asm_out_file, align);
9768 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9769 sparc_output_addr_vec (XEXP (t, 0));
9770 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9771 sparc_output_addr_diff_vec (XEXP (t, 0));
9773 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9776 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9777 unknown. Return 1 if the high bits are zero, -1 if the register is
9778 sign extended. */
9780 sparc_check_64 (rtx x, rtx insn)
9782 /* If a register is set only once it is safe to ignore insns this
9783 code does not know how to handle. The loop will either recognize
9784 the single set and return the correct value or fail to recognize
9785 it and return 0. */
9786 int set_once = 0;
9787 rtx y = x;
9789 gcc_assert (GET_CODE (x) == REG);
9791 if (GET_MODE (x) == DImode)
9792 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9794 if (flag_expensive_optimizations
9795 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9796 set_once = 1;
9798 if (insn == 0)
9800 if (set_once)
9801 insn = get_last_insn_anywhere ();
9802 else
9803 return 0;
9806 while ((insn = PREV_INSN (insn)))
9808 switch (GET_CODE (insn))
9810 case JUMP_INSN:
9811 case NOTE:
9812 break;
9813 case CODE_LABEL:
9814 case CALL_INSN:
9815 default:
9816 if (! set_once)
9817 return 0;
9818 break;
9819 case INSN:
9821 rtx pat = PATTERN (insn);
9822 if (GET_CODE (pat) != SET)
9823 return 0;
9824 if (rtx_equal_p (x, SET_DEST (pat)))
9825 return set_extends (insn);
9826 if (y && rtx_equal_p (y, SET_DEST (pat)))
9827 return set_extends (insn);
9828 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9829 return 0;
9833 return 0;
9836 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9837 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9839 const char *
9840 output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
9842 static char asm_code[60];
9844 /* The scratch register is only required when the destination
9845 register is not a 64-bit global or out register. */
9846 if (which_alternative != 2)
9847 operands[3] = operands[0];
9849 /* We can only shift by constants <= 63. */
9850 if (GET_CODE (operands[2]) == CONST_INT)
9851 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9853 if (GET_CODE (operands[1]) == CONST_INT)
9855 output_asm_insn ("mov\t%1, %3", operands);
9857 else
9859 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9860 if (sparc_check_64 (operands[1], insn) <= 0)
9861 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9862 output_asm_insn ("or\t%L1, %3, %3", operands);
9865 strcpy (asm_code, opcode);
9867 if (which_alternative != 2)
9868 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9869 else
9870 return
9871 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
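/* As a worked example (hypothetical operands), when the destination is not
   a 64-bit global or out register (alternative 2, which carries a separate
   scratch register %3) and OPCODE is "sllx", the expansion above yields:

     sllx  %H1, 32, %3       ! high word into position
     srl   %L1, 0, %L1       ! zero-extend the low word (only if needed)
     or    %L1, %3, %3       ! full 64-bit source in the scratch register
     sllx  %3, %2, %3        ! the shift proper
     srlx  %3, 32, %H0       ! split the result back into
     mov   %3, %L0           ! high and low output words  */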
9874 /* Output rtl to increment the profiler label LABELNO
9875 for profiling a function entry. */
9877 void
9878 sparc_profile_hook (int labelno)
9880 char buf[32];
9881 rtx lab, fun;
9883 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9884 if (NO_PROFILE_COUNTERS)
9886 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9888 else
9890 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9891 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9892 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
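/* In practice this means that, with -pg, every function entry emits a call
   to MCOUNT_FUNCTION; unless NO_PROFILE_COUNTERS is set, the address of a
   per-call-site counter label (LP<labelno>) is passed as the single
   argument.  */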
9896 #ifdef TARGET_SOLARIS
9897 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9899 static void
9900 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9901 tree decl ATTRIBUTE_UNUSED)
9903 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9905 solaris_elf_asm_comdat_section (name, flags, decl);
9906 return;
9909 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9911 if (!(flags & SECTION_DEBUG))
9912 fputs (",#alloc", asm_out_file);
9913 if (flags & SECTION_WRITE)
9914 fputs (",#write", asm_out_file);
9915 if (flags & SECTION_TLS)
9916 fputs (",#tls", asm_out_file);
9917 if (flags & SECTION_CODE)
9918 fputs (",#execinstr", asm_out_file);
9920 /* Sun as only supports #nobits/#progbits since Solaris 10. */
9921 if (HAVE_AS_SPARC_NOBITS)
9923 if (flags & SECTION_BSS)
9924 fputs (",#nobits", asm_out_file);
9925 else
9926 fputs (",#progbits", asm_out_file);
9929 fputc ('\n', asm_out_file);
9931 #endif /* TARGET_SOLARIS */
9933 /* We do not allow indirect calls to be optimized into sibling calls.
9935 We cannot use sibling calls when delayed branches are disabled
9936 because they will likely require the call delay slot to be filled.
9938 Also, on SPARC 32-bit we cannot emit a sibling call when the
9939 current function returns a structure. This is because the "unimp
9940 after call" convention would cause the callee to return to the
9941 wrong place. The generic code already disallows cases where the
9942 function being called returns a structure.
9944 It may seem strange that this last case could occur. Usually there
9945 is code after the call which jumps to epilogue code which dumps the
9946 return value into the struct return area. That ought to invalidate
9947 the sibling call, right? Well, in the C++ case we can end up passing
9948 the pointer to the struct return area to a constructor (which returns
9949 void) and then nothing else happens. Such a sibling call would look
9950 valid without the added check here.
9952 VxWorks PIC PLT entries require the global pointer to be initialized
9953 on entry. We therefore can't emit sibling calls to them. */
9954 static bool
9955 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9957 return (decl
9958 && flag_delayed_branch
9959 && (TARGET_ARCH64 || ! cfun->returns_struct)
9960 && !(TARGET_VXWORKS_RTP
9961 && flag_pic
9962 && !targetm.binds_local_p (decl)));
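/* Informal examples of what this predicate accepts and rejects:

     extern int f (int);
     int (*fp) (int);

     f (1);    -- DECL is non-null: may become a sibling call if the
                  other conditions above hold
     fp (1);   -- indirect call, DECL is null: never a sibling call  */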
9965 /* libfunc renaming. */
9967 static void
9968 sparc_init_libfuncs (void)
9970 if (TARGET_ARCH32)
9972 /* Use the subroutines that Sun's library provides for integer
9973 multiply and divide. The `*' prevents an underscore from
9974 being prepended by the compiler. .umul is a little faster
9975 than .mul. */
9976 set_optab_libfunc (smul_optab, SImode, "*.umul");
9977 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9978 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9979 set_optab_libfunc (smod_optab, SImode, "*.rem");
9980 set_optab_libfunc (umod_optab, SImode, "*.urem");
9982 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
9983 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9984 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9985 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9986 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9987 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9989 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9990 is because with soft-float, the SFmode and DFmode sqrt
9991 instructions will be absent, and the compiler will notice and
9992 try to use the TFmode sqrt instruction for calls to the
9993 builtin function sqrt, but this fails. */
9994 if (TARGET_FPU)
9995 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9997 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9998 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9999 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10000 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10001 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10002 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10004 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10005 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10006 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10007 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10009 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10010 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10011 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10012 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10014 if (DITF_CONVERSION_LIBFUNCS)
10016 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10017 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10018 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10019 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10022 if (SUN_CONVERSION_LIBFUNCS)
10024 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10025 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10026 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10027 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10030 if (TARGET_ARCH64)
10032 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10033 do not exist in the library. Make sure the compiler does not
10034 emit calls to them by accident. (It should always use the
10035 hardware instructions.) */
10036 set_optab_libfunc (smul_optab, SImode, 0);
10037 set_optab_libfunc (sdiv_optab, SImode, 0);
10038 set_optab_libfunc (udiv_optab, SImode, 0);
10039 set_optab_libfunc (smod_optab, SImode, 0);
10040 set_optab_libfunc (umod_optab, SImode, 0);
10042 if (SUN_INTEGER_MULTIPLY_64)
10044 set_optab_libfunc (smul_optab, DImode, "__mul64");
10045 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10046 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10047 set_optab_libfunc (smod_optab, DImode, "__rem64");
10048 set_optab_libfunc (umod_optab, DImode, "__urem64");
10051 if (SUN_CONVERSION_LIBFUNCS)
10053 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10054 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10055 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10056 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10061 /* SPARC builtins. */
10062 enum sparc_builtins
10064 /* FPU builtins. */
10065 SPARC_BUILTIN_LDFSR,
10066 SPARC_BUILTIN_STFSR,
10068 /* VIS 1.0 builtins. */
10069 SPARC_BUILTIN_FPACK16,
10070 SPARC_BUILTIN_FPACK32,
10071 SPARC_BUILTIN_FPACKFIX,
10072 SPARC_BUILTIN_FEXPAND,
10073 SPARC_BUILTIN_FPMERGE,
10074 SPARC_BUILTIN_FMUL8X16,
10075 SPARC_BUILTIN_FMUL8X16AU,
10076 SPARC_BUILTIN_FMUL8X16AL,
10077 SPARC_BUILTIN_FMUL8SUX16,
10078 SPARC_BUILTIN_FMUL8ULX16,
10079 SPARC_BUILTIN_FMULD8SUX16,
10080 SPARC_BUILTIN_FMULD8ULX16,
10081 SPARC_BUILTIN_FALIGNDATAV4HI,
10082 SPARC_BUILTIN_FALIGNDATAV8QI,
10083 SPARC_BUILTIN_FALIGNDATAV2SI,
10084 SPARC_BUILTIN_FALIGNDATADI,
10085 SPARC_BUILTIN_WRGSR,
10086 SPARC_BUILTIN_RDGSR,
10087 SPARC_BUILTIN_ALIGNADDR,
10088 SPARC_BUILTIN_ALIGNADDRL,
10089 SPARC_BUILTIN_PDIST,
10090 SPARC_BUILTIN_EDGE8,
10091 SPARC_BUILTIN_EDGE8L,
10092 SPARC_BUILTIN_EDGE16,
10093 SPARC_BUILTIN_EDGE16L,
10094 SPARC_BUILTIN_EDGE32,
10095 SPARC_BUILTIN_EDGE32L,
10096 SPARC_BUILTIN_FCMPLE16,
10097 SPARC_BUILTIN_FCMPLE32,
10098 SPARC_BUILTIN_FCMPNE16,
10099 SPARC_BUILTIN_FCMPNE32,
10100 SPARC_BUILTIN_FCMPGT16,
10101 SPARC_BUILTIN_FCMPGT32,
10102 SPARC_BUILTIN_FCMPEQ16,
10103 SPARC_BUILTIN_FCMPEQ32,
10104 SPARC_BUILTIN_FPADD16,
10105 SPARC_BUILTIN_FPADD16S,
10106 SPARC_BUILTIN_FPADD32,
10107 SPARC_BUILTIN_FPADD32S,
10108 SPARC_BUILTIN_FPSUB16,
10109 SPARC_BUILTIN_FPSUB16S,
10110 SPARC_BUILTIN_FPSUB32,
10111 SPARC_BUILTIN_FPSUB32S,
10112 SPARC_BUILTIN_ARRAY8,
10113 SPARC_BUILTIN_ARRAY16,
10114 SPARC_BUILTIN_ARRAY32,
10116 /* VIS 2.0 builtins. */
10117 SPARC_BUILTIN_EDGE8N,
10118 SPARC_BUILTIN_EDGE8LN,
10119 SPARC_BUILTIN_EDGE16N,
10120 SPARC_BUILTIN_EDGE16LN,
10121 SPARC_BUILTIN_EDGE32N,
10122 SPARC_BUILTIN_EDGE32LN,
10123 SPARC_BUILTIN_BMASK,
10124 SPARC_BUILTIN_BSHUFFLEV4HI,
10125 SPARC_BUILTIN_BSHUFFLEV8QI,
10126 SPARC_BUILTIN_BSHUFFLEV2SI,
10127 SPARC_BUILTIN_BSHUFFLEDI,
10129 /* VIS 3.0 builtins. */
10130 SPARC_BUILTIN_CMASK8,
10131 SPARC_BUILTIN_CMASK16,
10132 SPARC_BUILTIN_CMASK32,
10133 SPARC_BUILTIN_FCHKSM16,
10134 SPARC_BUILTIN_FSLL16,
10135 SPARC_BUILTIN_FSLAS16,
10136 SPARC_BUILTIN_FSRL16,
10137 SPARC_BUILTIN_FSRA16,
10138 SPARC_BUILTIN_FSLL32,
10139 SPARC_BUILTIN_FSLAS32,
10140 SPARC_BUILTIN_FSRL32,
10141 SPARC_BUILTIN_FSRA32,
10142 SPARC_BUILTIN_PDISTN,
10143 SPARC_BUILTIN_FMEAN16,
10144 SPARC_BUILTIN_FPADD64,
10145 SPARC_BUILTIN_FPSUB64,
10146 SPARC_BUILTIN_FPADDS16,
10147 SPARC_BUILTIN_FPADDS16S,
10148 SPARC_BUILTIN_FPSUBS16,
10149 SPARC_BUILTIN_FPSUBS16S,
10150 SPARC_BUILTIN_FPADDS32,
10151 SPARC_BUILTIN_FPADDS32S,
10152 SPARC_BUILTIN_FPSUBS32,
10153 SPARC_BUILTIN_FPSUBS32S,
10154 SPARC_BUILTIN_FUCMPLE8,
10155 SPARC_BUILTIN_FUCMPNE8,
10156 SPARC_BUILTIN_FUCMPGT8,
10157 SPARC_BUILTIN_FUCMPEQ8,
10158 SPARC_BUILTIN_FHADDS,
10159 SPARC_BUILTIN_FHADDD,
10160 SPARC_BUILTIN_FHSUBS,
10161 SPARC_BUILTIN_FHSUBD,
10162 SPARC_BUILTIN_FNHADDS,
10163 SPARC_BUILTIN_FNHADDD,
10164 SPARC_BUILTIN_UMULXHI,
10165 SPARC_BUILTIN_XMULX,
10166 SPARC_BUILTIN_XMULXHI,
10168 SPARC_BUILTIN_MAX
10171 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10172 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10174 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10175 function decl or NULL_TREE if the builtin was not added. */
10177 static tree
10178 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10179 tree type)
10181 tree t
10182 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10184 if (t)
10186 sparc_builtins[code] = t;
10187 sparc_builtins_icode[code] = icode;
10190 return t;
10193 /* Likewise, but also marks the function as "const". */
10195 static tree
10196 def_builtin_const (const char *name, enum insn_code icode,
10197 enum sparc_builtins code, tree type)
10199 tree t = def_builtin (name, icode, code, type);
10201 if (t)
10202 TREE_READONLY (t) = 1;
10204 return t;
10207 /* Implement the TARGET_INIT_BUILTINS target hook.
10208 Create builtin functions for special SPARC instructions. */
10210 static void
10211 sparc_init_builtins (void)
10213 if (TARGET_FPU)
10214 sparc_fpu_init_builtins ();
10216 if (TARGET_VIS)
10217 sparc_vis_init_builtins ();
10220 /* Create builtin functions for FPU instructions. */
10222 static void
10223 sparc_fpu_init_builtins (void)
10225 tree ftype
10226 = build_function_type_list (void_type_node,
10227 build_pointer_type (unsigned_type_node), 0);
10228 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10229 SPARC_BUILTIN_LDFSR, ftype);
10230 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10231 SPARC_BUILTIN_STFSR, ftype);
10234 /* Create builtin functions for VIS instructions. */
10236 static void
10237 sparc_vis_init_builtins (void)
10239 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10240 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10241 tree v4hi = build_vector_type (intHI_type_node, 4);
10242 tree v2hi = build_vector_type (intHI_type_node, 2);
10243 tree v2si = build_vector_type (intSI_type_node, 2);
10244 tree v1si = build_vector_type (intSI_type_node, 1);
10246 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10247 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10248 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10249 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10250 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10251 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10252 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10253 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10254 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10255 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10256 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10257 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10258 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10259 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10260 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10261 v8qi, v8qi,
10262 intDI_type_node, 0);
10263 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10264 v8qi, v8qi, 0);
10265 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10266 v8qi, v8qi, 0);
10267 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10268 intDI_type_node,
10269 intDI_type_node, 0);
10270 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10271 intSI_type_node,
10272 intSI_type_node, 0);
10273 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10274 ptr_type_node,
10275 intSI_type_node, 0);
10276 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10277 ptr_type_node,
10278 intDI_type_node, 0);
10279 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10280 ptr_type_node,
10281 ptr_type_node, 0);
10282 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10283 ptr_type_node,
10284 ptr_type_node, 0);
10285 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10286 v4hi, v4hi, 0);
10287 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10288 v2si, v2si, 0);
10289 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10290 v4hi, v4hi, 0);
10291 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10292 v2si, v2si, 0);
10293 tree void_ftype_di = build_function_type_list (void_type_node,
10294 intDI_type_node, 0);
10295 tree di_ftype_void = build_function_type_list (intDI_type_node,
10296 void_type_node, 0);
10297 tree void_ftype_si = build_function_type_list (void_type_node,
10298 intSI_type_node, 0);
10299 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10300 float_type_node,
10301 float_type_node, 0);
10302 tree df_ftype_df_df = build_function_type_list (double_type_node,
10303 double_type_node,
10304 double_type_node, 0);
10306 /* Packing and expanding vectors. */
10307 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10308 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10309 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10310 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10311 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10312 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10313 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10314 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10315 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10316 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10318 /* Multiplications. */
10319 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10320 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10321 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10322 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10323 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10324 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10325 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10326 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10327 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10328 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10329 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10330 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10331 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10332 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10334 /* Data aligning. */
10335 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10336 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10337 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10338 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10339 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10340 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10341 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10342 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10344 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10345 SPARC_BUILTIN_WRGSR, void_ftype_di);
10346 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10347 SPARC_BUILTIN_RDGSR, di_ftype_void);
10349 if (TARGET_ARCH64)
10351 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10352 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10353 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10354 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10356 else
10358 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10359 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10360 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10361 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10364 /* Pixel distance. */
10365 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10366 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10368 /* Edge handling. */
10369 if (TARGET_ARCH64)
10371 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10372 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10373 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10374 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10375 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10376 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10377 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10378 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10379 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10380 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10381 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10382 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10384 else
10386 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10387 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10388 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10389 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10390 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10391 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10392 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10393 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10394 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10395 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10396 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10397 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10400 /* Pixel compare. */
10401 if (TARGET_ARCH64)
10403 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10404 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10405 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10406 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10407 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10408 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10409 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10410 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10411 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10412 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10413 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10414 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10415 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10416 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10417 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10418 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10420 else
10422 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10423 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10424 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10425 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10426 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10427 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10428 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10429 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10430 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10431 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10432 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10433 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10434 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10435 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10436 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10437 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10440 /* Addition and subtraction. */
10441 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10442 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10443 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10444 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10445 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10446 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10447 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10448 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10449 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10450 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10451 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10452 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10453 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10454 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10455 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10456 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10458 /* Three-dimensional array addressing. */
10459 if (TARGET_ARCH64)
10461 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10462 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10463 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10464 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10465 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10466 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10468 else
10470 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10471 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10472 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10473 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10474 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10475 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10478 if (TARGET_VIS2)
10480 /* Edge handling. */
10481 if (TARGET_ARCH64)
10483 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10484 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10485 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10486 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10487 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10488 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10489 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10490 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10491 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10492 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10493 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10494 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10496 else
10498 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10499 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10500 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10501 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10502 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10503 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10504 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10505 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10506 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10507 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10508 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10509 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10512 /* Byte mask and shuffle. */
10513 if (TARGET_ARCH64)
10514 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10515 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10516 else
10517 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10518 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10519 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10520 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10521 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10522 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10523 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10524 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10525 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10526 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10529 if (TARGET_VIS3)
10531 if (TARGET_ARCH64)
10533 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10534 SPARC_BUILTIN_CMASK8, void_ftype_di);
10535 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10536 SPARC_BUILTIN_CMASK16, void_ftype_di);
10537 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10538 SPARC_BUILTIN_CMASK32, void_ftype_di);
10540 else
10542 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10543 SPARC_BUILTIN_CMASK8, void_ftype_si);
10544 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10545 SPARC_BUILTIN_CMASK16, void_ftype_si);
10546 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10547 SPARC_BUILTIN_CMASK32, void_ftype_si);
10550 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10551 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10553 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10554 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10555 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10556 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10557 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10558 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10559 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10560 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10561 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10562 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10563 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10564 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10565 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10566 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10567 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10568 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10570 if (TARGET_ARCH64)
10571 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10572 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10573 else
10574 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10575 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10577 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10578 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10579 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10580 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10581 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10582 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10584 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10585 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10586 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10587 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10588 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10589 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10590 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10591 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10592 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10593 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10594 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10595 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10596 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10597 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10598 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10599 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10601 if (TARGET_ARCH64)
10603 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10604 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10605 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10606 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10607 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10608 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10609 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10610 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10612 else
10614 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10615 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10616 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10617 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10618 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10619 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10620 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10621 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10624 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10625 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10626 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10627 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10628 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10629 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10630 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10631 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10632 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10633 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10634 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10635 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10637 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10638 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10639 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10640 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10641 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10642 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
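/* Once registered, the builtins above are directly callable from C, with
   the vector types mapped onto 64-bit (or 32-bit) values; a minimal
   user-level sketch, assuming a VIS3-capable target (-mvis3):

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     add_sat (v4hi a, v4hi b)
     {
       return __builtin_vis_fpadds16 (a, b);
     }
*/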
10646 /* Implement TARGET_BUILTIN_DECL hook. */
10648 static tree
10649 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10651 if (code >= SPARC_BUILTIN_MAX)
10652 return error_mark_node;
10654 return sparc_builtins[code];
10657 /* Implement TARGET_EXPAND_BUILTIN hook. */
10659 static rtx
10660 sparc_expand_builtin (tree exp, rtx target,
10661 rtx subtarget ATTRIBUTE_UNUSED,
10662 enum machine_mode tmode ATTRIBUTE_UNUSED,
10663 int ignore ATTRIBUTE_UNUSED)
10665 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10666 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10667 enum insn_code icode = sparc_builtins_icode[code];
10668 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10669 call_expr_arg_iterator iter;
10670 int arg_count = 0;
10671 rtx pat, op[4];
10672 tree arg;
10674 if (nonvoid)
10676 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10677 if (!target
10678 || GET_MODE (target) != tmode
10679 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10680 op[0] = gen_reg_rtx (tmode);
10681 else
10682 op[0] = target;
10685 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10687 const struct insn_operand_data *insn_op;
10688 int idx;
10690 if (arg == error_mark_node)
10691 return NULL_RTX;
10693 arg_count++;
10694 idx = arg_count - !nonvoid;
10695 insn_op = &insn_data[icode].operand[idx];
10696 op[arg_count] = expand_normal (arg);
10698 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10700 if (!address_operand (op[arg_count], SImode))
10702 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10703 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10705 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10708 else if (insn_op->mode == V1DImode
10709 && GET_MODE (op[arg_count]) == DImode)
10710 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10712 else if (insn_op->mode == V1SImode
10713 && GET_MODE (op[arg_count]) == SImode)
10714 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10716 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10717 insn_op->mode))
10718 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10721 switch (arg_count)
10723 case 0:
10724 pat = GEN_FCN (icode) (op[0]);
10725 break;
10726 case 1:
10727 if (nonvoid)
10728 pat = GEN_FCN (icode) (op[0], op[1]);
10729 else
10730 pat = GEN_FCN (icode) (op[1]);
10731 break;
10732 case 2:
10733 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10734 break;
10735 case 3:
10736 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10737 break;
10738 default:
10739 gcc_unreachable ();
10742 if (!pat)
10743 return NULL_RTX;
10745 emit_insn (pat);
10747 return (nonvoid ? op[0] : const0_rtx);
10750 /* Return the upper 16 bits of the 8x16 multiplication. */
10752 static int
10753 sparc_vis_mul8x16 (int e8, int e16)
10755 return (e8 * e16 + 128) / 256;
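/* E.g. e8 = 100, e16 = 300: (100 * 300 + 128) / 256 = 30128 / 256 = 117,
   i.e. the 8x16 product scaled down by 2^8 and rounded to nearest.  */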
10758 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10759 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10761 static void
10762 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10763 tree inner_type, tree cst0, tree cst1)
10765 unsigned i, num = VECTOR_CST_NELTS (cst0);
10766 int scale;
10768 switch (fncode)
10770 case SPARC_BUILTIN_FMUL8X16:
10771 for (i = 0; i < num; ++i)
10773 int val
10774 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10775 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10776 n_elts[i] = build_int_cst (inner_type, val);
10778 break;
10780 case SPARC_BUILTIN_FMUL8X16AU:
10781 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10783 for (i = 0; i < num; ++i)
10785 int val
10786 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10787 scale);
10788 n_elts[i] = build_int_cst (inner_type, val);
10790 break;
10792 case SPARC_BUILTIN_FMUL8X16AL:
10793 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10795 for (i = 0; i < num; ++i)
10797 int val
10798 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10799 scale);
10800 n_elts[i] = build_int_cst (inner_type, val);
10802 break;
10804 default:
10805 gcc_unreachable ();
10809 /* Implement TARGET_FOLD_BUILTIN hook.
10811 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10812 result of the function call is ignored. NULL_TREE is returned if the
10813 function could not be folded. */
10815 static tree
10816 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10817 tree *args, bool ignore)
10819 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10820 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10821 tree arg0, arg1, arg2;
10823 if (ignore)
10824 switch (code)
10826 case SPARC_BUILTIN_LDFSR:
10827 case SPARC_BUILTIN_STFSR:
10828 case SPARC_BUILTIN_ALIGNADDR:
10829 case SPARC_BUILTIN_WRGSR:
10830 case SPARC_BUILTIN_BMASK:
10831 case SPARC_BUILTIN_CMASK8:
10832 case SPARC_BUILTIN_CMASK16:
10833 case SPARC_BUILTIN_CMASK32:
10834 break;
10836 default:
10837 return build_zero_cst (rtype);
10840 switch (code)
10842 case SPARC_BUILTIN_FEXPAND:
10843 arg0 = args[0];
10844 STRIP_NOPS (arg0);
10846 if (TREE_CODE (arg0) == VECTOR_CST)
10848 tree inner_type = TREE_TYPE (rtype);
10849 tree *n_elts;
10850 unsigned i;
10852 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10853 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10854 n_elts[i] = build_int_cst (inner_type,
10855 TREE_INT_CST_LOW
10856 (VECTOR_CST_ELT (arg0, i)) << 4);
10857 return build_vector (rtype, n_elts);
10859 break;
10861 case SPARC_BUILTIN_FMUL8X16:
10862 case SPARC_BUILTIN_FMUL8X16AU:
10863 case SPARC_BUILTIN_FMUL8X16AL:
10864 arg0 = args[0];
10865 arg1 = args[1];
10866 STRIP_NOPS (arg0);
10867 STRIP_NOPS (arg1);
10869 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10871 tree inner_type = TREE_TYPE (rtype);
10872 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10873 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10874 return build_vector (rtype, n_elts);
10876 break;
10878 case SPARC_BUILTIN_FPMERGE:
10879 arg0 = args[0];
10880 arg1 = args[1];
10881 STRIP_NOPS (arg0);
10882 STRIP_NOPS (arg1);
10884 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10886 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10887 unsigned i;
10888 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10890 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10891 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10894 return build_vector (rtype, n_elts);
10896 break;
10898 case SPARC_BUILTIN_PDIST:
10899 case SPARC_BUILTIN_PDISTN:
10900 arg0 = args[0];
10901 arg1 = args[1];
10902 STRIP_NOPS (arg0);
10903 STRIP_NOPS (arg1);
10904 if (code == SPARC_BUILTIN_PDIST)
10906 arg2 = args[2];
10907 STRIP_NOPS (arg2);
10909 else
10910 arg2 = integer_zero_node;
10912 if (TREE_CODE (arg0) == VECTOR_CST
10913 && TREE_CODE (arg1) == VECTOR_CST
10914 && TREE_CODE (arg2) == INTEGER_CST)
10916 bool overflow = false;
10917 double_int result = TREE_INT_CST (arg2);
10918 double_int tmp;
10919 unsigned i;
10921 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10923 double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
10924 double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));
10926 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10928 tmp = e1.neg_with_overflow (&neg1_ovf);
10929 tmp = e0.add_with_sign (tmp, false, &add1_ovf);
10930 if (tmp.is_negative ())
10931 tmp = tmp.neg_with_overflow (&neg2_ovf);
10932 else
10933 neg2_ovf = false;
10934 result = result.add_with_sign (tmp, false, &add2_ovf);
10935 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10938 gcc_assert (!overflow);
10940 return build_int_cst_wide (rtype, result.low, result.high);
10943 default:
10944 break;
10947 return NULL_TREE;
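/* As a concrete folding example, a call on constant operands such as

     __builtin_vis_fexpand ((v4qi) { 1, 2, 3, 4 })

   (with v4qi standing for a 4-byte vector type) is replaced at compile
   time by the VECTOR_CST { 16, 32, 48, 64 }: each 8-bit element is
   widened and shifted left by 4, exactly as the FEXPAND case above
   computes.  */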
10950 /* ??? This duplicates information provided to the compiler by the
10951 ??? scheduler description. Some day, teach genautomata to output
10952 ??? the latencies and then CSE will just use that. */
10954 static bool
10955 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10956 int *total, bool speed ATTRIBUTE_UNUSED)
10958 enum machine_mode mode = GET_MODE (x);
10959 bool float_mode_p = FLOAT_MODE_P (mode);
10961 switch (code)
10963 case CONST_INT:
10964 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10966 *total = 0;
10967 return true;
10969 /* FALLTHRU */
10971 case HIGH:
10972 *total = 2;
10973 return true;
10975 case CONST:
10976 case LABEL_REF:
10977 case SYMBOL_REF:
10978 *total = 4;
10979 return true;
10981 case CONST_DOUBLE:
10982 if (GET_MODE (x) == VOIDmode
10983 && ((CONST_DOUBLE_HIGH (x) == 0
10984 && CONST_DOUBLE_LOW (x) < 0x1000)
10985 || (CONST_DOUBLE_HIGH (x) == -1
10986 && CONST_DOUBLE_LOW (x) < 0
10987 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10988 *total = 0;
10989 else
10990 *total = 8;
10991 return true;
10993 case MEM:
10994 /* If outer-code was a sign or zero extension, a cost
10995 of COSTS_N_INSNS (1) was already added in. This is
10996 why we are subtracting it back out. */
10997 if (outer_code == ZERO_EXTEND)
10999 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11001 else if (outer_code == SIGN_EXTEND)
11003 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11005 else if (float_mode_p)
11007 *total = sparc_costs->float_load;
11009 else
11011 *total = sparc_costs->int_load;
11014 return true;
11016 case PLUS:
11017 case MINUS:
11018 if (float_mode_p)
11019 *total = sparc_costs->float_plusminus;
11020 else
11021 *total = COSTS_N_INSNS (1);
11022 return false;
11024 case FMA:
11026 rtx sub;
11028 gcc_assert (float_mode_p);
11029 *total = sparc_costs->float_mul;
11031 sub = XEXP (x, 0);
11032 if (GET_CODE (sub) == NEG)
11033 sub = XEXP (sub, 0);
11034 *total += rtx_cost (sub, FMA, 0, speed);
11036 sub = XEXP (x, 2);
11037 if (GET_CODE (sub) == NEG)
11038 sub = XEXP (sub, 0);
11039 *total += rtx_cost (sub, FMA, 2, speed);
11040 return true;
11043 case MULT:
11044 if (float_mode_p)
11045 *total = sparc_costs->float_mul;
11046 else if (! TARGET_HARD_MUL)
11047 *total = COSTS_N_INSNS (25);
11048 else
11050 int bit_cost;
11052 bit_cost = 0;
11053 if (sparc_costs->int_mul_bit_factor)
11055 int nbits;
11057 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11059 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11060 for (nbits = 0; value != 0; value &= value - 1)
11061 nbits++;
11063 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
11064 && GET_MODE (XEXP (x, 1)) == VOIDmode)
11066 rtx x1 = XEXP (x, 1);
11067 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
11068 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
11070 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
11071 nbits++;
11072 for (; value2 != 0; value2 &= value2 - 1)
11073 nbits++;
11075 else
11076 nbits = 7;
11078 if (nbits < 3)
11079 nbits = 3;
11080 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11081 bit_cost = COSTS_N_INSNS (bit_cost);
11084 if (mode == DImode)
11085 *total = sparc_costs->int_mulX + bit_cost;
11086 else
11087 *total = sparc_costs->int_mul + bit_cost;
11089 return false;
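/* Worked example for the variable multiply cost: a multiply by the
   CONST_INT 0x1234 has 5 bits set, so nbits = 5 and, on a hypothetical
   CPU with int_mul_bit_factor == 2, the extra cost is
   COSTS_N_INSNS ((5 - 3) / 2) = COSTS_N_INSNS (1) on top of int_mul
   (or int_mulX in DImode).  */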
11091 case ASHIFT:
11092 case ASHIFTRT:
11093 case LSHIFTRT:
11094 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11095 return false;
11097 case DIV:
11098 case UDIV:
11099 case MOD:
11100 case UMOD:
11101 if (float_mode_p)
11103 if (mode == DFmode)
11104 *total = sparc_costs->float_div_df;
11105 else
11106 *total = sparc_costs->float_div_sf;
11108 else
11110 if (mode == DImode)
11111 *total = sparc_costs->int_divX;
11112 else
11113 *total = sparc_costs->int_div;
11115 return false;
11117 case NEG:
11118 if (! float_mode_p)
11120 *total = COSTS_N_INSNS (1);
11121 return false;
11123 /* FALLTHRU */
11125 case ABS:
11126 case FLOAT:
11127 case UNSIGNED_FLOAT:
11128 case FIX:
11129 case UNSIGNED_FIX:
11130 case FLOAT_EXTEND:
11131 case FLOAT_TRUNCATE:
11132 *total = sparc_costs->float_move;
11133 return false;
11135 case SQRT:
11136 if (mode == DFmode)
11137 *total = sparc_costs->float_sqrt_df;
11138 else
11139 *total = sparc_costs->float_sqrt_sf;
11140 return false;
11142 case COMPARE:
11143 if (float_mode_p)
11144 *total = sparc_costs->float_cmp;
11145 else
11146 *total = COSTS_N_INSNS (1);
11147 return false;
11149 case IF_THEN_ELSE:
11150 if (float_mode_p)
11151 *total = sparc_costs->float_cmove;
11152 else
11153 *total = sparc_costs->int_cmove;
11154 return false;
11156 case IOR:
11157 /* Handle the NAND vector patterns. */
11158 if (sparc_vector_mode_supported_p (GET_MODE (x))
11159 && GET_CODE (XEXP (x, 0)) == NOT
11160 && GET_CODE (XEXP (x, 1)) == NOT)
11162 *total = COSTS_N_INSNS (1);
11163 return true;
11165 else
11166 return false;
11168 default:
11169 return false;
11173 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11175 static inline bool
11176 general_or_i64_p (reg_class_t rclass)
11178 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11181 /* Implement TARGET_REGISTER_MOVE_COST. */
11183 static int
11184 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11185 reg_class_t from, reg_class_t to)
11187 bool need_memory = false;
11189 if (from == FPCC_REGS || to == FPCC_REGS)
11190 need_memory = true;
11191 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11192 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11194 if (TARGET_VIS3)
11196 int size = GET_MODE_SIZE (mode);
11197 if (size == 8 || size == 4)
11199 if (! TARGET_ARCH32 || size == 4)
11200 return 4;
11201 else
11202 return 6;
11205 need_memory = true;
11208 if (need_memory)
11210 if (sparc_cpu == PROCESSOR_ULTRASPARC
11211 || sparc_cpu == PROCESSOR_ULTRASPARC3
11212 || sparc_cpu == PROCESSOR_NIAGARA
11213 || sparc_cpu == PROCESSOR_NIAGARA2
11214 || sparc_cpu == PROCESSOR_NIAGARA3
11215 || sparc_cpu == PROCESSOR_NIAGARA4)
11216 return 12;
11218 return 6;
11221 return 2;
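/* E.g. moving an SImode value between a general register and a float
   register costs 4 with VIS3 (direct move instructions exist), but 12 on
   an UltraSPARC without VIS3, where the value must bounce through
   memory.  */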
11224 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11225 This is achieved by means of a manual dynamic stack space allocation in
11226 the current frame. We make the assumption that SEQ doesn't contain any
11227 function calls, with the possible exception of calls to the GOT helper. */
11229 static void
11230 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11232 /* We must preserve the lowest 16 words for the register save area. */
11233 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11234 /* We really need only 2 words of fresh stack space. */
11235 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11237 rtx slot
11238 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11239 SPARC_STACK_BIAS + offset));
11241 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11242 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
11243 if (reg2)
11244 emit_insn (gen_rtx_SET (VOIDmode,
11245 adjust_address (slot, word_mode, UNITS_PER_WORD),
11246 reg2));
11247 emit_insn (seq);
11248 if (reg2)
11249 emit_insn (gen_rtx_SET (VOIDmode,
11250 reg2,
11251 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11252 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
11253 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
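/* Resulting stack layout while SEQ executes (sketch):

     %sp + SPARC_STACK_BIAS + 16 words   -> REG saved here
     %sp + SPARC_STACK_BIAS + 17 words   -> REG2 saved here (if any)

   with the 16 low words left untouched for the register save area.  */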
11256 /* Output the assembler code for a thunk function. THUNK_DECL is the
11257 declaration for the thunk function itself, FUNCTION is the decl for
11258 the target function. DELTA is an immediate constant offset to be
11259 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11260 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11262 static void
11263 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11264 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11265 tree function)
11267 rtx this_rtx, insn, funexp;
11268 unsigned int int_arg_first;
11270 reload_completed = 1;
11271 epilogue_completed = 1;
11273 emit_note (NOTE_INSN_PROLOGUE_END);
11275 if (TARGET_FLAT)
11277 sparc_leaf_function_p = 1;
11279 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11281 else if (flag_delayed_branch)
11283 /* We will emit a regular sibcall below, so we need to instruct
11284 output_sibcall that we are in a leaf function. */
11285 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11287 /* This will cause final.c to invoke leaf_renumber_regs so we
11288 must behave as if we were in a not-yet-leafified function. */
11289 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11291 else
11293 /* We will emit the sibcall manually below, so we will need to
11294 manually spill non-leaf registers. */
11295 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11297 /* We really are in a leaf function. */
11298 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11301 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11302 returns a structure, the structure return pointer is there instead. */
11303 if (TARGET_ARCH64
11304 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11305 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11306 else
11307 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11309 /* Add DELTA. When possible use a plain add, otherwise load it into
11310 a register first. */
11311 if (delta)
11313 rtx delta_rtx = GEN_INT (delta);
11315 if (! SPARC_SIMM13_P (delta))
11317 rtx scratch = gen_rtx_REG (Pmode, 1);
11318 emit_move_insn (scratch, delta_rtx);
11319 delta_rtx = scratch;
11322 /* THIS_RTX += DELTA. */
11323 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11326 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11327 if (vcall_offset)
11329 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11330 rtx scratch = gen_rtx_REG (Pmode, 1);
11332 gcc_assert (vcall_offset < 0);
11334 /* SCRATCH = *THIS_RTX. */
11335 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11337 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11338 may not have any available scratch register at this point. */
11339 if (SPARC_SIMM13_P (vcall_offset))
11341 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11342 else if (! fixed_regs[5]
11343 /* The below sequence is made up of at least 2 insns,
11344 while the default method may need only one. */
11345 && vcall_offset < -8192)
11347 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11348 emit_move_insn (scratch2, vcall_offset_rtx);
11349 vcall_offset_rtx = scratch2;
11351 else
11353 rtx increment = GEN_INT (-4096);
11355 /* VCALL_OFFSET is a negative number whose typical range can be
11356 estimated as -32768..0 in 32-bit mode. In almost all cases
11357 it is therefore cheaper to emit multiple add insns than
11358 spilling and loading the constant into a register (at least
11359 6 insns). */
11360 while (! SPARC_SIMM13_P (vcall_offset))
11362 emit_insn (gen_add2_insn (scratch, increment));
11363 vcall_offset += 4096;
11365 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11368 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11369 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11370 gen_rtx_PLUS (Pmode,
11371 scratch,
11372 vcall_offset_rtx)));
11374 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11375 emit_insn (gen_add2_insn (this_rtx, scratch));
11378 /* Generate a tail call to the target function. */
11379 if (! TREE_USED (function))
11381 assemble_external (function);
11382 TREE_USED (function) = 1;
11384 funexp = XEXP (DECL_RTL (function), 0);
11386 if (flag_delayed_branch)
11388 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11389 insn = emit_call_insn (gen_sibcall (funexp));
11390 SIBLING_CALL_P (insn) = 1;
11392 else
11394 /* The hoops we have to jump through in order to generate a sibcall
11395 without using delay slots... */
11396 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11398 if (flag_pic)
11400 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11401 start_sequence ();
11402 load_got_register (); /* clobbers %o7 */
11403 scratch = sparc_legitimize_pic_address (funexp, scratch);
11404 seq = get_insns ();
11405 end_sequence ();
11406 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11408 else if (TARGET_ARCH32)
11410 emit_insn (gen_rtx_SET (VOIDmode,
11411 scratch,
11412 gen_rtx_HIGH (SImode, funexp)));
11413 emit_insn (gen_rtx_SET (VOIDmode,
11414 scratch,
11415 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11417 else /* TARGET_ARCH64 */
11419 switch (sparc_cmodel)
11421 case CM_MEDLOW:
11422 case CM_MEDMID:
11423 /* The destination can serve as a temporary. */
11424 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11425 break;
11427 case CM_MEDANY:
11428 case CM_EMBMEDANY:
11429 /* The destination cannot serve as a temporary. */
11430 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11431 start_sequence ();
11432 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11433 seq = get_insns ();
11434 end_sequence ();
11435 emit_and_preserve (seq, spill_reg, 0);
11436 break;
11438 default:
11439 gcc_unreachable ();
11443 emit_jump_insn (gen_indirect_jump (scratch));
11446 emit_barrier ();
11448 /* Run just enough of rest_of_compilation to get the insns emitted.
11449 There's not really enough bulk here to make other passes such as
11450 instruction scheduling worthwhile. Note that use_thunk calls
11451 assemble_start_function and assemble_end_function. */
11452 insn = get_insns ();
11453 shorten_branches (insn);
11454 final_start_function (insn, file, 1);
11455 final (insn, file, 1);
11456 final_end_function ();
11458 reload_completed = 0;
11459 epilogue_completed = 0;
11462 /* Return true if sparc_output_mi_thunk would be able to output the
11463 assembler code for the thunk function specified by the arguments
11464 it is passed, and false otherwise. */
11465 static bool
11466 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11467 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11468 HOST_WIDE_INT vcall_offset,
11469 const_tree function ATTRIBUTE_UNUSED)
11471 /* Bound the loop used in the default method above. */
11472 return (vcall_offset >= -32768 || ! fixed_regs[5]);
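/* E.g. a VCALL_OFFSET of -40000 together with -ffixed-g5 makes this hook
   return false (%g5 is unavailable as the second scratch and the add-loop
   bound is exceeded), so GCC falls back to the generic thunk code.  */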
11475 /* How to allocate a 'struct machine_function'. */
11477 static struct machine_function *
11478 sparc_init_machine_status (void)
11480 return ggc_alloc_cleared_machine_function ();
11483 /* Locate some local-dynamic symbol still in use by this function
11484 so that we can print its name in local-dynamic base patterns. */
11486 static const char *
11487 get_some_local_dynamic_name (void)
11489 rtx insn;
11491 if (cfun->machine->some_ld_name)
11492 return cfun->machine->some_ld_name;
11494 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11495 if (INSN_P (insn)
11496 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11497 return cfun->machine->some_ld_name;
11499 gcc_unreachable ();
11502 static int
11503 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11505 rtx x = *px;
11507 if (x
11508 && GET_CODE (x) == SYMBOL_REF
11509 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11511 cfun->machine->some_ld_name = XSTR (x, 0);
11512 return 1;
11515 return 0;
11518 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11519 We need to emit DTP-relative relocations. */
11521 static void
11522 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11524 switch (size)
11526 case 4:
11527 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11528 break;
11529 case 8:
11530 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11531 break;
11532 default:
11533 gcc_unreachable ();
11535 output_addr_const (file, x);
11536 fputs (")", file);
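/* E.g. for a 4-byte entry referring to the TLS symbol foo, this emits

     .word   %r_tls_dtpoff32(foo)

   and the 8-byte variant uses .xword with %r_tls_dtpoff64.  */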
11539 /* Do whatever processing is required at the end of a file. */
11541 static void
11542 sparc_file_end (void)
11544 /* If we need to emit the special GOT helper function, do so now. */
11545 if (got_helper_rtx)
11547 const char *name = XSTR (got_helper_rtx, 0);
11548 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11549 #ifdef DWARF2_UNWIND_INFO
11550 bool do_cfi;
11551 #endif
11553 if (USE_HIDDEN_LINKONCE)
11555 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11556 get_identifier (name),
11557 build_function_type_list (void_type_node,
11558 NULL_TREE));
11559 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11560 NULL_TREE, void_type_node);
11561 TREE_PUBLIC (decl) = 1;
11562 TREE_STATIC (decl) = 1;
11563 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11564 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11565 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11566 resolve_unique_section (decl, 0, flag_function_sections);
11567 allocate_struct_function (decl, true);
11568 cfun->is_thunk = 1;
11569 current_function_decl = decl;
11570 init_varasm_status ();
11571 assemble_start_function (decl, name);
11573 else
11575 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11576 switch_to_section (text_section);
11577 if (align > 0)
11578 ASM_OUTPUT_ALIGN (asm_out_file, align);
11579 ASM_OUTPUT_LABEL (asm_out_file, name);
11582 #ifdef DWARF2_UNWIND_INFO
11583 do_cfi = dwarf2out_do_cfi_asm ();
11584 if (do_cfi)
11585 fprintf (asm_out_file, "\t.cfi_startproc\n");
11586 #endif
11587 if (flag_delayed_branch)
11588 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11589 reg_name, reg_name);
11590 else
11591 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11592 reg_name, reg_name);
11593 #ifdef DWARF2_UNWIND_INFO
11594 if (do_cfi)
11595 fprintf (asm_out_file, "\t.cfi_endproc\n");
11596 #endif
11599 if (NEED_INDICATE_EXEC_STACK)
11600 file_end_indicate_exec_stack ();
11602 #ifdef TARGET_SOLARIS
11603 solaris_file_end ();
11604 #endif
11607 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11608 /* Implement TARGET_MANGLE_TYPE. */
11610 static const char *
11611 sparc_mangle_type (const_tree type)
11613 if (!TARGET_64BIT
11614 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11615 && TARGET_LONG_DOUBLE_128)
11616 return "g";
11618 /* For all other types, use normal C++ mangling. */
11619 return NULL;
11621 #endif
11623 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11624 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
11625 bit 0 indicates that X is true, and bit 1 indicates that Y is true. */
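/* For example, LOAD_STORE == 3 with BEFORE_AFTER == 1 requests the barrier
   emitted before an atomic operation that both loads and stores. Under TSO
   every required bit turns out to be implied below, so no MEMBAR is emitted
   for such an operation, whereas under RMO a SEQ_CST atomic ends up
   bracketed by full barriers. */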
11627 void
11628 sparc_emit_membar_for_model (enum memmodel model,
11629 int load_store, int before_after)
11631 /* Bits for the MEMBAR mmask field. */
11632 const int LoadLoad = 1;
11633 const int StoreLoad = 2;
11634 const int LoadStore = 4;
11635 const int StoreStore = 8;
11637 int mm = 0, implied = 0;
11639 switch (sparc_memory_model)
11641 case SMM_SC:
11642 /* Sequential Consistency. All memory transactions are immediately
11643 visible in sequential execution order. No barriers needed. */
11644 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11645 break;
11647 case SMM_TSO:
11648 /* Total Store Ordering: all memory transactions with store semantics
11649 are followed by an implied StoreStore. */
11650 implied |= StoreStore;
11652 /* If we're not looking for a raw barrier (before+after), then atomic
11653 operations get the benefit of being both load and store. */
11654 if (load_store == 3 && before_after == 1)
11655 implied |= StoreLoad;
11656 /* FALLTHRU */
11658 case SMM_PSO:
11659 /* Partial Store Ordering: all memory transactions with load semantics
11660 are followed by an implied LoadLoad | LoadStore. */
11661 implied |= LoadLoad | LoadStore;
11664 /* If we're not looking for a raw barrier (before+after), then atomic
11664 operations get the benefit of being both load and store. */
11665 if (load_store == 3 && before_after == 2)
11666 implied |= StoreLoad | StoreStore;
11667 /* FALLTHRU */
11669 case SMM_RMO:
11670 /* Relaxed Memory Ordering: no implicit bits. */
11671 break;
11673 default:
11674 gcc_unreachable ();
11677 if (before_after & 1)
11679 if (model == MEMMODEL_RELEASE
11680 || model == MEMMODEL_ACQ_REL
11681 || model == MEMMODEL_SEQ_CST)
11683 if (load_store & 1)
11684 mm |= LoadLoad | StoreLoad;
11685 if (load_store & 2)
11686 mm |= LoadStore | StoreStore;
11689 if (before_after & 2)
11691 if (model == MEMMODEL_ACQUIRE
11692 || model == MEMMODEL_ACQ_REL
11693 || model == MEMMODEL_SEQ_CST)
11695 if (load_store & 1)
11696 mm |= LoadLoad | LoadStore;
11697 if (load_store & 2)
11698 mm |= StoreLoad | StoreStore;
11702 /* Remove the bits implied by the system memory model. */
11703 mm &= ~implied;
11705 /* For raw barriers (before+after), always emit a barrier.
11706 This will become a compile-time barrier if needed. */
11707 if (mm || before_after == 3)
11708 emit_insn (gen_membar (GEN_INT (mm)));
11711 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a
11712 32-bit compare and swap on the word containing the byte or half-word. */
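/* The expansion below computes the SImode-aligned address, shifts OLDVAL,
   NEWVAL and a field mask into position within that word, and then loops
   on a word-sized compare-and-swap, retrying as long as only the bytes
   outside the accessed field have changed underneath us. */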
11714 static void
11715 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11716 rtx oldval, rtx newval)
11718 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11719 rtx addr = gen_reg_rtx (Pmode);
11720 rtx off = gen_reg_rtx (SImode);
11721 rtx oldv = gen_reg_rtx (SImode);
11722 rtx newv = gen_reg_rtx (SImode);
11723 rtx oldvalue = gen_reg_rtx (SImode);
11724 rtx newvalue = gen_reg_rtx (SImode);
11725 rtx res = gen_reg_rtx (SImode);
11726 rtx resv = gen_reg_rtx (SImode);
11727 rtx memsi, val, mask, end_label, loop_label, cc;
11729 emit_insn (gen_rtx_SET (VOIDmode, addr,
11730 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11732 if (Pmode != SImode)
11733 addr1 = gen_lowpart (SImode, addr1);
11734 emit_insn (gen_rtx_SET (VOIDmode, off,
11735 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11737 memsi = gen_rtx_MEM (SImode, addr);
11738 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11739 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11741 val = copy_to_reg (memsi);
11743 emit_insn (gen_rtx_SET (VOIDmode, off,
11744 gen_rtx_XOR (SImode, off,
11745 GEN_INT (GET_MODE (mem) == QImode
11746 ? 3 : 2))));
11748 emit_insn (gen_rtx_SET (VOIDmode, off,
11749 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11751 if (GET_MODE (mem) == QImode)
11752 mask = force_reg (SImode, GEN_INT (0xff));
11753 else
11754 mask = force_reg (SImode, GEN_INT (0xffff));
11756 emit_insn (gen_rtx_SET (VOIDmode, mask,
11757 gen_rtx_ASHIFT (SImode, mask, off)));
11759 emit_insn (gen_rtx_SET (VOIDmode, val,
11760 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11761 val)));
11763 oldval = gen_lowpart (SImode, oldval);
11764 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11765 gen_rtx_ASHIFT (SImode, oldval, off)));
11767 newval = gen_lowpart_common (SImode, newval);
11768 emit_insn (gen_rtx_SET (VOIDmode, newv,
11769 gen_rtx_ASHIFT (SImode, newval, off)));
11771 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11772 gen_rtx_AND (SImode, oldv, mask)));
11774 emit_insn (gen_rtx_SET (VOIDmode, newv,
11775 gen_rtx_AND (SImode, newv, mask)));
11777 end_label = gen_label_rtx ();
11778 loop_label = gen_label_rtx ();
11779 emit_label (loop_label);
11781 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11782 gen_rtx_IOR (SImode, oldv, val)));
11784 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11785 gen_rtx_IOR (SImode, newv, val)));
11787 emit_move_insn (bool_result, const1_rtx);
11789 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11791 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11793 emit_insn (gen_rtx_SET (VOIDmode, resv,
11794 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11795 res)));
11797 emit_move_insn (bool_result, const0_rtx);
11799 cc = gen_compare_reg_1 (NE, resv, val);
11800 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11802 /* Use cbranchcc4 to separate the compare and branch! */
11803 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11804 cc, const0_rtx, loop_label));
11806 emit_label (end_label);
11808 emit_insn (gen_rtx_SET (VOIDmode, res,
11809 gen_rtx_AND (SImode, res, mask)));
11811 emit_insn (gen_rtx_SET (VOIDmode, res,
11812 gen_rtx_LSHIFTRT (SImode, res, off)));
11814 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11817 /* Expand code to perform a compare-and-swap. */
11819 void
11820 sparc_expand_compare_and_swap (rtx operands[])
11822 rtx bval, retval, mem, oldval, newval;
11823 enum machine_mode mode;
11824 enum memmodel model;
11826 bval = operands[0];
11827 retval = operands[1];
11828 mem = operands[2];
11829 oldval = operands[3];
11830 newval = operands[4];
11831 model = (enum memmodel) INTVAL (operands[6]);
11832 mode = GET_MODE (mem);
11834 sparc_emit_membar_for_model (model, 3, 1);
11836 if (reg_overlap_mentioned_p (retval, oldval))
11837 oldval = copy_to_reg (oldval);
11839 if (mode == QImode || mode == HImode)
11840 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11841 else
11843 rtx (*gen) (rtx, rtx, rtx, rtx);
11844 rtx x;
11846 if (mode == SImode)
11847 gen = gen_atomic_compare_and_swapsi_1;
11848 else
11849 gen = gen_atomic_compare_and_swapdi_1;
11850 emit_insn (gen (retval, mem, oldval, newval));
11852 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11853 if (x != bval)
11854 convert_move (bval, x, 1);
11857 sparc_emit_membar_for_model (model, 3, 2);
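/* Expand the VIS BMASK computation for a vector permutation. SEL contains
   one element index per field of vector mode VMODE; the code below spreads
   those indices into per-byte selectors, and the final bmask insn adds the
   per-byte offsets in T_1, discarding the integer result in %g0 while the
   GSR mask field captures it for the subsequent BSHUFFLE. */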
11860 void
11861 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11863 rtx t_1, t_2, t_3;
11865 sel = gen_lowpart (DImode, sel);
11866 switch (vmode)
11868 case V2SImode:
11869 /* inp = xxxxxxxAxxxxxxxB */
11870 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11871 NULL_RTX, 1, OPTAB_DIRECT);
11872 /* t_1 = ....xxxxxxxAxxx. */
11873 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11874 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11875 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11876 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11877 /* sel = .......B */
11878 /* t_1 = ...A.... */
11879 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11880 /* sel = ...A...B */
11881 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11882 /* sel = AAAABBBB * 4 */
11883 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11884 /* sel = { A*4, A*4+1, A*4+2, ... } */
11885 break;
11887 case V4HImode:
11888 /* inp = xxxAxxxBxxxCxxxD */
11889 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11890 NULL_RTX, 1, OPTAB_DIRECT);
11891 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11892 NULL_RTX, 1, OPTAB_DIRECT);
11893 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11894 NULL_RTX, 1, OPTAB_DIRECT);
11895 /* t_1 = ..xxxAxxxBxxxCxx */
11896 /* t_2 = ....xxxAxxxBxxxC */
11897 /* t_3 = ......xxxAxxxBxx */
11898 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11899 GEN_INT (0x07),
11900 NULL_RTX, 1, OPTAB_DIRECT);
11901 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11902 GEN_INT (0x0700),
11903 NULL_RTX, 1, OPTAB_DIRECT);
11904 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11905 GEN_INT (0x070000),
11906 NULL_RTX, 1, OPTAB_DIRECT);
11907 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11908 GEN_INT (0x07000000),
11909 NULL_RTX, 1, OPTAB_DIRECT);
11910 /* sel = .......D */
11911 /* t_1 = .....C.. */
11912 /* t_2 = ...B.... */
11913 /* t_3 = .A...... */
11914 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11915 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11916 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11917 /* sel = .A.B.C.D */
11918 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11919 /* sel = AABBCCDD * 2 */
11920 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11921 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11922 break;
11924 case V8QImode:
11925 /* input = xAxBxCxDxExFxGxH */
11926 sel = expand_simple_binop (DImode, AND, sel,
11927 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11928 | 0x0f0f0f0f),
11929 NULL_RTX, 1, OPTAB_DIRECT);
11930 /* sel = .A.B.C.D.E.F.G.H */
11931 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11932 NULL_RTX, 1, OPTAB_DIRECT);
11933 /* t_1 = ..A.B.C.D.E.F.G. */
11934 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11935 NULL_RTX, 1, OPTAB_DIRECT);
11936 /* sel = .AABBCCDDEEFFGGH */
11937 sel = expand_simple_binop (DImode, AND, sel,
11938 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11939 | 0xff00ff),
11940 NULL_RTX, 1, OPTAB_DIRECT);
11941 /* sel = ..AB..CD..EF..GH */
11942 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11943 NULL_RTX, 1, OPTAB_DIRECT);
11944 /* t_1 = ....AB..CD..EF.. */
11945 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11946 NULL_RTX, 1, OPTAB_DIRECT);
11947 /* sel = ..ABABCDCDEFEFGH */
11948 sel = expand_simple_binop (DImode, AND, sel,
11949 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11950 NULL_RTX, 1, OPTAB_DIRECT);
11951 /* sel = ....ABCD....EFGH */
11952 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11953 NULL_RTX, 1, OPTAB_DIRECT);
11954 /* t_1 = ........ABCD.... */
11955 sel = gen_lowpart (SImode, sel);
11956 t_1 = gen_lowpart (SImode, t_1);
11957 break;
11959 default:
11960 gcc_unreachable ();
11963 /* Always perform the final addition/merge within the bmask insn. */
11964 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
11967 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11969 static bool
11970 sparc_frame_pointer_required (void)
11972 /* If the stack pointer is dynamically modified in the function, it cannot
11973 serve as the frame pointer. */
11974 if (cfun->calls_alloca)
11975 return true;
11977 /* If the function receives nonlocal gotos, it needs to save the frame
11978 pointer in the nonlocal_goto_save_area object. */
11979 if (cfun->has_nonlocal_label)
11980 return true;
11982 /* In flat mode, that's it. */
11983 if (TARGET_FLAT)
11984 return false;
11986 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11987 return !(crtl->is_leaf && only_leaf_regs_used ());
11990 /* The way this is structured, we can't eliminate SFP in favor of SP
11991 if the frame pointer is required: we want to use the SFP->HFP elimination
11992 in that case. But the test in update_eliminables doesn't know we are
11993 assuming below that we only do the former elimination. */
11995 static bool
11996 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11998 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12001 /* Return the hard frame pointer directly to bypass the stack bias. */
12003 static rtx
12004 sparc_builtin_setjmp_frame_value (void)
12006 return hard_frame_pointer_rtx;
12009 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12010 they won't be allocated. */
12012 static void
12013 sparc_conditional_register_usage (void)
12015 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12017 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12018 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12020 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12021 then honor it. */
12022 if (TARGET_ARCH32 && fixed_regs[5])
12023 fixed_regs[5] = 1;
12024 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12025 fixed_regs[5] = 0;
12026 if (! TARGET_V9)
12028 int regno;
12029 for (regno = SPARC_FIRST_V9_FP_REG;
12030 regno <= SPARC_LAST_V9_FP_REG;
12031 regno++)
12032 fixed_regs[regno] = 1;
12033 /* %fcc0 is used by v8 and v9. */
12034 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12035 regno <= SPARC_LAST_V9_FCC_REG;
12036 regno++)
12037 fixed_regs[regno] = 1;
12039 if (! TARGET_FPU)
12041 int regno;
12042 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12043 fixed_regs[regno] = 1;
12045 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12046 then honor it. Likewise with g3 and g4. */
12047 if (fixed_regs[2] == 2)
12048 fixed_regs[2] = ! TARGET_APP_REGS;
12049 if (fixed_regs[3] == 2)
12050 fixed_regs[3] = ! TARGET_APP_REGS;
12051 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12052 fixed_regs[4] = ! TARGET_APP_REGS;
12053 else if (TARGET_CM_EMBMEDANY)
12054 fixed_regs[4] = 1;
12055 else if (fixed_regs[4] == 2)
12056 fixed_regs[4] = 0;
12057 if (TARGET_FLAT)
12059 int regno;
12060 /* Disable leaf functions. */
12061 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12062 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12063 leaf_reg_remap [regno] = regno;
12065 if (TARGET_VIS)
12066 global_regs[SPARC_GSR_REG] = 1;
12069 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12071 - We can't load constants into FP registers.
12072 - We can't load FP constants into integer registers when soft-float,
12073 because there is no soft-float pattern with a r/F constraint.
12074 - We can't load FP constants into integer registers for TFmode unless
12075 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12076 - Try to reload integer constants (symbolic or otherwise) back into
12077 registers directly, rather than having them dumped to memory. */
12079 static reg_class_t
12080 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12082 enum machine_mode mode = GET_MODE (x);
12083 if (CONSTANT_P (x))
12085 if (FP_REG_CLASS_P (rclass)
12086 || rclass == GENERAL_OR_FP_REGS
12087 || rclass == GENERAL_OR_EXTRA_FP_REGS
12088 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12089 || (mode == TFmode && ! const_zero_operand (x, mode)))
12090 return NO_REGS;
12092 if (GET_MODE_CLASS (mode) == MODE_INT)
12093 return GENERAL_REGS;
12095 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12097 if (! FP_REG_CLASS_P (rclass)
12098 || !(const_zero_operand (x, mode)
12099 || const_all_ones_operand (x, mode)))
12100 return NO_REGS;
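/* With VIS3 on 32-bit, moves between the integer and FP registers only
   work for 8-byte values via the lower FP registers, so steer reloads
   away from EXTRA_FP_REGS (see sparc_secondary_reload below). */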
12104 if (TARGET_VIS3
12105 && ! TARGET_ARCH64
12106 && (rclass == EXTRA_FP_REGS
12107 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12109 int regno = true_regnum (x);
12111 if (SPARC_INT_REG_P (regno))
12112 return (rclass == EXTRA_FP_REGS
12113 ? FP_REGS : GENERAL_OR_FP_REGS);
12116 return rclass;
12119 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12120 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12122 const char *
12123 output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
12125 char mulstr[32];
12127 gcc_assert (! TARGET_ARCH64);
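/* The 64-bit operands arrive in V8+ register pairs (%H/%L). Assemble each
   full value in a single 64-bit register with sllx/or, perform the multiply
   there, then split the product back into a pair with srlx/mov. */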
12129 if (sparc_check_64 (operands[1], insn) <= 0)
12130 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12131 if (which_alternative == 1)
12132 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12133 if (GET_CODE (operands[2]) == CONST_INT)
12135 if (which_alternative == 1)
12137 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12138 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12139 output_asm_insn (mulstr, operands);
12140 return "srlx\t%L0, 32, %H0";
12142 else
12144 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12145 output_asm_insn ("or\t%L1, %3, %3", operands);
12146 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12147 output_asm_insn (mulstr, operands);
12148 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12149 return "mov\t%3, %L0";
12152 else if (rtx_equal_p (operands[1], operands[2]))
12154 if (which_alternative == 1)
12156 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12157 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12158 output_asm_insn (mulstr, operands);
12159 return "srlx\t%L0, 32, %H0";
12161 else
12163 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12164 output_asm_insn ("or\t%L1, %3, %3", operands);
12165 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12166 output_asm_insn (mulstr, operands);
12167 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12168 return "mov\t%3, %L0";
12171 if (sparc_check_64 (operands[2], insn) <= 0)
12172 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12173 if (which_alternative == 1)
12175 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12176 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12177 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12178 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12179 output_asm_insn (mulstr, operands);
12180 return "srlx\t%L0, 32, %H0";
12182 else
12184 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12185 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12186 output_asm_insn ("or\t%L1, %3, %3", operands);
12187 output_asm_insn ("or\t%L2, %4, %4", operands);
12188 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12189 output_asm_insn (mulstr, operands);
12190 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12191 return "mov\t%3, %L0";
12195 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12196 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
12197 and INNER_MODE are the modes describing TARGET. */
12199 static void
12200 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
12201 enum machine_mode inner_mode)
12203 rtx t1, final_insn, sel;
12204 int bmask;
12206 t1 = gen_reg_rtx (mode);
12208 elt = convert_modes (SImode, inner_mode, elt, true);
12209 emit_move_insn (gen_lowpart (SImode, t1), elt);
12211 switch (mode)
12213 case V2SImode:
12214 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12215 bmask = 0x45674567;
12216 break;
12217 case V4HImode:
12218 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12219 bmask = 0x67676767;
12220 break;
12221 case V8QImode:
12222 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12223 bmask = 0x77777777;
12224 break;
12225 default:
12226 gcc_unreachable ();
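/* Each nibble of the bmask selects one byte of the concatenated BSHUFFLE
   inputs. Since T1 is used for both inputs and holds ELT in its low bytes
   (4-7), the masks above replicate those byte positions so that every
   field of the result receives a copy of ELT. */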
12229 sel = force_reg (SImode, GEN_INT (bmask));
12230 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
12231 emit_insn (final_insn);
12234 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12235 all fields of TARGET to ELT in V8QImode by means of the VIS FPMERGE insn. */
12237 static void
12238 vector_init_fpmerge (rtx target, rtx elt)
12240 rtx t1, t2, t2_low, t3, t3_low;
12242 t1 = gen_reg_rtx (V4QImode);
12243 elt = convert_modes (SImode, QImode, elt, true);
12244 emit_move_insn (gen_lowpart (SImode, t1), elt);
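/* T1 now holds ELT in its last byte. Each FPMERGE of a value with itself
   doubles the number of adjacent copies: one byte becomes two, then four,
   and the final merge fills all eight bytes of TARGET. */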
12246 t2 = gen_reg_rtx (V8QImode);
12247 t2_low = gen_lowpart (V4QImode, t2);
12248 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12250 t3 = gen_reg_rtx (V8QImode);
12251 t3_low = gen_lowpart (V4QImode, t3);
12252 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12254 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12257 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12258 all fields of TARGET to ELT in V4HImode by means of the VIS FALIGNDATA insn. */
12260 static void
12261 vector_init_faligndata (rtx target, rtx elt)
12263 rtx t1 = gen_reg_rtx (V4HImode);
12264 int i;
12266 elt = convert_modes (SImode, HImode, elt, true);
12267 emit_move_insn (gen_lowpart (SImode, t1), elt);
12269 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12270 force_reg (SImode, GEN_INT (6)),
12271 const0_rtx));
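/* The alignaddr above sets GSR.align to 6, so each FALIGNDATA extracts
   eight bytes of T1:TARGET starting at byte offset 6, i.e. it prepends the
   ELT halfword from T1 and shifts TARGET down; four iterations fill all
   four halfwords of TARGET with ELT. */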
12273 for (i = 0; i < 4; i++)
12274 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12277 /* Emit code to initialize TARGET to values for individual fields VALS. */
12279 void
12280 sparc_expand_vector_init (rtx target, rtx vals)
12282 const enum machine_mode mode = GET_MODE (target);
12283 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
12284 const int n_elts = GET_MODE_NUNITS (mode);
12285 int i, n_var = 0;
12286 bool all_same;
12287 rtx mem;
12289 all_same = true;
12290 for (i = 0; i < n_elts; i++)
12292 rtx x = XVECEXP (vals, 0, i);
12293 if (!CONSTANT_P (x))
12294 n_var++;
12296 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12297 all_same = false;
12300 if (n_var == 0)
12302 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12303 return;
12306 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12308 if (GET_MODE_SIZE (inner_mode) == 4)
12310 emit_move_insn (gen_lowpart (SImode, target),
12311 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12312 return;
12314 else if (GET_MODE_SIZE (inner_mode) == 8)
12316 emit_move_insn (gen_lowpart (DImode, target),
12317 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12318 return;
12321 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12322 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12324 emit_move_insn (gen_highpart (word_mode, target),
12325 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12326 emit_move_insn (gen_lowpart (word_mode, target),
12327 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12328 return;
12331 if (all_same && GET_MODE_SIZE (mode) == 8)
12333 if (TARGET_VIS2)
12335 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12336 return;
12338 if (mode == V8QImode)
12340 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12341 return;
12343 if (mode == V4HImode)
12345 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12346 return;
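/* As a last resort, build up the vector element by element in a stack
   temporary and load the whole thing back from memory. */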
12350 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12351 for (i = 0; i < n_elts; i++)
12352 emit_move_insn (adjust_address_nv (mem, inner_mode,
12353 i * GET_MODE_SIZE (inner_mode)),
12354 XVECEXP (vals, 0, i));
12355 emit_move_insn (target, mem);
12358 /* Implement TARGET_SECONDARY_RELOAD. */
12360 static reg_class_t
12361 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12362 enum machine_mode mode, secondary_reload_info *sri)
12364 enum reg_class rclass = (enum reg_class) rclass_i;
12366 sri->icode = CODE_FOR_nothing;
12367 sri->extra_cost = 0;
12369 /* We need a temporary when loading/storing a HImode/QImode value
12370 between memory and the FPU registers. This can happen when combine puts
12371 a paradoxical subreg in a float/fix conversion insn. */
12372 if (FP_REG_CLASS_P (rclass)
12373 && (mode == HImode || mode == QImode)
12374 && (GET_CODE (x) == MEM
12375 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12376 && true_regnum (x) == -1)))
12377 return GENERAL_REGS;
12379 /* On 32-bit we need a temporary when loading/storing a DFmode value
12380 between unaligned memory and the upper FPU registers. */
12381 if (TARGET_ARCH32
12382 && rclass == EXTRA_FP_REGS
12383 && mode == DFmode
12384 && GET_CODE (x) == MEM
12385 && ! mem_min_alignment (x, 8))
12386 return FP_REGS;
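/* We need a scratch register when loading/storing a symbolic operand
   under the medium/anywhere code models without PIC; hand the reload
   off to the dedicated reload_in/reload_out patterns. */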
12388 if (((TARGET_CM_MEDANY
12389 && symbolic_operand (x, mode))
12390 || (TARGET_CM_EMBMEDANY
12391 && text_segment_operand (x, mode)))
12392 && ! flag_pic)
12394 if (in_p)
12395 sri->icode = direct_optab_handler (reload_in_optab, mode);
12396 else
12397 sri->icode = direct_optab_handler (reload_out_optab, mode);
12398 return NO_REGS;
12401 if (TARGET_VIS3 && TARGET_ARCH32)
12403 int regno = true_regnum (x);
12405 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12406 to move 8-byte values in 4-byte pieces. This only works via
12407 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12408 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12409 an FP_REGS intermediate move. */
12410 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12411 || ((general_or_i64_p (rclass)
12412 || rclass == GENERAL_OR_FP_REGS)
12413 && SPARC_FP_REG_P (regno)))
12415 sri->extra_cost = 2;
12416 return FP_REGS;
12420 return NO_REGS;
12423 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12424 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12426 bool
12427 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
12429 enum rtx_code rc = GET_CODE (operands[1]);
12430 enum machine_mode cmp_mode;
12431 rtx cc_reg, dst, cmp;
12433 cmp = operands[1];
12434 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12435 return false;
12437 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12438 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12440 cmp_mode = GET_MODE (XEXP (cmp, 0));
12441 rc = GET_CODE (cmp);
12443 dst = operands[0];
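/* If neither source matches DST, load operands[3] into DST up front (using
   a fresh register if DST overlaps the comparison) and conditionally
   overwrite it with operands[2]; if operands[2] already is DST, swap in
   operands[3] and reverse the condition instead. */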
12444 if (! rtx_equal_p (operands[2], dst)
12445 && ! rtx_equal_p (operands[3], dst))
12447 if (reg_overlap_mentioned_p (dst, cmp))
12448 dst = gen_reg_rtx (mode);
12450 emit_move_insn (dst, operands[3]);
12452 else if (operands[2] == dst)
12454 operands[2] = operands[3];
12456 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12457 rc = reverse_condition_maybe_unordered (rc);
12458 else
12459 rc = reverse_condition (rc);
12462 if (XEXP (cmp, 1) == const0_rtx
12463 && GET_CODE (XEXP (cmp, 0)) == REG
12464 && cmp_mode == DImode
12465 && v9_regcmp_p (rc))
12466 cc_reg = XEXP (cmp, 0);
12467 else
12468 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12470 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12472 emit_insn (gen_rtx_SET (VOIDmode, dst,
12473 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12475 if (dst != operands[0])
12476 emit_move_insn (operands[0], dst);
12478 return true;
12481 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12482 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12483 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12484 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12485 code to be used for the condition mask. */
12487 void
12488 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12490 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12491 enum rtx_code code = GET_CODE (operands[3]);
12493 mask = gen_reg_rtx (Pmode);
12494 cop0 = operands[4];
12495 cop1 = operands[5];
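/* The VIS fpcmp instructions provide GT, LE, EQ and NE forms only, so
   canonicalize LT and GE by swapping the comparison operands. */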
12496 if (code == LT || code == GE)
12498 rtx t;
12500 code = swap_condition (code);
12501 t = cop0; cop0 = cop1; cop1 = t;
12504 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12506 fcmp = gen_rtx_UNSPEC (Pmode,
12507 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12508 fcode);
12510 cmask = gen_rtx_UNSPEC (DImode,
12511 gen_rtvec (2, mask, gsr),
12512 ccode);
12514 bshuf = gen_rtx_UNSPEC (mode,
12515 gen_rtvec (3, operands[1], operands[2], gsr),
12516 UNSPEC_BSHUFFLE);
12518 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12519 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12521 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12524 /* On SPARC, any mode which naturally allocates into the float
12525 registers should return 4 here. */
12527 unsigned int
12528 sparc_regmode_natural_size (enum machine_mode mode)
12530 int size = UNITS_PER_WORD;
12532 if (TARGET_ARCH64)
12534 enum mode_class mclass = GET_MODE_CLASS (mode);
12536 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12537 size = 4;
12540 return size;
12543 /* Return TRUE if it is a good idea to tie two pseudo registers
12544 when one has mode MODE1 and one has mode MODE2.
12545 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12546 for any hard reg, then this must be FALSE for correct output.
12548 For V9 we have to deal with the fact that only the lower 32 floating
12549 point registers are 32-bit addressable. */
12551 bool
12552 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12554 enum mode_class mclass1, mclass2;
12555 unsigned short size1, size2;
12557 if (mode1 == mode2)
12558 return true;
12560 mclass1 = GET_MODE_CLASS (mode1);
12561 mclass2 = GET_MODE_CLASS (mode2);
12562 if (mclass1 != mclass2)
12563 return false;
12565 if (! TARGET_V9)
12566 return true;
12568 /* Classes are the same and we are V9 so we have to deal with upper
12569 vs. lower floating point registers. If one of the modes is a
12570 4-byte mode, and the other is not, we have to mark them as not
12571 tieable because only the lower 32 floating point registers are
12572 addressable 32 bits at a time.
12574 We can't just test explicitly for SFmode, otherwise we won't
12575 cover the vector mode cases properly. */
12577 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12578 return true;
12580 size1 = GET_MODE_SIZE (mode1);
12581 size2 = GET_MODE_SIZE (mode2);
12582 if ((size1 > 4 && size2 == 4)
12583 || (size2 > 4 && size1 == 4))
12584 return false;
12586 return true;
12589 /* Implement TARGET_CSTORE_MODE. */
12591 static enum machine_mode
12592 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12594 return (TARGET_ARCH64 ? DImode : SImode);
12597 /* Return the compound expression made of T1 and T2. */
12599 static inline tree
12600 compound_expr (tree t1, tree t2)
12602 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12605 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12607 static void
12608 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12610 if (!TARGET_FPU)
12611 return;
12613 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12614 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
12616 /* We generate the equivalent of feholdexcept (&fenv_var):
12618 unsigned int fenv_var;
12619 __builtin_store_fsr (&fenv_var);
12621 unsigned int tmp1_var;
12622 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12624 __builtin_load_fsr (&tmp1_var); */
12626 tree fenv_var = create_tmp_var (unsigned_type_node, NULL);
12627 mark_addressable (fenv_var);
12628 tree fenv_addr = build_fold_addr_expr (fenv_var);
12629 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12630 tree hold_stfsr = build_call_expr (stfsr, 1, fenv_addr);
12632 tree tmp1_var = create_tmp_var (unsigned_type_node, NULL);
12633 mark_addressable (tmp1_var);
12634 tree masked_fenv_var
12635 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12636 build_int_cst (unsigned_type_node,
12637 ~(accrued_exception_mask | trap_enable_mask)));
12638 tree hold_mask
12639 = build2 (MODIFY_EXPR, void_type_node, tmp1_var, masked_fenv_var);
12641 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12642 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12643 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12645 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12647 /* We reload the value of tmp1_var to clear the exceptions:
12649 __builtin_load_fsr (&tmp1_var); */
12651 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12653 /* We generate the equivalent of feupdateenv (&fenv_var):
12655 unsigned int tmp2_var;
12656 __builtin_store_fsr (&tmp2_var);
12658 __builtin_load_fsr (&fenv_var);
12660 if (SPARC_LOW_FE_EXCEPT_VALUES)
12661 tmp2_var >>= 5;
12662 __atomic_feraiseexcept ((int) tmp2_var); */
12664 tree tmp2_var = create_tmp_var (unsigned_type_node, NULL);
12665 mark_addressable (tmp2_var);
12666 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12667 tree update_stfsr = build_call_expr (stfsr, 1, tmp2_addr);
12669 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12671 tree atomic_feraiseexcept
12672 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12673 tree update_call
12674 = build_call_expr (atomic_feraiseexcept, 1,
12675 fold_convert (integer_type_node, tmp2_var));
12677 if (SPARC_LOW_FE_EXCEPT_VALUES)
12679 tree shifted_tmp2_var
12680 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12681 build_int_cst (unsigned_type_node, 5));
12682 tree update_shift
12683 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12684 update_call = compound_expr (update_shift, update_call);
12687 *update
12688 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12691 #include "gt-sparc.h"