/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2014 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stringpool.h"
29 #include "stor-layout.h"
30 #include "calls.h"
31 #include "varasm.h"
32 #include "rtl.h"
33 #include "regs.h"
34 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "insn-codes.h"
37 #include "conditions.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "flags.h"
41 #include "function.h"
42 #include "except.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "recog.h"
46 #include "diagnostic-core.h"
47 #include "ggc.h"
48 #include "tm_p.h"
49 #include "debug.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "common/common-target.h"
53 #include "hash-table.h"
54 #include "vec.h"
55 #include "basic-block.h"
56 #include "tree-ssa-alias.h"
57 #include "internal-fn.h"
58 #include "gimple-fold.h"
59 #include "tree-eh.h"
60 #include "gimple-expr.h"
61 #include "is-a.h"
62 #include "gimple.h"
63 #include "gimplify.h"
64 #include "langhooks.h"
65 #include "reload.h"
66 #include "params.h"
67 #include "df.h"
68 #include "opts.h"
69 #include "tree-pass.h"
70 #include "context.h"
71 #include "wide-int.h"
72 #include "builtins.h"
73 #include "rtl-iter.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;
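  /* For example, the ultrasparc entry below has an int_mul of
     COSTS_N_INSNS (4) and an int_mul_bit_factor of 2, so by the formula
     above a multiply whose operand has its highest set bit at position 11
     is costed as COSTS_N_INSNS (4) + (11 - 3) / 2, i.e. 4 units on top of
     the base multiply cost.  */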

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
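
/* Reading the table above: in a leaf function the incoming registers
   %i0-%i5 and %i7 (hard regs 24-29 and 31) are rewritten to the outgoing
   registers %o0-%o5 and %o7 (hard regs 8-13 and 15), e.g. %i0 becomes %o0,
   while %sp (14) and the FP registers (32 and up) map to themselves;
   registers that may not appear in a leaf function get -1.  */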

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (enum machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree,
						      enum machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     enum machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 enum machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       enum machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (enum machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    enum machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   enum machine_mode,
					   secondary_reload_info *);
static enum machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}
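
/* For instance, for a source like (sign_extend:SI (mem:HI addr)) the
   extension is peeled off and the inner (mem:HI addr) is returned; for a
   source with no memory reference the result is NULL_RTX.  */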

/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		       ld [address], %fx+1
		       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		       ld [address], %fx+1
		       FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		   ld [address], %fx+1
		   fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
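	  /* Concretely: if the load writes %f2 (hard reg 34), the sibling
	     register y is %f3 (hard reg 35), and vice versa.  */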
	  rtx after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL_RTX;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (GET_CODE (PATTERN (after)) == SEQUENCE)
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = XVECEXP (PATTERN (after), 0, 1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The only errata we handle are those of the AT697F and UT699.  */
      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }
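
  /* Thus a debug string like "all,!options" first sets every debug bit
     and then clears MASK_DEBUG_OPTIONS again.  */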

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    }

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass, /* pass */
      "dbr", /* reference_pass_name */
      1, /* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER /* po_op */
    };
  register_pass (&insert_pass_work_around_errata);
}

/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}
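
/* These are exactly the six conditions for which V9 provides the
   branch-on-register-contents instructions (brz, brlez, brlz, brnz,
   brgz, brgez) and the corresponding movr forms; the other comparison
   codes have no register-contents variant.  */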

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}
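
/* For example, 1.0f has the single-precision bit pattern 0x3f800000:
   it does not fit in a signed 13-bit immediate, but its low 10 bits are
   zero, so one sethi suffices to load it into an integer register.  */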

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; now mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partitioning into hot
     and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}
1731 /* Expand a move instruction. Return true if all work is done. */
1733 bool
1734 sparc_expand_move (enum machine_mode mode, rtx *operands)
1736 /* Handle sets of MEM first. */
1737 if (GET_CODE (operands[0]) == MEM)
1739 /* 0 is a register (or a pair of registers) on SPARC. */
1740 if (register_or_zero_operand (operands[1], mode))
1741 return false;
1743 if (!reload_in_progress)
1745 operands[0] = validize_mem (operands[0]);
1746 operands[1] = force_reg (mode, operands[1]);
1750 /* Fixup TLS cases. */
1751 if (TARGET_HAVE_TLS
1752 && CONSTANT_P (operands[1])
1753 && sparc_tls_referenced_p (operands [1]))
1755 operands[1] = sparc_legitimize_tls_address (operands[1]);
1756 return false;
1759 /* Fixup PIC cases. */
1760 if (flag_pic && CONSTANT_P (operands[1]))
1762 if (pic_address_needs_scratch (operands[1]))
1763 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1765 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1766 if (GET_CODE (operands[1]) == LABEL_REF
1767 && can_use_mov_pic_label_ref (operands[1]))
1769 if (mode == SImode)
1771 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1772 return true;
1775 if (mode == DImode)
1777 gcc_assert (TARGET_ARCH64);
1778 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1779 return true;
1783 if (symbolic_operand (operands[1], mode))
1785 operands[1]
1786 = sparc_legitimize_pic_address (operands[1],
1787 reload_in_progress
1788 ? operands[0] : NULL_RTX);
1789 return false;
1793 /* If we are trying to toss an integer constant into FP registers,
1794 or loading an FP or vector constant, force it into memory. */
1795 if (CONSTANT_P (operands[1])
1796 && REG_P (operands[0])
1797 && (SPARC_FP_REG_P (REGNO (operands[0]))
1798 || SCALAR_FLOAT_MODE_P (mode)
1799 || VECTOR_MODE_P (mode)))
1801 /* emit_group_store will send such bogosity to us when it is
1802 not storing directly into memory. So fix this up to avoid
1803 crashes in output_constant_pool. */
1804 if (operands [1] == const0_rtx)
1805 operands[1] = CONST0_RTX (mode);
1807 /* If TARGET_VIS, we can clear FP registers or set them to all-ones;
1808 other registers can always be handled this way. */
1809 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1810 && (const_zero_operand (operands[1], mode)
1811 || const_all_ones_operand (operands[1], mode)))
1812 return false;
1814 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1815 /* We are able to build any SF constant in integer registers
1816 with at most 2 instructions. */
1817 && (mode == SFmode
1818 /* And any DF constant in integer registers. */
1819 || (mode == DFmode
1820 && ! can_create_pseudo_p ())))
1821 return false;
1823 operands[1] = force_const_mem (mode, operands[1]);
1824 if (!reload_in_progress)
1825 operands[1] = validize_mem (operands[1]);
1826 return false;
1829 /* Accept non-constants and valid constants unmodified. */
1830 if (!CONSTANT_P (operands[1])
1831 || GET_CODE (operands[1]) == HIGH
1832 || input_operand (operands[1], mode))
1833 return false;
1835 switch (mode)
1837 case QImode:
1838 /* All QImode constants require only one insn, so proceed. */
1839 break;
1841 case HImode:
1842 case SImode:
1843 sparc_emit_set_const32 (operands[0], operands[1]);
1844 return true;
1846 case DImode:
1847 /* input_operand should have filtered out 32-bit mode. */
1848 sparc_emit_set_const64 (operands[0], operands[1]);
1849 return true;
1851 case TImode:
1853 rtx high, low;
1854 /* TImode isn't available in 32-bit mode. */
1855 split_double (operands[1], &high, &low);
1856 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1857 high));
1858 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1859 low));
1861 return true;
1863 default:
1864 gcc_unreachable ();
1867 return false;
1870 /* Load OP1, a 32-bit constant, into OP0, a register.
1871 We know it can't be done in one insn when we get
1872 here, the move expander guarantees this. */
1874 static void
1875 sparc_emit_set_const32 (rtx op0, rtx op1)
1877 enum machine_mode mode = GET_MODE (op0);
1878 rtx temp = op0;
1880 if (can_create_pseudo_p ())
1881 temp = gen_reg_rtx (mode);
1883 if (GET_CODE (op1) == CONST_INT)
1885 gcc_assert (!small_int_operand (op1, mode)
1886 && !const_high_operand (op1, mode));
1888 /* Emit them as real moves instead of a HIGH/LO_SUM,
1889 this way CSE can see everything and reuse intermediate
1890 values if it wants. */
1891 emit_insn (gen_rtx_SET (VOIDmode, temp,
1892 GEN_INT (INTVAL (op1)
1893 & ~(HOST_WIDE_INT)0x3ff)));
1895 emit_insn (gen_rtx_SET (VOIDmode,
1896 op0,
1897 gen_rtx_IOR (mode, temp,
1898 GEN_INT (INTVAL (op1) & 0x3ff))));
1900 else
1902 /* A symbol, emit in the traditional way. */
1903 emit_insn (gen_rtx_SET (VOIDmode, temp,
1904 gen_rtx_HIGH (mode, op1)));
1905 emit_insn (gen_rtx_SET (VOIDmode,
1906 op0, gen_rtx_LO_SUM (mode, temp, op1)));
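/* Worked example (added for illustration): for op1 = 0x12345678 the
   CONST_INT path above emits

     set 0x12345400, %temp      ! 0x12345678 & ~0x3ff, matched by sethi
     or  %temp, 0x278, %op0     ! 0x12345678 &  0x3ff

   keeping both intermediate values visible to CSE, as the comment in
   the function explains.  */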
1910 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1911 If TEMP is nonzero, we are forbidden to use any other scratch
1912 registers. Otherwise, we are allowed to generate them as needed.
1914 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1915 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1917 void
1918 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1920 rtx temp1, temp2, temp3, temp4, temp5;
1921 rtx ti_temp = 0;
1923 if (temp && GET_MODE (temp) == TImode)
1925 ti_temp = temp;
1926 temp = gen_rtx_REG (DImode, REGNO (temp));
1929 /* SPARC-V9 code-model support. */
1930 switch (sparc_cmodel)
1932 case CM_MEDLOW:
1933 /* The range spanned by all instructions in the object is less
1934 than 2^31 bytes (2GB) and the distance from any instruction
1935 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1936 than 2^31 bytes (2GB).
1938 The executable must be in the low 4TB of the virtual address
1939 space.
1941 sethi %hi(symbol), %temp1
1942 or %temp1, %lo(symbol), %reg */
1943 if (temp)
1944 temp1 = temp; /* op0 is allowed. */
1945 else
1946 temp1 = gen_reg_rtx (DImode);
1948 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1949 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1950 break;
1952 case CM_MEDMID:
1953 /* The range spanned by all instructions in the object is less
1954 than 2^31 bytes (2GB) and the distance from any instruction
1955 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1956 than 2^31 bytes (2GB).
1958 The executable must be in the low 16TB of the virtual address
1959 space.
1961 sethi %h44(symbol), %temp1
1962 or %temp1, %m44(symbol), %temp2
1963 sllx %temp2, 12, %temp3
1964 or %temp3, %l44(symbol), %reg */
1965 if (temp)
1967 temp1 = op0;
1968 temp2 = op0;
1969 temp3 = temp; /* op0 is allowed. */
1971 else
1973 temp1 = gen_reg_rtx (DImode);
1974 temp2 = gen_reg_rtx (DImode);
1975 temp3 = gen_reg_rtx (DImode);
1978 emit_insn (gen_seth44 (temp1, op1));
1979 emit_insn (gen_setm44 (temp2, temp1, op1));
1980 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1981 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1982 emit_insn (gen_setl44 (op0, temp3, op1));
1983 break;
1985 case CM_MEDANY:
1986 /* The range spanned by all instructions in the object is less
1987 than 2^31 bytes (2GB) and the distance from any instruction
1988 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1989 than 2^31 bytes (2GB).
1991 The executable can be placed anywhere in the virtual address
1992 space.
1994 sethi %hh(symbol), %temp1
1995 sethi %lm(symbol), %temp2
1996 or %temp1, %hm(symbol), %temp3
1997 sllx %temp3, 32, %temp4
1998 or %temp4, %temp2, %temp5
1999 or %temp5, %lo(symbol), %reg */
2000 if (temp)
2002 /* It is possible that one of the registers we got for operands[2]
2003 might coincide with that of operands[0] (which is why we made
2004 it TImode). Pick the other one to use as our scratch. */
2005 if (rtx_equal_p (temp, op0))
2007 gcc_assert (ti_temp);
2008 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2010 temp1 = op0;
2011 temp2 = temp; /* op0 is _not_ allowed, see above. */
2012 temp3 = op0;
2013 temp4 = op0;
2014 temp5 = op0;
2016 else
2018 temp1 = gen_reg_rtx (DImode);
2019 temp2 = gen_reg_rtx (DImode);
2020 temp3 = gen_reg_rtx (DImode);
2021 temp4 = gen_reg_rtx (DImode);
2022 temp5 = gen_reg_rtx (DImode);
2025 emit_insn (gen_sethh (temp1, op1));
2026 emit_insn (gen_setlm (temp2, op1));
2027 emit_insn (gen_sethm (temp3, temp1, op1));
2028 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2029 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2030 emit_insn (gen_rtx_SET (VOIDmode, temp5,
2031 gen_rtx_PLUS (DImode, temp4, temp2)));
2032 emit_insn (gen_setlo (op0, temp5, op1));
2033 break;
2035 case CM_EMBMEDANY:
2036 /* Old old old backwards compatibility kruft here.
2037 Essentially it is MEDLOW with a fixed 64-bit
2038 virtual base added to all data segment addresses.
2039 Text-segment stuff is computed like MEDANY, we can't
2040 reuse the code above because the relocation knobs
2041 look different.
2043 Data segment: sethi %hi(symbol), %temp1
2044 add %temp1, EMBMEDANY_BASE_REG, %temp2
2045 or %temp2, %lo(symbol), %reg */
2046 if (data_segment_operand (op1, GET_MODE (op1)))
2048 if (temp)
2050 temp1 = temp; /* op0 is allowed. */
2051 temp2 = op0;
2053 else
2055 temp1 = gen_reg_rtx (DImode);
2056 temp2 = gen_reg_rtx (DImode);
2059 emit_insn (gen_embmedany_sethi (temp1, op1));
2060 emit_insn (gen_embmedany_brsum (temp2, temp1));
2061 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2064 /* Text segment: sethi %uhi(symbol), %temp1
2065 sethi %hi(symbol), %temp2
2066 or %temp1, %ulo(symbol), %temp3
2067 sllx %temp3, 32, %temp4
2068 or %temp4, %temp2, %temp5
2069 or %temp5, %lo(symbol), %reg */
2070 else
2072 if (temp)
2074 /* It is possible that one of the registers we got for operands[2]
2075 might coincide with that of operands[0] (which is why we made
2076 it TImode). Pick the other one to use as our scratch. */
2077 if (rtx_equal_p (temp, op0))
2079 gcc_assert (ti_temp);
2080 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2082 temp1 = op0;
2083 temp2 = temp; /* op0 is _not_ allowed, see above. */
2084 temp3 = op0;
2085 temp4 = op0;
2086 temp5 = op0;
2088 else
2090 temp1 = gen_reg_rtx (DImode);
2091 temp2 = gen_reg_rtx (DImode);
2092 temp3 = gen_reg_rtx (DImode);
2093 temp4 = gen_reg_rtx (DImode);
2094 temp5 = gen_reg_rtx (DImode);
2097 emit_insn (gen_embmedany_textuhi (temp1, op1));
2098 emit_insn (gen_embmedany_texthi (temp2, op1));
2099 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2100 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2101 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2102 emit_insn (gen_rtx_SET (VOIDmode, temp5,
2103 gen_rtx_PLUS (DImode, temp4, temp2)));
2104 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2106 break;
2108 default:
2109 gcc_unreachable ();
2113 #if HOST_BITS_PER_WIDE_INT == 32
2114 static void
2115 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
2117 gcc_unreachable ();
2119 #else
2120 /* These avoid problems when cross compiling. If we do not
2121 go through all this hair then the optimizer will see
2122 invalid REG_EQUAL notes or in some cases none at all. */
2123 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2124 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2125 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2126 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2128 /* The optimizer is not to assume anything about exactly
2129 which bits are set for a HIGH; they are unspecified.
2130 Unfortunately this leads to many missed optimizations
2131 during CSE. We mask out the non-HIGH bits so that the result
2132 matches a plain movdi, to alleviate this problem. */
2133 static rtx
2134 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2136 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2139 static rtx
2140 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2142 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
2145 static rtx
2146 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2148 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2151 static rtx
2152 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2154 return gen_rtx_XOR (DImode, src, GEN_INT (val));
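/* Illustrative example (added): because sethi clears the low 10 bits of
   its destination, gen_safe_HIGH64 masks the value the same way so that
   the emitted pattern is exact.  E.g. for val = 0xdeadbeef it generates
   a set of 0xdeadbc00, which is precisely what
   "sethi %hi(0xdeadbeef), %reg" leaves in the register.  */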
2157 /* Worker routines for 64-bit constant formation on arch64.
2158 One of the key things to do in these emissions is
2159 to create as many temp REGs as possible. This makes it
2160 possible for half-built constants to be used later when
2161 such values are similar to something required later on.
2162 Without doing this, the optimizer cannot see such
2163 opportunities. */
2165 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2166 unsigned HOST_WIDE_INT, int);
2168 static void
2169 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2170 unsigned HOST_WIDE_INT low_bits, int is_neg)
2172 unsigned HOST_WIDE_INT high_bits;
2174 if (is_neg)
2175 high_bits = (~low_bits) & 0xffffffff;
2176 else
2177 high_bits = low_bits;
2179 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2180 if (!is_neg)
2182 emit_insn (gen_rtx_SET (VOIDmode, op0,
2183 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2185 else
2187 /* If we are XOR'ing with -1, then we should emit a one's complement
2188 instead. This way the combiner will notice logical operations
2189 such as ANDN later on and substitute. */
2190 if ((low_bits & 0x3ff) == 0x3ff)
2192 emit_insn (gen_rtx_SET (VOIDmode, op0,
2193 gen_rtx_NOT (DImode, temp)));
2195 else
2197 emit_insn (gen_rtx_SET (VOIDmode, op0,
2198 gen_safe_XOR64 (temp,
2199 (-(HOST_WIDE_INT)0x400
2200 | (low_bits & 0x3ff)))));
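/* Worked example for the is_neg path above (added for illustration): to
   build 0xffffffff12345678, quick1 takes the complement of the low word
   (0xedcba987), emits "sethi %hi(0xedcba987), %temp" leaving
   0x00000000edcba800 in %temp (sethi zeroes bits 63:32), then XORs with
   -0x400 | 0x278 = 0xfffffffffffffe78, which flips the high bits and
   fixes up the trailing 10 bits in a single insn.  */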
2205 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2206 unsigned HOST_WIDE_INT, int);
2208 static void
2209 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2210 unsigned HOST_WIDE_INT high_bits,
2211 unsigned HOST_WIDE_INT low_immediate,
2212 int shift_count)
2214 rtx temp2 = op0;
2216 if ((high_bits & 0xfffffc00) != 0)
2218 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2219 if ((high_bits & ~0xfffffc00) != 0)
2220 emit_insn (gen_rtx_SET (VOIDmode, op0,
2221 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2222 else
2223 temp2 = temp;
2225 else
2227 emit_insn (gen_safe_SET64 (temp, high_bits));
2228 temp2 = temp;
2231 /* Now shift it up into place. */
2232 emit_insn (gen_rtx_SET (VOIDmode, op0,
2233 gen_rtx_ASHIFT (DImode, temp2,
2234 GEN_INT (shift_count))));
2236 /* If there is a low immediate part piece, finish up by
2237 putting that in as well. */
2238 if (low_immediate != 0)
2239 emit_insn (gen_rtx_SET (VOIDmode, op0,
2240 gen_safe_OR64 (op0, low_immediate)));
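/* Worked example (added for illustration): for high_bits = 0x12345678,
   low_immediate = 0x123 and shift_count = 32, quick2 emits roughly

     sethi %hi(0x12345678), %temp   ! %temp = 0x12345400
     or    %temp, 0x278, %op0       ! %op0  = 0x12345678
     sllx  %op0, 32, %op0           ! %op0  = 0x1234567800000000
     or    %op0, 0x123, %op0        ! merge the low immediate  */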
2243 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2244 unsigned HOST_WIDE_INT);
2246 /* Full 64-bit constant decomposition. Even though this is the
2247 'worst' case, we still optimize a few things away. */
2248 static void
2249 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2250 unsigned HOST_WIDE_INT high_bits,
2251 unsigned HOST_WIDE_INT low_bits)
2253 rtx sub_temp = op0;
2255 if (can_create_pseudo_p ())
2256 sub_temp = gen_reg_rtx (DImode);
2258 if ((high_bits & 0xfffffc00) != 0)
2260 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2261 if ((high_bits & ~0xfffffc00) != 0)
2262 emit_insn (gen_rtx_SET (VOIDmode,
2263 sub_temp,
2264 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2265 else
2266 sub_temp = temp;
2268 else
2270 emit_insn (gen_safe_SET64 (temp, high_bits));
2271 sub_temp = temp;
2274 if (can_create_pseudo_p ())
2276 rtx temp2 = gen_reg_rtx (DImode);
2277 rtx temp3 = gen_reg_rtx (DImode);
2278 rtx temp4 = gen_reg_rtx (DImode);
2280 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2281 gen_rtx_ASHIFT (DImode, sub_temp,
2282 GEN_INT (32))));
2284 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2285 if ((low_bits & ~0xfffffc00) != 0)
2287 emit_insn (gen_rtx_SET (VOIDmode, temp3,
2288 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2289 emit_insn (gen_rtx_SET (VOIDmode, op0,
2290 gen_rtx_PLUS (DImode, temp4, temp3)));
2292 else
2294 emit_insn (gen_rtx_SET (VOIDmode, op0,
2295 gen_rtx_PLUS (DImode, temp4, temp2)));
2298 else
2300 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2301 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2302 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2303 int to_shift = 12;
2305 /* We are in the middle of reload, so this is really
2306 painful. However we do still make an attempt to
2307 avoid emitting truly stupid code. */
2308 if (low1 != const0_rtx)
2310 emit_insn (gen_rtx_SET (VOIDmode, op0,
2311 gen_rtx_ASHIFT (DImode, sub_temp,
2312 GEN_INT (to_shift))));
2313 emit_insn (gen_rtx_SET (VOIDmode, op0,
2314 gen_rtx_IOR (DImode, op0, low1)));
2315 sub_temp = op0;
2316 to_shift = 12;
2318 else
2320 to_shift += 12;
2322 if (low2 != const0_rtx)
2324 emit_insn (gen_rtx_SET (VOIDmode, op0,
2325 gen_rtx_ASHIFT (DImode, sub_temp,
2326 GEN_INT (to_shift))));
2327 emit_insn (gen_rtx_SET (VOIDmode, op0,
2328 gen_rtx_IOR (DImode, op0, low2)));
2329 sub_temp = op0;
2330 to_shift = 8;
2332 else
2334 to_shift += 8;
2336 emit_insn (gen_rtx_SET (VOIDmode, op0,
2337 gen_rtx_ASHIFT (DImode, sub_temp,
2338 GEN_INT (to_shift))));
2339 if (low3 != const0_rtx)
2340 emit_insn (gen_rtx_SET (VOIDmode, op0,
2341 gen_rtx_IOR (DImode, op0, low3)));
2342 /* phew... */
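/* Summary of the reload path above (added note, not in the original
   source): without pseudos the low word cannot get its own sethi/or, so
   it is folded in as immediates between partial shifts of the high
   word: low1 = bits 31:20, low2 = bits 19:8, low3 = bits 7:0, with the
   shift amounts (12 + 12 + 8) summing to the full 32-bit displacement
   and each chunk small enough for a 13-bit immediate field.  */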
2346 /* Analyze a 64-bit constant for certain properties. */
2347 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2348 unsigned HOST_WIDE_INT,
2349 int *, int *, int *);
2351 static void
2352 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2353 unsigned HOST_WIDE_INT low_bits,
2354 int *hbsp, int *lbsp, int *abbasp)
2356 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2357 int i;
2359 lowest_bit_set = highest_bit_set = -1;
2360 i = 0;
2363 if ((lowest_bit_set == -1)
2364 && ((low_bits >> i) & 1))
2365 lowest_bit_set = i;
2366 if ((highest_bit_set == -1)
2367 && ((high_bits >> (32 - i - 1)) & 1))
2368 highest_bit_set = (64 - i - 1);
2370 while (++i < 32
2371 && ((highest_bit_set == -1)
2372 || (lowest_bit_set == -1)));
2373 if (i == 32)
2375 i = 0;
2378 if ((lowest_bit_set == -1)
2379 && ((high_bits >> i) & 1))
2380 lowest_bit_set = i + 32;
2381 if ((highest_bit_set == -1)
2382 && ((low_bits >> (32 - i - 1)) & 1))
2383 highest_bit_set = 32 - i - 1;
2385 while (++i < 32
2386 && ((highest_bit_set == -1)
2387 || (lowest_bit_set == -1)));
2389 /* If there are no bits set this should have gone out
2390 as one instruction! */
2391 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2392 all_bits_between_are_set = 1;
2393 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2395 if (i < 32)
2397 if ((low_bits & (1 << i)) != 0)
2398 continue;
2400 else
2402 if ((high_bits & (1 << (i - 32))) != 0)
2403 continue;
2405 all_bits_between_are_set = 0;
2406 break;
2408 *hbsp = highest_bit_set;
2409 *lbsp = lowest_bit_set;
2410 *abbasp = all_bits_between_are_set;
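/* Worked example (added for illustration): for the constant
   0x000003fffffffc00 we have high_bits = 0x000003ff and
   low_bits = 0xfffffc00, so the scans above find lowest_bit_set = 10,
   highest_bit_set = 41 and all_bits_between_are_set = 1, i.e. a
   contiguous run of 32 one bits straddling the word boundary.  */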
2413 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2415 static int
2416 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2417 unsigned HOST_WIDE_INT low_bits)
2419 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2421 if (high_bits == 0
2422 || high_bits == 0xffffffff)
2423 return 1;
2425 analyze_64bit_constant (high_bits, low_bits,
2426 &highest_bit_set, &lowest_bit_set,
2427 &all_bits_between_are_set);
2429 if ((highest_bit_set == 63
2430 || lowest_bit_set == 0)
2431 && all_bits_between_are_set != 0)
2432 return 1;
2434 if ((highest_bit_set - lowest_bit_set) < 21)
2435 return 1;
2437 return 0;
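/* Illustrative examples (added) of constants the test above accepts:
   0x00000000deadbeef (high_bits == 0, sethi + or),
   0xfffffffffffff123 (high_bits == 0xffffffff, sethi + xor) and
   0x000ffc0000000000 (a run of ones spanning fewer than 21 bits,
   sethi + sllx).  */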
2440 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2441 unsigned HOST_WIDE_INT,
2442 int, int);
2444 static unsigned HOST_WIDE_INT
2445 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2446 unsigned HOST_WIDE_INT low_bits,
2447 int lowest_bit_set, int shift)
2449 HOST_WIDE_INT hi, lo;
2451 if (lowest_bit_set < 32)
2453 lo = (low_bits >> lowest_bit_set) << shift;
2454 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2456 else
2458 lo = 0;
2459 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2461 gcc_assert (! (hi & lo));
2462 return (hi | lo);
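/* Worked example (added): for the constant 0x000ffc0000000000
   (lowest_bit_set = 42) and shift = 10, this returns
   (0x000ffc00 >> 10) << 10 = 0x000ffc00, which satisfies SPARC_SETHI_P
   and can be moved back into place afterwards with a single sllx.  */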
2465 /* Here we are sure to be arch64 and this is an integer constant
2466 being loaded into a register. Emit the most efficient
2467 insn sequence possible. Detection of all the 1-insn cases
2468 has been done already. */
2469 static void
2470 sparc_emit_set_const64 (rtx op0, rtx op1)
2472 unsigned HOST_WIDE_INT high_bits, low_bits;
2473 int lowest_bit_set, highest_bit_set;
2474 int all_bits_between_are_set;
2475 rtx temp = 0;
2477 /* Sanity check that we know what we are working with. */
2478 gcc_assert (TARGET_ARCH64
2479 && (GET_CODE (op0) == SUBREG
2480 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2482 if (! can_create_pseudo_p ())
2483 temp = op0;
2485 if (GET_CODE (op1) != CONST_INT)
2487 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2488 return;
2491 if (! temp)
2492 temp = gen_reg_rtx (DImode);
2494 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2495 low_bits = (INTVAL (op1) & 0xffffffff);
2497 /* low_bits bits 0 --> 31
2498 high_bits bits 32 --> 63 */
2500 analyze_64bit_constant (high_bits, low_bits,
2501 &highest_bit_set, &lowest_bit_set,
2502 &all_bits_between_are_set);
2504 /* First try for a 2-insn sequence. */
2506 /* These situations are preferred because the optimizer can
2507 * do more things with them:
2508 * 1) mov -1, %reg
2509 * sllx %reg, shift, %reg
2510 * 2) mov -1, %reg
2511 * srlx %reg, shift, %reg
2512 * 3) mov some_small_const, %reg
2513 * sllx %reg, shift, %reg
2515 if (((highest_bit_set == 63
2516 || lowest_bit_set == 0)
2517 && all_bits_between_are_set != 0)
2518 || ((highest_bit_set - lowest_bit_set) < 12))
2520 HOST_WIDE_INT the_const = -1;
2521 int shift = lowest_bit_set;
2523 if ((highest_bit_set != 63
2524 && lowest_bit_set != 0)
2525 || all_bits_between_are_set == 0)
2527 the_const =
2528 create_simple_focus_bits (high_bits, low_bits,
2529 lowest_bit_set, 0);
2531 else if (lowest_bit_set == 0)
2532 shift = -(63 - highest_bit_set);
2534 gcc_assert (SPARC_SIMM13_P (the_const));
2535 gcc_assert (shift != 0);
2537 emit_insn (gen_safe_SET64 (temp, the_const));
2538 if (shift > 0)
2539 emit_insn (gen_rtx_SET (VOIDmode,
2540 op0,
2541 gen_rtx_ASHIFT (DImode,
2542 temp,
2543 GEN_INT (shift))));
2544 else if (shift < 0)
2545 emit_insn (gen_rtx_SET (VOIDmode,
2546 op0,
2547 gen_rtx_LSHIFTRT (DImode,
2548 temp,
2549 GEN_INT (-shift))));
2550 return;
2553 /* Now a range of 22 or fewer bits set somewhere.
2554 * 1) sethi %hi(focus_bits), %reg
2555 * sllx %reg, shift, %reg
2556 * 2) sethi %hi(focus_bits), %reg
2557 * srlx %reg, shift, %reg
2559 if ((highest_bit_set - lowest_bit_set) < 21)
2561 unsigned HOST_WIDE_INT focus_bits =
2562 create_simple_focus_bits (high_bits, low_bits,
2563 lowest_bit_set, 10);
2565 gcc_assert (SPARC_SETHI_P (focus_bits));
2566 gcc_assert (lowest_bit_set != 10);
2568 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2570 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2571 if (lowest_bit_set < 10)
2572 emit_insn (gen_rtx_SET (VOIDmode,
2573 op0,
2574 gen_rtx_LSHIFTRT (DImode, temp,
2575 GEN_INT (10 - lowest_bit_set))));
2576 else if (lowest_bit_set > 10)
2577 emit_insn (gen_rtx_SET (VOIDmode,
2578 op0,
2579 gen_rtx_ASHIFT (DImode, temp,
2580 GEN_INT (lowest_bit_set - 10))));
2581 return;
2584 /* 1) sethi %hi(low_bits), %reg
2585 * or %reg, %lo(low_bits), %reg
2586 * 2) sethi %hi(~low_bits), %reg
2587 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2589 if (high_bits == 0
2590 || high_bits == 0xffffffff)
2592 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2593 (high_bits == 0xffffffff));
2594 return;
2597 /* Now, try 3-insn sequences. */
2599 /* 1) sethi %hi(high_bits), %reg
2600 * or %reg, %lo(high_bits), %reg
2601 * sllx %reg, 32, %reg
2603 if (low_bits == 0)
2605 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2606 return;
2609 /* We may be able to do something quick
2610 when the constant is negated, so try that. */
2611 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2612 (~low_bits) & 0xfffffc00))
2614 /* NOTE: The trailing bits get XOR'd so we need the
2615 non-negated bits, not the negated ones. */
2616 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2618 if ((((~high_bits) & 0xffffffff) == 0
2619 && ((~low_bits) & 0x80000000) == 0)
2620 || (((~high_bits) & 0xffffffff) == 0xffffffff
2621 && ((~low_bits) & 0x80000000) != 0))
2623 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2625 if ((SPARC_SETHI_P (fast_int)
2626 && (~high_bits & 0xffffffff) == 0)
2627 || SPARC_SIMM13_P (fast_int))
2628 emit_insn (gen_safe_SET64 (temp, fast_int));
2629 else
2630 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2632 else
2634 rtx negated_const;
2635 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2636 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2637 sparc_emit_set_const64 (temp, negated_const);
2640 /* If we are XOR'ing with -1, then we should emit a one's complement
2641 instead. This way the combiner will notice logical operations
2642 such as ANDN later on and substitute. */
2643 if (trailing_bits == 0x3ff)
2645 emit_insn (gen_rtx_SET (VOIDmode, op0,
2646 gen_rtx_NOT (DImode, temp)));
2648 else
2650 emit_insn (gen_rtx_SET (VOIDmode,
2651 op0,
2652 gen_safe_XOR64 (temp,
2653 (-0x400 | trailing_bits))));
2655 return;
2658 /* 1) sethi %hi(xxx), %reg
2659 * or %reg, %lo(xxx), %reg
2660 * sllx %reg, yyy, %reg
2662 * ??? This is just a generalized version of the low_bits==0
2663 * thing above, FIXME...
2665 if ((highest_bit_set - lowest_bit_set) < 32)
2667 unsigned HOST_WIDE_INT focus_bits =
2668 create_simple_focus_bits (high_bits, low_bits,
2669 lowest_bit_set, 0);
2671 /* We can't get here in this state. */
2672 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2674 /* So what we know is that the set bits straddle the
2675 middle of the 64-bit word. */
2676 sparc_emit_set_const64_quick2 (op0, temp,
2677 focus_bits, 0,
2678 lowest_bit_set);
2679 return;
2682 /* 1) sethi %hi(high_bits), %reg
2683 * or %reg, %lo(high_bits), %reg
2684 * sllx %reg, 32, %reg
2685 * or %reg, low_bits, %reg
2687 if (SPARC_SIMM13_P(low_bits)
2688 && ((int)low_bits > 0))
2690 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2691 return;
2694 /* The easiest way when all else fails, is full decomposition. */
2695 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2697 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2699 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2700 return the mode to be used for the comparison. For floating-point,
2701 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2702 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2703 processing is needed. */
2705 enum machine_mode
2706 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2708 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2710 switch (op)
2712 case EQ:
2713 case NE:
2714 case UNORDERED:
2715 case ORDERED:
2716 case UNLT:
2717 case UNLE:
2718 case UNGT:
2719 case UNGE:
2720 case UNEQ:
2721 case LTGT:
2722 return CCFPmode;
2724 case LT:
2725 case LE:
2726 case GT:
2727 case GE:
2728 return CCFPEmode;
2730 default:
2731 gcc_unreachable ();
2734 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2735 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2737 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2738 return CCX_NOOVmode;
2739 else
2740 return CC_NOOVmode;
2742 else
2744 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2745 return CCXmode;
2746 else
2747 return CCmode;
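/* Illustrative examples (added) of the mapping above:

     (compare (plus:SI a b) (const_int 0))    -> CC_NOOVmode
     (compare (reg:DI x) (reg:DI y)), arch64  -> CCXmode
     float compare with EQ/NE/UNORDERED/...   -> CCFPmode  (quiet)
     float compare with LT/LE/GT/GE           -> CCFPEmode (signaling)  */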
2751 /* Emit the compare insn and return the CC reg for a CODE comparison
2752 with operands X and Y. */
2754 static rtx
2755 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2757 enum machine_mode mode;
2758 rtx cc_reg;
2760 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2761 return x;
2763 mode = SELECT_CC_MODE (code, x, y);
2765 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2766 fcc regs (cse can't tell they're really call clobbered regs and will
2767 remove a duplicate comparison even if there is an intervening function
2768 call - it will then try to reload the cc reg via an int reg which is why
2769 we need the movcc patterns). It is possible to provide the movcc
2770 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2771 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2772 to tell cse that CCFPE mode registers (even pseudos) are call
2773 clobbered. */
2775 /* ??? This is an experiment. Rather than making changes to cse which may
2776 or may not be easy/clean, we do our own cse. This is possible because
2777 we will generate hard registers. Cse knows they're call clobbered (it
2778 doesn't know the same thing about pseudos). If we guess wrong, no big
2779 deal, but if we win, great! */
2781 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2782 #if 1 /* experiment */
2784 int reg;
2785 /* We cycle through the registers to ensure they're all exercised. */
2786 static int next_fcc_reg = 0;
2787 /* Previous x,y for each fcc reg. */
2788 static rtx prev_args[4][2];
2790 /* Scan prev_args for x,y. */
2791 for (reg = 0; reg < 4; reg++)
2792 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2793 break;
2794 if (reg == 4)
2796 reg = next_fcc_reg;
2797 prev_args[reg][0] = x;
2798 prev_args[reg][1] = y;
2799 next_fcc_reg = (next_fcc_reg + 1) & 3;
2801 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2803 #else
2804 cc_reg = gen_reg_rtx (mode);
2805 #endif /* ! experiment */
2806 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2807 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2808 else
2809 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2811 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2812 will only result in an unrecognizable insn so no point in asserting. */
2813 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2815 return cc_reg;
2819 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2822 gen_compare_reg (rtx cmp)
2824 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2827 /* This function is used for v9 only.
2828 DEST is the target of the Scc insn.
2829 CODE is the code for an Scc's comparison.
2830 X and Y are the values we compare.
2832 This function is needed to turn
2834 (set (reg:SI 110)
2835 (gt (reg:CCX 100 %icc)
2836 (const_int 0)))
2837 into
2838 (set (reg:SI 110)
2839 (gt:DI (reg:CCX 100 %icc)
2840 (const_int 0)))
2842 I.e., the instruction recognizer needs to see the mode of the comparison to
2843 find the right instruction. We could use "gt:DI" right in the
2844 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2846 static int
2847 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2849 if (! TARGET_ARCH64
2850 && (GET_MODE (x) == DImode
2851 || GET_MODE (dest) == DImode))
2852 return 0;
2854 /* Try to use the movrCC insns. */
2855 if (TARGET_ARCH64
2856 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2857 && y == const0_rtx
2858 && v9_regcmp_p (compare_code))
2860 rtx op0 = x;
2861 rtx temp;
2863 /* Special case for op0 != 0. This can be done with one instruction if
2864 dest == x. */
2866 if (compare_code == NE
2867 && GET_MODE (dest) == DImode
2868 && rtx_equal_p (op0, dest))
2870 emit_insn (gen_rtx_SET (VOIDmode, dest,
2871 gen_rtx_IF_THEN_ELSE (DImode,
2872 gen_rtx_fmt_ee (compare_code, DImode,
2873 op0, const0_rtx),
2874 const1_rtx,
2875 dest)));
2876 return 1;
2879 if (reg_overlap_mentioned_p (dest, op0))
2881 /* Handle the case where dest == x.
2882 We "early clobber" the result. */
2883 op0 = gen_reg_rtx (GET_MODE (x));
2884 emit_move_insn (op0, x);
2887 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2888 if (GET_MODE (op0) != DImode)
2890 temp = gen_reg_rtx (DImode);
2891 convert_move (temp, op0, 0);
2893 else
2894 temp = op0;
2895 emit_insn (gen_rtx_SET (VOIDmode, dest,
2896 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2897 gen_rtx_fmt_ee (compare_code, DImode,
2898 temp, const0_rtx),
2899 const1_rtx,
2900 dest)));
2901 return 1;
2903 else
2905 x = gen_compare_reg_1 (compare_code, x, y);
2906 y = const0_rtx;
2908 gcc_assert (GET_MODE (x) != CC_NOOVmode
2909 && GET_MODE (x) != CCX_NOOVmode);
2911 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2912 emit_insn (gen_rtx_SET (VOIDmode, dest,
2913 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2914 gen_rtx_fmt_ee (compare_code,
2915 GET_MODE (x), x, y),
2916 const1_rtx, dest)));
2917 return 1;
2922 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2923 without jumps using the addx/subx instructions. */
2925 bool
2926 emit_scc_insn (rtx operands[])
2928 rtx tem;
2929 rtx x;
2930 rtx y;
2931 enum rtx_code code;
2933 /* The quad-word fp compare library routines all return nonzero to indicate
2934 true, which is different from the equivalent libgcc routines, so we must
2935 handle them specially here. */
2936 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2938 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2939 GET_CODE (operands[1]));
2940 operands[2] = XEXP (operands[1], 0);
2941 operands[3] = XEXP (operands[1], 1);
2944 code = GET_CODE (operands[1]);
2945 x = operands[2];
2946 y = operands[3];
2948 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2949 more applications). The exception to this is "reg != 0" which can
2950 be done in one instruction on v9 (so we do it). */
2951 if (code == EQ)
2953 if (GET_MODE (x) == SImode)
2955 rtx pat;
2956 if (TARGET_ARCH64)
2957 pat = gen_seqsidi_special (operands[0], x, y);
2958 else
2959 pat = gen_seqsisi_special (operands[0], x, y);
2960 emit_insn (pat);
2961 return true;
2963 else if (GET_MODE (x) == DImode)
2965 rtx pat = gen_seqdi_special (operands[0], x, y);
2966 emit_insn (pat);
2967 return true;
2971 if (code == NE)
2973 if (GET_MODE (x) == SImode)
2975 rtx pat;
2976 if (TARGET_ARCH64)
2977 pat = gen_snesidi_special (operands[0], x, y);
2978 else
2979 pat = gen_snesisi_special (operands[0], x, y);
2980 emit_insn (pat);
2981 return true;
2983 else if (GET_MODE (x) == DImode)
2985 rtx pat;
2986 if (TARGET_VIS3)
2987 pat = gen_snedi_special_vis3 (operands[0], x, y);
2988 else
2989 pat = gen_snedi_special (operands[0], x, y);
2990 emit_insn (pat);
2991 return true;
2995 if (TARGET_V9
2996 && TARGET_ARCH64
2997 && GET_MODE (x) == DImode
2998 && !(TARGET_VIS3
2999 && (code == GTU || code == LTU))
3000 && gen_v9_scc (operands[0], code, x, y))
3001 return true;
3003 /* We can do LTU and GEU using the addx/subx instructions too. And
3004 for GTU/LEU, if both operands are registers, swap them and fall
3005 back to the easy case. */
3006 if (code == GTU || code == LEU)
3008 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3009 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3011 tem = x;
3012 x = y;
3013 y = tem;
3014 code = swap_condition (code);
3018 if (code == LTU
3019 || (!TARGET_VIS3 && code == GEU))
3021 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3022 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3023 gen_compare_reg_1 (code, x, y),
3024 const0_rtx)));
3025 return true;
3028 /* All the possibilities to use addx/subx based sequences have been
3029 exhausted, try for a 3-instruction sequence using v9 conditional
3030 moves. */
3031 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3032 return true;
3034 /* Nope, do branches. */
3035 return false;
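/* Illustrative sketch (added, not from the original source) of the
   addx-based LTU sequence referred to above, for SImode operands:

     subcc %o0, %o1, %g0    ! sets the carry bit iff %o0 <u %o1
     addx  %g0, 0, %o2      ! %o2 = 0 + 0 + carry = (%o0 <u %o1)

   GEU is the complement and can be formed analogously with subx.  */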
3038 /* Emit a conditional jump insn for the v9 architecture using comparison code
3039 CODE and jump target LABEL.
3040 This function exists to take advantage of the v9 brxx insns. */
3042 static void
3043 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3045 emit_jump_insn (gen_rtx_SET (VOIDmode,
3046 pc_rtx,
3047 gen_rtx_IF_THEN_ELSE (VOIDmode,
3048 gen_rtx_fmt_ee (code, GET_MODE (op0),
3049 op0, const0_rtx),
3050 gen_rtx_LABEL_REF (VOIDmode, label),
3051 pc_rtx)));
3054 /* Emit a conditional jump insn for the UA2011 architecture using
3055 comparison code CODE and jump target LABEL. This function exists
3056 to take advantage of the UA2011 Compare and Branch insns. */
3058 static void
3059 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3061 rtx if_then_else;
3063 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3064 gen_rtx_fmt_ee(code, GET_MODE(op0),
3065 op0, op1),
3066 gen_rtx_LABEL_REF (VOIDmode, label),
3067 pc_rtx);
3069 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
3072 void
3073 emit_conditional_branch_insn (rtx operands[])
3075 /* The quad-word fp compare library routines all return nonzero to indicate
3076 true, which is different from the equivalent libgcc routines, so we must
3077 handle them specially here. */
3078 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3080 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3081 GET_CODE (operands[0]));
3082 operands[1] = XEXP (operands[0], 0);
3083 operands[2] = XEXP (operands[0], 1);
3086 /* If we can tell early on that the comparison is against a constant
3087 that won't fit in the 5-bit signed immediate field of a cbcond,
3088 use one of the other v9 conditional branch sequences. */
3089 if (TARGET_CBCOND
3090 && GET_CODE (operands[1]) == REG
3091 && (GET_MODE (operands[1]) == SImode
3092 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3093 && (GET_CODE (operands[2]) != CONST_INT
3094 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3096 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3097 return;
3100 if (TARGET_ARCH64 && operands[2] == const0_rtx
3101 && GET_CODE (operands[1]) == REG
3102 && GET_MODE (operands[1]) == DImode)
3104 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3105 return;
3108 operands[1] = gen_compare_reg (operands[0]);
3109 operands[2] = const0_rtx;
3110 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3111 operands[1], operands[2]);
3112 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3113 operands[3]));
3117 /* Generate a DFmode part of a hard TFmode register.
3118 REG is the TFmode hard register, LOW is 1 for the
3119 low 64 bits of the register and 0 otherwise.
3122 gen_df_reg (rtx reg, int low)
3124 int regno = REGNO (reg);
3126 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3127 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3128 return gen_rtx_REG (DFmode, regno);
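/* Worked example (added): SPARC uses big-endian word order, so for a
   TFmode value in the quad register starting at %f0, gen_df_reg returns
   %f0 for LOW == 0 (the high 64 bits) and %f2 for LOW == 1 (the low
   64 bits).  */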
3131 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3132 Unlike normal calls, TFmode operands are passed by reference. It is
3133 assumed that no more than 3 operands are required. */
3135 static void
3136 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3138 rtx ret_slot = NULL, arg[3], func_sym;
3139 int i;
3141 /* We only expect to be called for conversions, unary, and binary ops. */
3142 gcc_assert (nargs == 2 || nargs == 3);
3144 for (i = 0; i < nargs; ++i)
3146 rtx this_arg = operands[i];
3147 rtx this_slot;
3149 /* TFmode arguments and return values are passed by reference. */
3150 if (GET_MODE (this_arg) == TFmode)
3152 int force_stack_temp;
3154 force_stack_temp = 0;
3155 if (TARGET_BUGGY_QP_LIB && i == 0)
3156 force_stack_temp = 1;
3158 if (GET_CODE (this_arg) == MEM
3159 && ! force_stack_temp)
3161 tree expr = MEM_EXPR (this_arg);
3162 if (expr)
3163 mark_addressable (expr);
3164 this_arg = XEXP (this_arg, 0);
3166 else if (CONSTANT_P (this_arg)
3167 && ! force_stack_temp)
3169 this_slot = force_const_mem (TFmode, this_arg);
3170 this_arg = XEXP (this_slot, 0);
3172 else
3174 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3176 /* Operand 0 is the return value. We'll copy it out later. */
3177 if (i > 0)
3178 emit_move_insn (this_slot, this_arg);
3179 else
3180 ret_slot = this_slot;
3182 this_arg = XEXP (this_slot, 0);
3186 arg[i] = this_arg;
3189 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3191 if (GET_MODE (operands[0]) == TFmode)
3193 if (nargs == 2)
3194 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3195 arg[0], GET_MODE (arg[0]),
3196 arg[1], GET_MODE (arg[1]));
3197 else
3198 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3199 arg[0], GET_MODE (arg[0]),
3200 arg[1], GET_MODE (arg[1]),
3201 arg[2], GET_MODE (arg[2]));
3203 if (ret_slot)
3204 emit_move_insn (operands[0], ret_slot);
3206 else
3208 rtx ret;
3210 gcc_assert (nargs == 2);
3212 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3213 GET_MODE (operands[0]), 1,
3214 arg[1], GET_MODE (arg[1]));
3216 if (ret != operands[0])
3217 emit_move_insn (operands[0], ret);
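/* Illustrative note (added): this matches the SPARC quad-precision
   software ABI, where e.g. _Qp_add has the C prototype

     void _Qp_add (long double *result, const long double *a,
                   const long double *b);

   so the return slot and the arguments all travel as pointers, which is
   why MEMs are reduced to their addresses above.  */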
3221 /* Expand soft-float TFmode calls to sparc abi routines. */
3223 static void
3224 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3226 const char *func;
3228 switch (code)
3230 case PLUS:
3231 func = "_Qp_add";
3232 break;
3233 case MINUS:
3234 func = "_Qp_sub";
3235 break;
3236 case MULT:
3237 func = "_Qp_mul";
3238 break;
3239 case DIV:
3240 func = "_Qp_div";
3241 break;
3242 default:
3243 gcc_unreachable ();
3246 emit_soft_tfmode_libcall (func, 3, operands);
3249 static void
3250 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3252 const char *func;
3254 gcc_assert (code == SQRT);
3255 func = "_Qp_sqrt";
3257 emit_soft_tfmode_libcall (func, 2, operands);
3260 static void
3261 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3263 const char *func;
3265 switch (code)
3267 case FLOAT_EXTEND:
3268 switch (GET_MODE (operands[1]))
3270 case SFmode:
3271 func = "_Qp_stoq";
3272 break;
3273 case DFmode:
3274 func = "_Qp_dtoq";
3275 break;
3276 default:
3277 gcc_unreachable ();
3279 break;
3281 case FLOAT_TRUNCATE:
3282 switch (GET_MODE (operands[0]))
3284 case SFmode:
3285 func = "_Qp_qtos";
3286 break;
3287 case DFmode:
3288 func = "_Qp_qtod";
3289 break;
3290 default:
3291 gcc_unreachable ();
3293 break;
3295 case FLOAT:
3296 switch (GET_MODE (operands[1]))
3298 case SImode:
3299 func = "_Qp_itoq";
3300 if (TARGET_ARCH64)
3301 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3302 break;
3303 case DImode:
3304 func = "_Qp_xtoq";
3305 break;
3306 default:
3307 gcc_unreachable ();
3309 break;
3311 case UNSIGNED_FLOAT:
3312 switch (GET_MODE (operands[1]))
3314 case SImode:
3315 func = "_Qp_uitoq";
3316 if (TARGET_ARCH64)
3317 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3318 break;
3319 case DImode:
3320 func = "_Qp_uxtoq";
3321 break;
3322 default:
3323 gcc_unreachable ();
3325 break;
3327 case FIX:
3328 switch (GET_MODE (operands[0]))
3330 case SImode:
3331 func = "_Qp_qtoi";
3332 break;
3333 case DImode:
3334 func = "_Qp_qtox";
3335 break;
3336 default:
3337 gcc_unreachable ();
3339 break;
3341 case UNSIGNED_FIX:
3342 switch (GET_MODE (operands[0]))
3344 case SImode:
3345 func = "_Qp_qtoui";
3346 break;
3347 case DImode:
3348 func = "_Qp_qtoux";
3349 break;
3350 default:
3351 gcc_unreachable ();
3353 break;
3355 default:
3356 gcc_unreachable ();
3359 emit_soft_tfmode_libcall (func, 2, operands);
3362 /* Expand a hard-float tfmode operation. All arguments must be in
3363 registers. */
3365 static void
3366 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3368 rtx op, dest;
3370 if (GET_RTX_CLASS (code) == RTX_UNARY)
3372 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3373 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3375 else
3377 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3378 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3379 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3380 operands[1], operands[2]);
3383 if (register_operand (operands[0], VOIDmode))
3384 dest = operands[0];
3385 else
3386 dest = gen_reg_rtx (GET_MODE (operands[0]));
3388 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3390 if (dest != operands[0])
3391 emit_move_insn (operands[0], dest);
3394 void
3395 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3397 if (TARGET_HARD_QUAD)
3398 emit_hard_tfmode_operation (code, operands);
3399 else
3400 emit_soft_tfmode_binop (code, operands);
3403 void
3404 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3406 if (TARGET_HARD_QUAD)
3407 emit_hard_tfmode_operation (code, operands);
3408 else
3409 emit_soft_tfmode_unop (code, operands);
3412 void
3413 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3415 if (TARGET_HARD_QUAD)
3416 emit_hard_tfmode_operation (code, operands);
3417 else
3418 emit_soft_tfmode_cvt (code, operands);
3421 /* Return nonzero if a branch/jump/call instruction will be emitting
3422 a nop into its delay slot. */
3425 empty_delay_slot (rtx_insn *insn)
3427 rtx seq;
3429 /* If no previous instruction (should not happen), return true. */
3430 if (PREV_INSN (insn) == NULL)
3431 return 1;
3433 seq = NEXT_INSN (PREV_INSN (insn));
3434 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3435 return 0;
3437 return 1;
3440 /* Return nonzero if we should emit a nop after a cbcond instruction.
3441 The cbcond instruction does not have a delay slot; however, there is
3442 a severe performance penalty if a control transfer appears right
3443 after a cbcond. Therefore we emit a nop when we detect this
3444 situation. */
3447 emit_cbcond_nop (rtx insn)
3449 rtx next = next_active_insn (insn);
3451 if (!next)
3452 return 1;
3454 if (NONJUMP_INSN_P (next)
3455 && GET_CODE (PATTERN (next)) == SEQUENCE)
3456 next = XVECEXP (PATTERN (next), 0, 0);
3457 else if (CALL_P (next)
3458 && GET_CODE (PATTERN (next)) == PARALLEL)
3460 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3462 if (GET_CODE (delay) == RETURN)
3464 /* It's a sibling call. Do not emit the nop if we're going
3465 to emit something other than the jump itself as the first
3466 instruction of the sibcall sequence. */
3467 if (sparc_leaf_function_p || TARGET_FLAT)
3468 return 0;
3472 if (NONJUMP_INSN_P (next))
3473 return 0;
3475 return 1;
3478 /* Return nonzero if TRIAL can go into the call delay slot. */
3481 eligible_for_call_delay (rtx trial)
3483 rtx pat;
3485 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3486 return 0;
3488 /* Binutils allows
3489 call __tls_get_addr, %tgd_call (foo)
3490 add %l7, %o0, %o0, %tgd_add (foo)
3491 while Sun as/ld does not. */
3492 if (TARGET_GNU_TLS || !TARGET_TLS)
3493 return 1;
3495 pat = PATTERN (trial);
3497 /* We must reject tgd_add{32|64}, i.e.
3498 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3499 and tldm_add{32|64}, i.e.
3500 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3501 for Sun as/ld. */
3502 if (GET_CODE (pat) == SET
3503 && GET_CODE (SET_SRC (pat)) == PLUS)
3505 rtx unspec = XEXP (SET_SRC (pat), 1);
3507 if (GET_CODE (unspec) == UNSPEC
3508 && (XINT (unspec, 1) == UNSPEC_TLSGD
3509 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3510 return 0;
3513 return 1;
3516 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3517 instruction. RETURN_P is true if the v9 variant 'return' is to be
3518 considered in the test too.
3520 TRIAL must be a SET whose destination is a REG appropriate for the
3521 'restore' instruction or, if RETURN_P is true, for the 'return'
3522 instruction. */
3524 static int
3525 eligible_for_restore_insn (rtx trial, bool return_p)
3527 rtx pat = PATTERN (trial);
3528 rtx src = SET_SRC (pat);
3529 bool src_is_freg = false;
3530 rtx src_reg;
3532 /* Since we now can do moves between float and integer registers when
3533 VIS3 is enabled, we have to catch this case. We can allow such
3534 moves when doing a 'return' however. */
3535 src_reg = src;
3536 if (GET_CODE (src_reg) == SUBREG)
3537 src_reg = SUBREG_REG (src_reg);
3538 if (GET_CODE (src_reg) == REG
3539 && SPARC_FP_REG_P (REGNO (src_reg)))
3540 src_is_freg = true;
3542 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3543 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3544 && arith_operand (src, GET_MODE (src))
3545 && ! src_is_freg)
3547 if (TARGET_ARCH64)
3548 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3549 else
3550 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3553 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3554 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3555 && arith_double_operand (src, GET_MODE (src))
3556 && ! src_is_freg)
3557 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3559 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3560 else if (! TARGET_FPU && register_operand (src, SFmode))
3561 return 1;
3563 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3564 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3565 return 1;
3567 /* If we have the 'return' instruction, anything that does not use
3568 local or output registers and can go into a delay slot wins. */
3569 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3570 return 1;
3572 /* The 'restore src1,src2,dest' pattern for SImode. */
3573 else if (GET_CODE (src) == PLUS
3574 && register_operand (XEXP (src, 0), SImode)
3575 && arith_operand (XEXP (src, 1), SImode))
3576 return 1;
3578 /* The 'restore src1,src2,dest' pattern for DImode. */
3579 else if (GET_CODE (src) == PLUS
3580 && register_operand (XEXP (src, 0), DImode)
3581 && arith_double_operand (XEXP (src, 1), DImode))
3582 return 1;
3584 /* The 'restore src1,%lo(src2),dest' pattern. */
3585 else if (GET_CODE (src) == LO_SUM
3586 && ! TARGET_CM_MEDMID
3587 && ((register_operand (XEXP (src, 0), SImode)
3588 && immediate_operand (XEXP (src, 1), SImode))
3589 || (TARGET_ARCH64
3590 && register_operand (XEXP (src, 0), DImode)
3591 && immediate_operand (XEXP (src, 1), DImode))))
3592 return 1;
3594 /* The 'restore src,src,dest' pattern. */
3595 else if (GET_CODE (src) == ASHIFT
3596 && (register_operand (XEXP (src, 0), SImode)
3597 || register_operand (XEXP (src, 0), DImode))
3598 && XEXP (src, 1) == const1_rtx)
3599 return 1;
3601 return 0;
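/* Illustrative example (added, hypothetical output): the patterns above
   let an epilogue fold the final computation into the window restore,
   e.g. for "return a + b;"

     ret
     restore %i0, %i1, %o0  ! add done by restore; result lands in the
                            ! caller's %o0 as the window slides back

   instead of a separate add followed by a plain restore.  */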
3604 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3607 eligible_for_return_delay (rtx trial)
3609 int regno;
3610 rtx pat;
3612 /* If the function uses __builtin_eh_return, the eh_return machinery
3613 occupies the delay slot. */
3614 if (crtl->calls_eh_return)
3615 return 0;
3617 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3618 return 0;
3620 /* In the case of a leaf or flat function, anything can go into the slot. */
3621 if (sparc_leaf_function_p || TARGET_FLAT)
3622 return 1;
3624 if (!NONJUMP_INSN_P (trial))
3625 return 0;
3627 pat = PATTERN (trial);
3628 if (GET_CODE (pat) == PARALLEL)
3630 int i;
3632 if (! TARGET_V9)
3633 return 0;
3634 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3636 rtx expr = XVECEXP (pat, 0, i);
3637 if (GET_CODE (expr) != SET)
3638 return 0;
3639 if (GET_CODE (SET_DEST (expr)) != REG)
3640 return 0;
3641 regno = REGNO (SET_DEST (expr));
3642 if (regno >= 8 && regno < 24)
3643 return 0;
3645 return !epilogue_renumber (&pat, 1);
3648 if (GET_CODE (pat) != SET)
3649 return 0;
3651 if (GET_CODE (SET_DEST (pat)) != REG)
3652 return 0;
3654 regno = REGNO (SET_DEST (pat));
3656 /* Otherwise, only operations which can be done in tandem with
3657 a `restore' or `return' insn can go into the delay slot. */
3658 if (regno >= 8 && regno < 24)
3659 return 0;
3661 /* If this instruction sets up a floating point register and we have a return
3662 instruction, it can probably go in. But restore will not work
3663 with FP_REGS. */
3664 if (! SPARC_INT_REG_P (regno))
3665 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3667 return eligible_for_restore_insn (trial, true);
3670 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3673 eligible_for_sibcall_delay (rtx trial)
3675 rtx pat;
3677 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3678 return 0;
3680 if (!NONJUMP_INSN_P (trial))
3681 return 0;
3683 pat = PATTERN (trial);
3685 if (sparc_leaf_function_p || TARGET_FLAT)
3687 /* If the tail call is done using the call instruction,
3688 we have to restore %o7 in the delay slot. */
3689 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3690 return 0;
3692 /* %g1 is used to build the function address. */
3693 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3694 return 0;
3696 return 1;
3699 if (GET_CODE (pat) != SET)
3700 return 0;
3702 /* Otherwise, only operations which can be done in tandem with
3703 a `restore' insn can go into the delay slot. */
3704 if (GET_CODE (SET_DEST (pat)) != REG
3705 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3706 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3707 return 0;
3709 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3710 in most cases. */
3711 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3712 return 0;
3714 return eligible_for_restore_insn (trial, false);
3717 /* Determine if it's legal to put X into the constant pool. This
3718 is not possible if X contains the address of a symbol that is
3719 not constant (TLS) or not known at final link time (PIC). */
3721 static bool
3722 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3724 switch (GET_CODE (x))
3726 case CONST_INT:
3727 case CONST_DOUBLE:
3728 case CONST_VECTOR:
3729 /* Accept all non-symbolic constants. */
3730 return false;
3732 case LABEL_REF:
3733 /* Labels are OK iff we are non-PIC. */
3734 return flag_pic != 0;
3736 case SYMBOL_REF:
3737 /* 'Naked' TLS symbol references are never OK,
3738 non-TLS symbols are OK iff we are non-PIC. */
3739 if (SYMBOL_REF_TLS_MODEL (x))
3740 return true;
3741 else
3742 return flag_pic != 0;
3744 case CONST:
3745 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3746 case PLUS:
3747 case MINUS:
3748 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3749 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3750 case UNSPEC:
3751 return true;
3752 default:
3753 gcc_unreachable ();
3757 /* Global Offset Table support. */
3758 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3759 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3761 /* Return the SYMBOL_REF for the Global Offset Table. */
3763 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3765 static rtx
3766 sparc_got (void)
3768 if (!sparc_got_symbol)
3769 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3771 return sparc_got_symbol;
3774 /* Ensure that we are not using patterns that are not OK with PIC. */
3777 check_pic (int i)
3779 rtx op;
3781 switch (flag_pic)
3783 case 1:
3784 op = recog_data.operand[i];
3785 gcc_assert (GET_CODE (op) != SYMBOL_REF
3786 && (GET_CODE (op) != CONST
3787 || (GET_CODE (XEXP (op, 0)) == MINUS
3788 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3789 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3790 case 2:
3791 default:
3792 return 1;
3796 /* Return true if X is an address which needs a temporary register when
3797 reloaded while generating PIC code. */
3800 pic_address_needs_scratch (rtx x)
3802 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3803 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3804 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3805 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3806 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3807 return 1;
3809 return 0;
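/* Worked example (added): (const (plus (symbol_ref "x") (const_int
   8192))) needs a scratch register because 8192 does not fit in a
   13-bit signed immediate and therefore cannot be folded into the
   %lo() part of the PIC sequence; with (const_int 8) instead, no
   scratch is needed.  */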
3812 /* Determine if a given RTX is a valid constant. We already know this
3813 satisfies CONSTANT_P. */
3815 static bool
3816 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3818 switch (GET_CODE (x))
3820 case CONST:
3821 case SYMBOL_REF:
3822 if (sparc_tls_referenced_p (x))
3823 return false;
3824 break;
3826 case CONST_DOUBLE:
3827 if (GET_MODE (x) == VOIDmode)
3828 return true;
3830 /* Floating point constants are generally not ok.
3831 The only exception is 0.0 and all-ones in VIS. */
3832 if (TARGET_VIS
3833 && SCALAR_FLOAT_MODE_P (mode)
3834 && (const_zero_operand (x, mode)
3835 || const_all_ones_operand (x, mode)))
3836 return true;
3838 return false;
3840 case CONST_VECTOR:
3841 /* Vector constants are generally not ok.
3842 The only exception is 0 or -1 in VIS. */
3843 if (TARGET_VIS
3844 && (const_zero_operand (x, mode)
3845 || const_all_ones_operand (x, mode)))
3846 return true;
3848 return false;
3850 default:
3851 break;
3854 return true;
3857 /* Determine if a given RTX is a valid constant address. */
3859 bool
3860 constant_address_p (rtx x)
3862 switch (GET_CODE (x))
3864 case LABEL_REF:
3865 case CONST_INT:
3866 case HIGH:
3867 return true;
3869 case CONST:
3870 if (flag_pic && pic_address_needs_scratch (x))
3871 return false;
3872 return sparc_legitimate_constant_p (Pmode, x);
3874 case SYMBOL_REF:
3875 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3877 default:
3878 return false;
3882 /* Nonzero if the constant value X is a legitimate general operand
3883 when generating PIC code. It is given that flag_pic is on and
3884 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3886 bool
3887 legitimate_pic_operand_p (rtx x)
3889 if (pic_address_needs_scratch (x))
3890 return false;
3891 if (sparc_tls_referenced_p (x))
3892 return false;
3893 return true;
3896 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3897 (CONST_INT_P (X) \
3898 && INTVAL (X) >= -0x1000 \
3899 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3901 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3902 (CONST_INT_P (X) \
3903 && INTVAL (X) >= -0x1000 \
3904 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
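/* Illustrative note (added): the upper bound is shrunk by the mode size
   so that the offset of the last piece of a multi-word access still
   fits.  E.g. for DImode (8 bytes), RTX_OK_FOR_OFFSET_P accepts offsets
   in [-4096, 4087], keeping offset + 7 within the 13-bit signed
   displacement range.  */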
3906 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3908 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3909 ordinarily. This changes a bit when generating PIC. */
3911 static bool
3912 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3914 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3916 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3917 rs1 = addr;
3918 else if (GET_CODE (addr) == PLUS)
3920 rs1 = XEXP (addr, 0);
3921 rs2 = XEXP (addr, 1);
3923 /* Canonicalize. REG comes first, if there are no regs,
3924 LO_SUM comes first. */
3925 if (!REG_P (rs1)
3926 && GET_CODE (rs1) != SUBREG
3927 && (REG_P (rs2)
3928 || GET_CODE (rs2) == SUBREG
3929 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3931 rs1 = XEXP (addr, 1);
3932 rs2 = XEXP (addr, 0);
3935 if ((flag_pic == 1
3936 && rs1 == pic_offset_table_rtx
3937 && !REG_P (rs2)
3938 && GET_CODE (rs2) != SUBREG
3939 && GET_CODE (rs2) != LO_SUM
3940 && GET_CODE (rs2) != MEM
3941 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3942 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3943 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3944 || ((REG_P (rs1)
3945 || GET_CODE (rs1) == SUBREG)
3946 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3948 imm1 = rs2;
3949 rs2 = NULL;
3951 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3952 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3954 /* We prohibit REG + REG for TFmode when there are no quad move insns
3955 and we consequently need to split. We do this because REG+REG
3956 is not an offsettable address. If we get the situation in reload
3957 where source and destination of a movtf pattern are both MEMs with
3958 REG+REG address, then only one of them gets converted to an
3959 offsettable address. */
3960 if (mode == TFmode
3961 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3962 return 0;
3964 /* Likewise for TImode, but in all cases. */
3965 if (mode == TImode)
3966 return 0;
3968 /* We prohibit REG + REG on ARCH32 if not optimizing for
3969 DFmode/DImode because then mem_min_alignment is likely to be zero
3970 after reload and the forced split would lack a matching splitter
3971 pattern. */
3972 if (TARGET_ARCH32 && !optimize
3973 && (mode == DFmode || mode == DImode))
3974 return 0;
3976 else if (USE_AS_OFFSETABLE_LO10
3977 && GET_CODE (rs1) == LO_SUM
3978 && TARGET_ARCH64
3979 && ! TARGET_CM_MEDMID
3980 && RTX_OK_FOR_OLO10_P (rs2, mode))
3982 rs2 = NULL;
3983 imm1 = XEXP (rs1, 1);
3984 rs1 = XEXP (rs1, 0);
3985 if (!CONSTANT_P (imm1)
3986 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3987 return 0;
3990 else if (GET_CODE (addr) == LO_SUM)
3992 rs1 = XEXP (addr, 0);
3993 imm1 = XEXP (addr, 1);
3995 if (!CONSTANT_P (imm1)
3996 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3997 return 0;
3999 /* We can't allow TFmode in 32-bit mode, because an offset greater
4000 than the alignment (8) may cause the LO_SUM to overflow. */
4001 if (mode == TFmode && TARGET_ARCH32)
4002 return 0;
4004 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4005 return 1;
4006 else
4007 return 0;
4009 if (GET_CODE (rs1) == SUBREG)
4010 rs1 = SUBREG_REG (rs1);
4011 if (!REG_P (rs1))
4012 return 0;
4014 if (rs2)
4016 if (GET_CODE (rs2) == SUBREG)
4017 rs2 = SUBREG_REG (rs2);
4018 if (!REG_P (rs2))
4019 return 0;
4022 if (strict)
4024 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4025 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4026 return 0;
4028 else
4030 if ((! SPARC_INT_REG_P (REGNO (rs1))
4031 && REGNO (rs1) != FRAME_POINTER_REGNUM
4032 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4033 || (rs2
4034 && (! SPARC_INT_REG_P (REGNO (rs2))
4035 && REGNO (rs2) != FRAME_POINTER_REGNUM
4036 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4037 return 0;
4039 return 1;
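/* Illustrative examples of the rules above (assuming 32-bit mode and
   integer base registers): [%l1 + %l2] and [%i0 + 64] are accepted;
   [%i0 + 8192] is rejected because the offset does not fit in the
   13-bit signed field and must be legitimized first; [%l1 + %l2] is
   rejected for TFmode without hard-quad support and for TImode in
   all cases, since REG+REG addresses are not offsettable once the
   access is split. */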
4042 /* Return the SYMBOL_REF for the tls_get_addr function. */
4044 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4046 static rtx
4047 sparc_tls_get_addr (void)
4049 if (!sparc_tls_symbol)
4050 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4052 return sparc_tls_symbol;
4055 /* Return the Global Offset Table to be used in TLS mode. */
4057 static rtx
4058 sparc_tls_got (void)
4060 /* In PIC mode, this is just the PIC offset table. */
4061 if (flag_pic)
4063 crtl->uses_pic_offset_table = 1;
4064 return pic_offset_table_rtx;
4067 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4068 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4069 if (TARGET_SUN_TLS && TARGET_ARCH32)
4071 load_got_register ();
4072 return global_offset_table_rtx;
4075 /* In all other cases, we load a new pseudo with the GOT symbol. */
4076 return copy_to_reg (sparc_got ());
4079 /* Return true if X contains a thread-local symbol. */
4081 static bool
4082 sparc_tls_referenced_p (rtx x)
4084 if (!TARGET_HAVE_TLS)
4085 return false;
4087 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4088 x = XEXP (XEXP (x, 0), 0);
4090 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4091 return true;
4093 /* That's all we handle in sparc_legitimize_tls_address for now. */
4094 return false;
4097 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4098 this (thread-local) address. */
4100 static rtx
4101 sparc_legitimize_tls_address (rtx addr)
4103 rtx temp1, temp2, temp3, ret, o0, got;
4104 rtx_insn *insn;
4106 gcc_assert (can_create_pseudo_p ());
4108 if (GET_CODE (addr) == SYMBOL_REF)
4109 switch (SYMBOL_REF_TLS_MODEL (addr))
4111 case TLS_MODEL_GLOBAL_DYNAMIC:
4112 start_sequence ();
4113 temp1 = gen_reg_rtx (SImode);
4114 temp2 = gen_reg_rtx (SImode);
4115 ret = gen_reg_rtx (Pmode);
4116 o0 = gen_rtx_REG (Pmode, 8);
4117 got = sparc_tls_got ();
4118 emit_insn (gen_tgd_hi22 (temp1, addr));
4119 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4120 if (TARGET_ARCH32)
4122 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4123 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4124 addr, const1_rtx));
4126 else
4128 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4129 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4130 addr, const1_rtx));
4132 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4133 insn = get_insns ();
4134 end_sequence ();
4135 emit_libcall_block (insn, ret, o0, addr);
4136 break;
4138 case TLS_MODEL_LOCAL_DYNAMIC:
4139 start_sequence ();
4140 temp1 = gen_reg_rtx (SImode);
4141 temp2 = gen_reg_rtx (SImode);
4142 temp3 = gen_reg_rtx (Pmode);
4143 ret = gen_reg_rtx (Pmode);
4144 o0 = gen_rtx_REG (Pmode, 8);
4145 got = sparc_tls_got ();
4146 emit_insn (gen_tldm_hi22 (temp1));
4147 emit_insn (gen_tldm_lo10 (temp2, temp1));
4148 if (TARGET_ARCH32)
4150 emit_insn (gen_tldm_add32 (o0, got, temp2));
4151 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4152 const1_rtx));
4154 else
4156 emit_insn (gen_tldm_add64 (o0, got, temp2));
4157 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4158 const1_rtx));
4160 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4161 insn = get_insns ();
4162 end_sequence ();
4163 emit_libcall_block (insn, temp3, o0,
4164 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4165 UNSPEC_TLSLD_BASE));
4166 temp1 = gen_reg_rtx (SImode);
4167 temp2 = gen_reg_rtx (SImode);
4168 emit_insn (gen_tldo_hix22 (temp1, addr));
4169 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4170 if (TARGET_ARCH32)
4171 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4172 else
4173 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4174 break;
4176 case TLS_MODEL_INITIAL_EXEC:
4177 temp1 = gen_reg_rtx (SImode);
4178 temp2 = gen_reg_rtx (SImode);
4179 temp3 = gen_reg_rtx (Pmode);
4180 got = sparc_tls_got ();
4181 emit_insn (gen_tie_hi22 (temp1, addr));
4182 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4183 if (TARGET_ARCH32)
4184 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4185 else
4186 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4187 if (TARGET_SUN_TLS)
4189 ret = gen_reg_rtx (Pmode);
4190 if (TARGET_ARCH32)
4191 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4192 temp3, addr));
4193 else
4194 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4195 temp3, addr));
4197 else
4198 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4199 break;
4201 case TLS_MODEL_LOCAL_EXEC:
4202 temp1 = gen_reg_rtx (Pmode);
4203 temp2 = gen_reg_rtx (Pmode);
4204 if (TARGET_ARCH32)
4206 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4207 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4209 else
4211 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4212 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4214 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4215 break;
4217 default:
4218 gcc_unreachable ();
4221 else if (GET_CODE (addr) == CONST)
4223 rtx base, offset;
4225 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4227 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4228 offset = XEXP (XEXP (addr, 0), 1);
4230 base = force_operand (base, NULL_RTX);
4231 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4232 offset = force_reg (Pmode, offset);
4233 ret = gen_rtx_PLUS (Pmode, base, offset);
4236 else
4237 gcc_unreachable (); /* for now ... */
4239 return ret;
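/* A sketch of the code emitted for the two simplest models, roughly
   following the SPARC TLS ABI (%g7 is the thread pointer, %o0 the
   outgoing argument register used above; %t1/%t2 stand for the
   temporaries):

     local-exec:
       sethi  %tle_hix22(x), %t1
       xor    %t1, %tle_lox10(x), %t2
       add    %g7, %t2, <result>

     global-dynamic (32-bit):
       sethi  %tgd_hi22(x), %t1
       add    %t1, %tgd_lo10(x), %t2
       add    <got>, %t2, %o0, %tgd_add(x)
       call   __tls_get_addr, %tgd_call(x)

   The emit_libcall_block wrapping above lets the whole call sequence
   be treated as a single load of the symbol's address. */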
4242 /* Legitimize PIC addresses. If the address is already position-independent,
4243 we return ORIG. Newly generated position-independent addresses go into a
4244 reg. This is REG if nonzero, otherwise we allocate register(s) as
4245 necessary. */
4247 static rtx
4248 sparc_legitimize_pic_address (rtx orig, rtx reg)
4250 bool gotdata_op = false;
4252 if (GET_CODE (orig) == SYMBOL_REF
4253 /* See the comment in sparc_expand_move. */
4254 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4256 rtx pic_ref, address;
4257 rtx_insn *insn;
4259 if (reg == 0)
4261 gcc_assert (can_create_pseudo_p ());
4262 reg = gen_reg_rtx (Pmode);
4265 if (flag_pic == 2)
4267 /* If not during reload, allocate another temp reg here for loading
4268 in the address, so that these instructions can be optimized
4269 properly. */
4270 rtx temp_reg = (! can_create_pseudo_p ()
4271 ? reg : gen_reg_rtx (Pmode));
4273 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4274 won't get confused into thinking that these two instructions
4275 are loading in the true address of the symbol. If in the
4276 future a PIC rtx exists, that should be used instead. */
4277 if (TARGET_ARCH64)
4279 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4280 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4282 else
4284 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4285 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4287 address = temp_reg;
4288 gotdata_op = true;
4290 else
4291 address = orig;
4293 crtl->uses_pic_offset_table = 1;
4294 if (gotdata_op)
4296 if (TARGET_ARCH64)
4297 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4298 pic_offset_table_rtx,
4299 address, orig));
4300 else
4301 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4302 pic_offset_table_rtx,
4303 address, orig));
4305 else
4307 pic_ref
4308 = gen_const_mem (Pmode,
4309 gen_rtx_PLUS (Pmode,
4310 pic_offset_table_rtx, address));
4311 insn = emit_move_insn (reg, pic_ref);
4314 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4315 by the loop optimizer. */
4316 set_unique_reg_note (insn, REG_EQUAL, orig);
4317 return reg;
4319 else if (GET_CODE (orig) == CONST)
4321 rtx base, offset;
4323 if (GET_CODE (XEXP (orig, 0)) == PLUS
4324 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4325 return orig;
4327 if (reg == 0)
4329 gcc_assert (can_create_pseudo_p ());
4330 reg = gen_reg_rtx (Pmode);
4333 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4334 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4335 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4336 base == reg ? NULL_RTX : reg);
4338 if (GET_CODE (offset) == CONST_INT)
4340 if (SMALL_INT (offset))
4341 return plus_constant (Pmode, base, INTVAL (offset));
4342 else if (can_create_pseudo_p ())
4343 offset = force_reg (Pmode, offset);
4344 else
4345 /* If we reach here, then something is seriously wrong. */
4346 gcc_unreachable ();
4348 return gen_rtx_PLUS (Pmode, base, offset);
4350 else if (GET_CODE (orig) == LABEL_REF)
4351 /* ??? We ought to be checking that the register is live instead, in case
4352 it is eliminated. */
4353 crtl->uses_pic_offset_table = 1;
4355 return orig;
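/* At the assembly level this comes out roughly as follows, with %l7
   holding the GOT pointer (a simplified sketch). For flag_pic == 1
   (-fpic), the GOT slot is within reach of a 13-bit displacement:

     ld   [%l7 + sym], %reg          ! R_SPARC_GOT13

   For flag_pic == 2 (-fPIC), the offset is built in two insns, and
   with a GOTDATA-capable assembler the load is annotated so the
   linker can relax it for locally-bound symbols:

     sethi  %gdop_hix22(sym), %tmp
     xor    %tmp, %gdop_lox10(sym), %tmp
     ld     [%l7 + %tmp], %reg, %gdop(sym)
*/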
4358 /* Try machine-dependent ways of modifying an illegitimate address X
4359 to be legitimate. If we find one, return the new, valid address.
4361 OLDX is the address as it was before break_out_memory_refs was called.
4362 In some cases it is useful to look at this to decide what needs to be done.
4364 MODE is the mode of the operand pointed to by X.
4366 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4368 static rtx
4369 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4370 enum machine_mode mode)
4372 rtx orig_x = x;
4374 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4375 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4376 force_operand (XEXP (x, 0), NULL_RTX));
4377 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4378 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4379 force_operand (XEXP (x, 1), NULL_RTX));
4380 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4381 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4382 XEXP (x, 1));
4383 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4384 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4385 force_operand (XEXP (x, 1), NULL_RTX));
4387 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4388 return x;
4390 if (sparc_tls_referenced_p (x))
4391 x = sparc_legitimize_tls_address (x);
4392 else if (flag_pic)
4393 x = sparc_legitimize_pic_address (x, NULL_RTX);
4394 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4395 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4396 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4397 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4398 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4399 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4400 else if (GET_CODE (x) == SYMBOL_REF
4401 || GET_CODE (x) == CONST
4402 || GET_CODE (x) == LABEL_REF)
4403 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4405 return x;
4408 /* Delegitimize an address that was legitimized by the above function. */
4410 static rtx
4411 sparc_delegitimize_address (rtx x)
4413 x = delegitimize_mem_from_attrs (x);
4415 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4416 switch (XINT (XEXP (x, 1), 1))
4418 case UNSPEC_MOVE_PIC:
4419 case UNSPEC_TLSLE:
4420 x = XVECEXP (XEXP (x, 1), 0, 0);
4421 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4422 break;
4423 default:
4424 break;
4427 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4428 if (GET_CODE (x) == MINUS
4429 && REG_P (XEXP (x, 0))
4430 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4431 && GET_CODE (XEXP (x, 1)) == LO_SUM
4432 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4433 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4435 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4436 gcc_assert (GET_CODE (x) == LABEL_REF);
4439 return x;
4442 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4443 replace the input X, or the original X if no replacement is called for.
4444 The output parameter *WIN is 1 if the calling macro should goto WIN,
4445 0 if it should not.
4447 For SPARC, we wish to handle addresses by splitting them into
4448 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4449 This cuts the number of extra insns by one.
4451 Do nothing when generating PIC code and the address is a symbolic
4452 operand or requires a scratch register. */
4455 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4456 int opnum, int type,
4457 int ind_levels ATTRIBUTE_UNUSED, int *win)
4459 /* Decompose SImode constants into HIGH+LO_SUM. */
4460 if (CONSTANT_P (x)
4461 && (mode != TFmode || TARGET_ARCH64)
4462 && GET_MODE (x) == SImode
4463 && GET_CODE (x) != LO_SUM
4464 && GET_CODE (x) != HIGH
4465 && sparc_cmodel <= CM_MEDLOW
4466 && !(flag_pic
4467 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4469 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4470 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4471 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4472 opnum, (enum reload_type)type);
4473 *win = 1;
4474 return x;
4477 /* We have to recognize what we have already generated above. */
4478 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4480 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4481 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4482 opnum, (enum reload_type)type);
4483 *win = 1;
4484 return x;
4487 *win = 0;
4488 return x;
4491 /* Return true if ADDR (a legitimate address expression)
4492 has an effect that depends on the machine mode it is used for.
4494 In PIC mode,
4496 (mem:HI [%l7+a])
4498 is not equivalent to
4500 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4502 because [%l7+a+1] is interpreted as the address of (a+1). */
4505 static bool
4506 sparc_mode_dependent_address_p (const_rtx addr,
4507 addr_space_t as ATTRIBUTE_UNUSED)
4509 if (flag_pic && GET_CODE (addr) == PLUS)
4511 rtx op0 = XEXP (addr, 0);
4512 rtx op1 = XEXP (addr, 1);
4513 if (op0 == pic_offset_table_rtx
4514 && symbolic_operand (op1, VOIDmode))
4515 return true;
4518 return false;
4521 #ifdef HAVE_GAS_HIDDEN
4522 # define USE_HIDDEN_LINKONCE 1
4523 #else
4524 # define USE_HIDDEN_LINKONCE 0
4525 #endif
4527 static void
4528 get_pc_thunk_name (char name[32], unsigned int regno)
4530 const char *reg_name = reg_names[regno];
4532 /* Skip the leading '%' as that cannot be used in a
4533 symbol name. */
4534 reg_name += 1;
4536 if (USE_HIDDEN_LINKONCE)
4537 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4538 else
4539 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4542 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4544 static rtx
4545 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4547 int orig_flag_pic = flag_pic;
4548 rtx insn;
4550 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4551 flag_pic = 0;
4552 if (TARGET_ARCH64)
4553 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4554 else
4555 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4556 flag_pic = orig_flag_pic;
4558 return insn;
4561 /* Emit code to load the GOT register. */
4563 void
4564 load_got_register (void)
4566 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4567 if (!global_offset_table_rtx)
4568 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4570 if (TARGET_VXWORKS_RTP)
4571 emit_insn (gen_vxworks_load_got ());
4572 else
4574 /* The GOT symbol is subject to a PC-relative relocation so we need a
4575 helper function to add the PC value and thus get the final value. */
4576 if (!got_helper_rtx)
4578 char name[32];
4579 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4580 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4583 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4584 got_helper_rtx,
4585 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4588 /* Need to emit this whether or not we obey regdecls,
4589 since setjmp/longjmp can cause life info to screw up.
4590 ??? In the case where we don't obey regdecls, this is not sufficient
4591 since we may not fall out the bottom. */
4592 emit_use (global_offset_table_rtx);
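/* For reference, the 32-bit sequence this produces looks roughly
   like the well-known idiom:

     sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
     call   __sparc_get_pc_thunk.l7
      add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk body is "jmp %o7+8" with "add %o7, %l7, %l7" in
   its delay slot. The -4/+4 addends cancel against %o7, which
   points at the call, i.e. 4 bytes past the sethi, so %l7 ends up
   holding the address of the GOT. */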
4595 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4596 address of the call target. */
4598 void
4599 sparc_emit_call_insn (rtx pat, rtx addr)
4601 rtx_insn *insn;
4603 insn = emit_call_insn (pat);
4605 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4606 if (TARGET_VXWORKS_RTP
4607 && flag_pic
4608 && GET_CODE (addr) == SYMBOL_REF
4609 && (SYMBOL_REF_DECL (addr)
4610 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4611 : !SYMBOL_REF_LOCAL_P (addr)))
4613 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4614 crtl->uses_pic_offset_table = 1;
4618 /* Return 1 if RTX is a MEM which is known to be aligned to at
4619 least a DESIRED byte boundary. */
4622 mem_min_alignment (rtx mem, int desired)
4624 rtx addr, base, offset;
4626 /* If it's not a MEM we can't accept it. */
4627 if (GET_CODE (mem) != MEM)
4628 return 0;
4630 /* Obviously... */
4631 if (!TARGET_UNALIGNED_DOUBLES
4632 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4633 return 1;
4635 /* ??? The rest of the function predates MEM_ALIGN so
4636 there is probably a bit of redundancy. */
4637 addr = XEXP (mem, 0);
4638 base = offset = NULL_RTX;
4639 if (GET_CODE (addr) == PLUS)
4641 if (GET_CODE (XEXP (addr, 0)) == REG)
4643 base = XEXP (addr, 0);
4645 /* What we are saying here is that if the base
4646 REG is aligned properly, the compiler will make
4647 sure any REG based index upon it will be so
4648 as well. */
4649 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4650 offset = XEXP (addr, 1);
4651 else
4652 offset = const0_rtx;
4655 else if (GET_CODE (addr) == REG)
4657 base = addr;
4658 offset = const0_rtx;
4661 if (base != NULL_RTX)
4663 int regno = REGNO (base);
4665 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4667 /* Check if the compiler has recorded some information
4668 about the alignment of the base REG. If reload has
4669 completed, we already matched with proper alignments.
4670 If not running global_alloc, reload might give us
4671 unaligned pointer to local stack though. */
4672 if (((cfun != 0
4673 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4674 || (optimize && reload_completed))
4675 && (INTVAL (offset) & (desired - 1)) == 0)
4676 return 1;
4678 else
4680 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4681 return 1;
4684 else if (! TARGET_UNALIGNED_DOUBLES
4685 || CONSTANT_P (addr)
4686 || GET_CODE (addr) == LO_SUM)
4688 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4689 is true, in which case we can only assume that an access is aligned if
4690 it is to a constant address, or the address involves a LO_SUM. */
4691 return 1;
4694 /* An obviously unaligned address. */
4695 return 0;
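/* Typical use: mem_min_alignment (op, 8) gates ldd/std-style double
   moves in the machine description, since those require
   doubleword-aligned memory. */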
4699 /* Vectors to keep interesting information about registers where it can easily
4700 be found. We used to use the actual mode value as the bit number, but there
4701 are more than 32 modes now. Instead we use two tables: one indexed by
4702 hard register number, and one indexed by mode. */
4704 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4705 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4706 mapped into one sparc_mode_class mode. */
4708 enum sparc_mode_class {
4709 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4710 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4711 CC_MODE, CCFP_MODE
4714 /* Modes for single-word and smaller quantities. */
4715 #define S_MODES \
4716 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4718 /* Modes for double-word and smaller quantities. */
4719 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4721 /* Modes for quad-word and smaller quantities. */
4722 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4724 /* Modes for 8-word and smaller quantities. */
4725 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4727 /* Modes for single-float quantities. */
4728 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4730 /* Modes for double-float and smaller quantities. */
4731 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4733 /* Modes for quad-float and smaller quantities. */
4734 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4736 /* Modes for quad-float pairs and smaller quantities. */
4737 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4739 /* Modes for double-float only quantities. */
4740 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4742 /* Modes for quad-float and double-float only quantities. */
4743 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4745 /* Modes for quad-float pairs and double-float only quantities. */
4746 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4748 /* Modes for condition codes. */
4749 #define CC_MODES (1 << (int) CC_MODE)
4750 #define CCFP_MODES (1 << (int) CCFP_MODE)
4752 /* Value is 1 if register/mode pair is acceptable on sparc.
4753 The funny mixture of D and T modes is because integer operations
4754 do not specially operate on tetra quantities, so non-quad-aligned
4755 registers can hold quadword quantities (except %o4 and %i4 because
4756 they cross fixed registers). */
4758 /* This points to either the 32 bit or the 64 bit version. */
4759 const int *hard_regno_mode_classes;
4761 static const int hard_32bit_mode_classes[] = {
4762 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4763 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4764 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4765 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4767 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4768 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4769 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4770 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4772 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4773 and none can hold SFmode/SImode values. */
4774 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4775 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4776 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4777 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4779 /* %fcc[0123] */
4780 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4782 /* %icc, %sfp, %gsr */
4783 CC_MODES, 0, D_MODES
4786 static const int hard_64bit_mode_classes[] = {
4787 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4788 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4789 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4790 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4792 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4793 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4794 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4795 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4797 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4798 and none can hold SFmode/SImode values. */
4799 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4800 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4801 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4802 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4804 /* %fcc[0123] */
4805 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4807 /* %icc, %sfp, %gsr */
4808 CC_MODES, 0, D_MODES
4811 int sparc_mode_class [NUM_MACHINE_MODES];
4813 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4815 static void
4816 sparc_init_modes (void)
4818 int i;
4820 for (i = 0; i < NUM_MACHINE_MODES; i++)
4822 enum machine_mode m = (enum machine_mode) i;
4823 unsigned int size = GET_MODE_SIZE (m);
4825 switch (GET_MODE_CLASS (m))
4827 case MODE_INT:
4828 case MODE_PARTIAL_INT:
4829 case MODE_COMPLEX_INT:
4830 if (size < 4)
4831 sparc_mode_class[i] = 1 << (int) H_MODE;
4832 else if (size == 4)
4833 sparc_mode_class[i] = 1 << (int) S_MODE;
4834 else if (size == 8)
4835 sparc_mode_class[i] = 1 << (int) D_MODE;
4836 else if (size == 16)
4837 sparc_mode_class[i] = 1 << (int) T_MODE;
4838 else if (size == 32)
4839 sparc_mode_class[i] = 1 << (int) O_MODE;
4840 else
4841 sparc_mode_class[i] = 0;
4842 break;
4843 case MODE_VECTOR_INT:
4844 if (size == 4)
4845 sparc_mode_class[i] = 1 << (int) SF_MODE;
4846 else if (size == 8)
4847 sparc_mode_class[i] = 1 << (int) DF_MODE;
4848 else
4849 sparc_mode_class[i] = 0;
4850 break;
4851 case MODE_FLOAT:
4852 case MODE_COMPLEX_FLOAT:
4853 if (size == 4)
4854 sparc_mode_class[i] = 1 << (int) SF_MODE;
4855 else if (size == 8)
4856 sparc_mode_class[i] = 1 << (int) DF_MODE;
4857 else if (size == 16)
4858 sparc_mode_class[i] = 1 << (int) TF_MODE;
4859 else if (size == 32)
4860 sparc_mode_class[i] = 1 << (int) OF_MODE;
4861 else
4862 sparc_mode_class[i] = 0;
4863 break;
4864 case MODE_CC:
4865 if (m == CCFPmode || m == CCFPEmode)
4866 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4867 else
4868 sparc_mode_class[i] = 1 << (int) CC_MODE;
4869 break;
4870 default:
4871 sparc_mode_class[i] = 0;
4872 break;
4876 if (TARGET_ARCH64)
4877 hard_regno_mode_classes = hard_64bit_mode_classes;
4878 else
4879 hard_regno_mode_classes = hard_32bit_mode_classes;
4881 /* Initialize the array used by REGNO_REG_CLASS. */
4882 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4884 if (i < 16 && TARGET_V8PLUS)
4885 sparc_regno_reg_class[i] = I64_REGS;
4886 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4887 sparc_regno_reg_class[i] = GENERAL_REGS;
4888 else if (i < 64)
4889 sparc_regno_reg_class[i] = FP_REGS;
4890 else if (i < 96)
4891 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4892 else if (i < 100)
4893 sparc_regno_reg_class[i] = FPCC_REGS;
4894 else
4895 sparc_regno_reg_class[i] = NO_REGS;
4899 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4901 static inline bool
4902 save_global_or_fp_reg_p (unsigned int regno,
4903 int leaf_function ATTRIBUTE_UNUSED)
4905 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4908 /* Return whether the return address register (%i7) is needed. */
4910 static inline bool
4911 return_addr_reg_needed_p (int leaf_function)
4913 /* If it is live, for example because of __builtin_return_address (0). */
4914 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4915 return true;
4917 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4918 if (!leaf_function
4919 /* Loading the GOT register clobbers %o7. */
4920 || crtl->uses_pic_offset_table
4921 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4922 return true;
4924 return false;
4927 /* Return whether REGNO, a local or in register, must be saved/restored. */
4929 static bool
4930 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4932 /* General case: call-saved registers live at some point. */
4933 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4934 return true;
4936 /* Frame pointer register (%fp) if needed. */
4937 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4938 return true;
4940 /* Return address register (%i7) if needed. */
4941 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4942 return true;
4944 /* GOT register (%l7) if needed. */
4945 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4946 return true;
4948 /* If the function accesses prior frames, the frame pointer and the return
4949 address of the previous frame must be saved on the stack. */
4950 if (crtl->accesses_prior_frames
4951 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4952 return true;
4954 return false;
4957 /* Compute the frame size required by the function. This function is called
4958 during the reload pass and also by sparc_expand_prologue. */
4960 HOST_WIDE_INT
4961 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4963 HOST_WIDE_INT frame_size, apparent_frame_size;
4964 int args_size, n_global_fp_regs = 0;
4965 bool save_local_in_regs_p = false;
4966 unsigned int i;
4968 /* If the function allocates dynamic stack space, the dynamic offset is
4969 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4970 if (leaf_function && !cfun->calls_alloca)
4971 args_size = 0;
4972 else
4973 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4975 /* Calculate space needed for global registers. */
4976 if (TARGET_ARCH64)
4977 for (i = 0; i < 8; i++)
4978 if (save_global_or_fp_reg_p (i, 0))
4979 n_global_fp_regs += 2;
4980 else
4981 for (i = 0; i < 8; i += 2)
4982 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4983 n_global_fp_regs += 2;
4985 /* In the flat window model, find out which local and in registers need to
4986 be saved. We don't reserve space in the current frame for them as they
4987 will be spilled into the register window save area of the caller's frame.
4988 However, as soon as we use this register window save area, we must create
4989 that of the current frame to make it the live one. */
4990 if (TARGET_FLAT)
4991 for (i = 16; i < 32; i++)
4992 if (save_local_or_in_reg_p (i, leaf_function))
4994 save_local_in_regs_p = true;
4995 break;
4998 /* Calculate space needed for FP registers. */
4999 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5000 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5001 n_global_fp_regs += 2;
5003 if (size == 0
5004 && n_global_fp_regs == 0
5005 && args_size == 0
5006 && !save_local_in_regs_p)
5007 frame_size = apparent_frame_size = 0;
5008 else
5010 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5011 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
5012 apparent_frame_size += n_global_fp_regs * 4;
5014 /* We need to add the size of the outgoing argument area. */
5015 frame_size = apparent_frame_size + ((args_size + 7) & -8);
5017 /* And that of the register window save area. */
5018 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5020 /* Finally, bump to the appropriate alignment. */
5021 frame_size = SPARC_STACK_ALIGN (frame_size);
5024 /* Set up values for use in prologue and epilogue. */
5025 sparc_frame_size = frame_size;
5026 sparc_apparent_frame_size = apparent_frame_size;
5027 sparc_n_global_fp_regs = n_global_fp_regs;
5028 sparc_save_local_in_regs_p = save_local_in_regs_p;
5030 return frame_size;
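/* A worked example (assuming a 32-bit target where
   STARTING_FRAME_OFFSET is 0): with 20 bytes of locals, two live
   call-saved FP register pairs (n_global_fp_regs == 4) and 24 bytes
   of outgoing arguments,

     apparent_frame_size = ((20 + 7) & -8) + 4 * 4 = 24 + 16 = 40
     frame_size = 40 + ((24 + 7) & -8) + FIRST_PARM_OFFSET (decl)

   and the total is then rounded up by SPARC_STACK_ALIGN. */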
5033 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5036 sparc_initial_elimination_offset (int to)
5038 int offset;
5040 if (to == STACK_POINTER_REGNUM)
5041 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5042 else
5043 offset = 0;
5045 offset += SPARC_STACK_BIAS;
5046 return offset;
5049 /* Output any necessary .register pseudo-ops. */
5051 void
5052 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5054 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5055 int i;
5057 if (TARGET_ARCH32)
5058 return;
5060 /* Check if %g[2367] were used without
5061 .register being printed for them already. */
5062 for (i = 2; i < 8; i++)
5064 if (df_regs_ever_live_p (i)
5065 && ! sparc_hard_reg_printed [i])
5067 sparc_hard_reg_printed [i] = 1;
5068 /* %g7 is used as TLS base register, use #ignore
5069 for it instead of #scratch. */
5070 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5071 i == 7 ? "ignore" : "scratch");
5073 if (i == 3) i = 5;
5075 #endif
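/* For example, a 64-bit function in which %g2 and %g7 are live gets

     .register  %g2, #scratch
     .register  %g7, #ignore

   telling the assembler that the application-reserved globals are
   used on purpose; %g7 is the TLS base register, hence #ignore. */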
5078 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5080 #if PROBE_INTERVAL > 4096
5081 #error Cannot use indexed addressing mode for stack probing
5082 #endif
5084 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5085 inclusive. These are offsets from the current stack pointer.
5087 Note that we don't use the REG+REG addressing mode for the probes because
5088 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5089 so the advantages of having a single code path win here. */
5091 static void
5092 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5094 rtx g1 = gen_rtx_REG (Pmode, 1);
5096 /* See if we have a constant small number of probes to generate. If so,
5097 that's the easy case. */
5098 if (size <= PROBE_INTERVAL)
5100 emit_move_insn (g1, GEN_INT (first));
5101 emit_insn (gen_rtx_SET (VOIDmode, g1,
5102 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5103 emit_stack_probe (plus_constant (Pmode, g1, -size));
5106 /* The run-time loop is made up of 10 insns in the generic case while the
5107 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5108 else if (size <= 5 * PROBE_INTERVAL)
5110 HOST_WIDE_INT i;
5112 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5113 emit_insn (gen_rtx_SET (VOIDmode, g1,
5114 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5115 emit_stack_probe (g1);
5117 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5118 it exceeds SIZE. If only two probes are needed, this will not
5119 generate any code. Then probe at FIRST + SIZE. */
5120 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5122 emit_insn (gen_rtx_SET (VOIDmode, g1,
5123 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5124 emit_stack_probe (g1);
5127 emit_stack_probe (plus_constant (Pmode, g1,
5128 (i - PROBE_INTERVAL) - size));
5131 /* Otherwise, do the same as above, but in a loop. Note that we must be
5132 extra careful with variables wrapping around because we might be at
5133 the very top (or the very bottom) of the address space and we have
5134 to be able to handle this case properly; in particular, we use an
5135 equality test for the loop condition. */
5136 else
5138 HOST_WIDE_INT rounded_size;
5139 rtx g4 = gen_rtx_REG (Pmode, 4);
5141 emit_move_insn (g1, GEN_INT (first));
5144 /* Step 1: round SIZE to the previous multiple of the interval. */
5146 rounded_size = size & -PROBE_INTERVAL;
5147 emit_move_insn (g4, GEN_INT (rounded_size));
5150 /* Step 2: compute initial and final value of the loop counter. */
5152 /* TEST_ADDR = SP + FIRST. */
5153 emit_insn (gen_rtx_SET (VOIDmode, g1,
5154 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5156 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5157 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
5160 /* Step 3: the loop
5162 while (TEST_ADDR != LAST_ADDR)
5164 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5165 probe at TEST_ADDR
5168 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5169 until it is equal to ROUNDED_SIZE. */
5171 if (TARGET_ARCH64)
5172 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5173 else
5174 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5177 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5178 that SIZE is equal to ROUNDED_SIZE. */
5180 if (size != rounded_size)
5181 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5184 /* Make sure nothing is scheduled before we are done. */
5185 emit_insn (gen_blockage ());
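/* A worked example with the default PROBE_INTERVAL of 4096, for
   FIRST == 16384 (illustrative numbers only). SIZE == 10000 takes
   the middle case: probes at SP - 20480, SP - 24576 and SP - 26384.
   SIZE == 30000 takes the loop: the size is rounded down to 28672,
   the loop probes every 4096 bytes down to SP - 45056, and a final
   probe is emitted at SP - 46384. */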
5188 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5189 absolute addresses. */
5191 const char *
5192 output_probe_stack_range (rtx reg1, rtx reg2)
5194 static int labelno = 0;
5195 char loop_lab[32], end_lab[32];
5196 rtx xops[2];
5198 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
5199 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
5201 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5203 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
5204 xops[0] = reg1;
5205 xops[1] = reg2;
5206 output_asm_insn ("cmp\t%0, %1", xops);
5207 if (TARGET_ARCH64)
5208 fputs ("\tbe,pn\t%xcc,", asm_out_file);
5209 else
5210 fputs ("\tbe\t", asm_out_file);
5211 assemble_name_raw (asm_out_file, end_lab);
5212 fputc ('\n', asm_out_file);
5214 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5215 xops[1] = GEN_INT (-PROBE_INTERVAL);
5216 output_asm_insn (" add\t%0, %1, %0", xops);
5218 /* Probe at TEST_ADDR and branch. */
5219 if (TARGET_ARCH64)
5220 fputs ("\tba,pt\t%xcc,", asm_out_file);
5221 else
5222 fputs ("\tba\t", asm_out_file);
5223 assemble_name_raw (asm_out_file, loop_lab);
5224 fputc ('\n', asm_out_file);
5225 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5226 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5228 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
5230 return "";
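/* On a 64-bit target, and assuming the default 4096-byte interval,
   the emitted loop looks roughly like this (%g1 and %g4 are the
   registers the expander above passes in, 2047 is the stack bias):

   .LPSRL0:
           cmp     %g1, %g4
           be,pn   %xcc, .LPSRE0
            add    %g1, -4096, %g1     ! delay slot
           ba,pt   %xcc, .LPSRL0
            st     %g0, [%g1+2047]     ! the probe, in the delay slot
   .LPSRE0:
*/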
5233 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5234 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5235 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5236 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5237 the action to be performed if it returns false. Return the new offset. */
5239 typedef bool (*sorr_pred_t) (unsigned int, int);
5240 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5242 static int
5243 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5244 int offset, int leaf_function, sorr_pred_t save_p,
5245 sorr_act_t action_true, sorr_act_t action_false)
5247 unsigned int i;
5248 rtx mem;
5249 rtx_insn *insn;
5251 if (TARGET_ARCH64 && high <= 32)
5253 int fp_offset = -1;
5255 for (i = low; i < high; i++)
5257 if (save_p (i, leaf_function))
5259 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5260 base, offset));
5261 if (action_true == SORR_SAVE)
5263 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5264 RTX_FRAME_RELATED_P (insn) = 1;
5266 else /* action_true == SORR_RESTORE */
5268 /* The frame pointer must be restored last since its old
5269 value may be used as base address for the frame. This
5270 is problematic in 64-bit mode only because of the lack
5271 of double-word load instruction. */
5272 if (i == HARD_FRAME_POINTER_REGNUM)
5273 fp_offset = offset;
5274 else
5275 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5277 offset += 8;
5279 else if (action_false == SORR_ADVANCE)
5280 offset += 8;
5283 if (fp_offset >= 0)
5285 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5286 emit_move_insn (hard_frame_pointer_rtx, mem);
5289 else
5291 for (i = low; i < high; i += 2)
5293 bool reg0 = save_p (i, leaf_function);
5294 bool reg1 = save_p (i + 1, leaf_function);
5295 enum machine_mode mode;
5296 int regno;
5298 if (reg0 && reg1)
5300 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5301 regno = i;
5303 else if (reg0)
5305 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5306 regno = i;
5308 else if (reg1)
5310 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5311 regno = i + 1;
5312 offset += 4;
5314 else
5316 if (action_false == SORR_ADVANCE)
5317 offset += 8;
5318 continue;
5321 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5322 if (action_true == SORR_SAVE)
5324 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5325 RTX_FRAME_RELATED_P (insn) = 1;
5326 if (mode == DImode)
5328 rtx set1, set2;
5329 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5330 offset));
5331 set1 = gen_rtx_SET (VOIDmode, mem,
5332 gen_rtx_REG (SImode, regno));
5333 RTX_FRAME_RELATED_P (set1) = 1;
5335 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5336 offset + 4));
5337 set2 = gen_rtx_SET (VOIDmode, mem,
5338 gen_rtx_REG (SImode, regno + 1));
5339 RTX_FRAME_RELATED_P (set2) = 1;
5340 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5341 gen_rtx_PARALLEL (VOIDmode,
5342 gen_rtvec (2, set1, set2)));
5345 else /* action_true == SORR_RESTORE */
5346 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5348 /* Always preserve double-word alignment. */
5349 offset = (offset + 8) & -8;
5353 return offset;
5356 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5358 static rtx
5359 emit_adjust_base_to_offset (rtx base, int offset)
5361 /* ??? This might be optimized a little as %g1 might already have a
5362 value close enough that a single add insn will do. */
5363 /* ??? Although, all of this is probably only a temporary fix because
5364 if %g1 can hold a function result, then sparc_expand_epilogue will
5365 lose (the result will be clobbered). */
5366 rtx new_base = gen_rtx_REG (Pmode, 1);
5367 emit_move_insn (new_base, GEN_INT (offset));
5368 emit_insn (gen_rtx_SET (VOIDmode,
5369 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5370 return new_base;
5373 /* Emit code to save/restore call-saved global and FP registers. */
5375 static void
5376 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5378 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5380 base = emit_adjust_base_to_offset (base, offset);
5381 offset = 0;
5384 offset
5385 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5386 save_global_or_fp_reg_p, action, SORR_NONE);
5387 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5388 save_global_or_fp_reg_p, action, SORR_NONE);
5391 /* Emit code to save/restore call-saved local and in registers. */
5393 static void
5394 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5396 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5398 base = emit_adjust_base_to_offset (base, offset);
5399 offset = 0;
5402 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5403 save_local_or_in_reg_p, action, SORR_ADVANCE);
5406 /* Emit a window_save insn. */
5408 static rtx_insn *
5409 emit_window_save (rtx increment)
5411 rtx_insn *insn = emit_insn (gen_window_save (increment));
5412 RTX_FRAME_RELATED_P (insn) = 1;
5414 /* The incoming return address (%o7) is saved in %i7. */
5415 add_reg_note (insn, REG_CFA_REGISTER,
5416 gen_rtx_SET (VOIDmode,
5417 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5418 gen_rtx_REG (Pmode,
5419 INCOMING_RETURN_ADDR_REGNUM)));
5421 /* The window save event. */
5422 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5424 /* The CFA is %fp, the hard frame pointer. */
5425 add_reg_note (insn, REG_CFA_DEF_CFA,
5426 plus_constant (Pmode, hard_frame_pointer_rtx,
5427 INCOMING_FRAME_SP_OFFSET));
5429 return insn;
5432 /* Generate an increment for the stack pointer. */
5434 static rtx
5435 gen_stack_pointer_inc (rtx increment)
5437 return gen_rtx_SET (VOIDmode,
5438 stack_pointer_rtx,
5439 gen_rtx_PLUS (Pmode,
5440 stack_pointer_rtx,
5441 increment));
5444 /* Expand the function prologue. The prologue is responsible for reserving
5445 storage for the frame, saving the call-saved registers and loading the
5446 GOT register if needed. */
5448 void
5449 sparc_expand_prologue (void)
5451 HOST_WIDE_INT size;
5452 rtx_insn *insn;
5454 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5455 on the final value of the flag means deferring the prologue/epilogue
5456 expansion until just before the second scheduling pass, which is too
5457 late to emit multiple epilogues or return insns.
5459 Of course we are making the assumption that the value of the flag
5460 will not change between now and its final value. Of the three parts
5461 of the formula, only the last one can reasonably vary. Let's take a
5462 closer look, after assuming that the first two are set to true
5463 (otherwise the last value is effectively silenced).
5465 If only_leaf_regs_used returns false, the global predicate will also
5466 be false so the actual frame size calculated below will be positive.
5467 As a consequence, the save_register_window insn will be emitted in
5468 the instruction stream; now this insn explicitly references %fp
5469 which is not a leaf register so only_leaf_regs_used will always
5470 return false subsequently.
5472 If only_leaf_regs_used returns true, we hope that the subsequent
5473 optimization passes won't cause non-leaf registers to pop up. For
5474 example, the regrename pass has special provisions to not rename to
5475 non-leaf registers in a leaf function. */
5476 sparc_leaf_function_p
5477 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5479 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5481 if (flag_stack_usage_info)
5482 current_function_static_stack_size = size;
5484 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5486 if (crtl->is_leaf && !cfun->calls_alloca)
5488 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5489 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5490 size - STACK_CHECK_PROTECT);
5492 else if (size > 0)
5493 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5496 if (size == 0)
5497 ; /* do nothing. */
5498 else if (sparc_leaf_function_p)
5500 rtx size_int_rtx = GEN_INT (-size);
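/* The immediate field of add/sub is 13 signed bits, i.e.
   [-4096, 4095], so a frame of up to 4096 bytes is allocated with
   one decrement, up to 8192 bytes with two, and anything larger
   goes through scratch register %g1. */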
5502 if (size <= 4096)
5503 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5504 else if (size <= 8192)
5506 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5507 RTX_FRAME_RELATED_P (insn) = 1;
5509 /* %sp is still the CFA register. */
5510 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5512 else
5514 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5515 emit_move_insn (size_rtx, size_int_rtx);
5516 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5517 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5518 gen_stack_pointer_inc (size_int_rtx));
5521 RTX_FRAME_RELATED_P (insn) = 1;
5523 else
5525 rtx size_int_rtx = GEN_INT (-size);
5527 if (size <= 4096)
5528 emit_window_save (size_int_rtx);
5529 else if (size <= 8192)
5531 emit_window_save (GEN_INT (-4096));
5533 /* %sp is not the CFA register anymore. */
5534 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5536 /* Make sure no %fp-based store is issued until after the frame is
5537 established. The offset between the frame pointer and the stack
5538 pointer is calculated relative to the value of the stack pointer
5539 at the end of the function prologue, and moving instructions that
5540 access the stack via the frame pointer between the instructions
5541 that decrement the stack pointer could result in accessing the
5542 register window save area, which is volatile. */
5543 emit_insn (gen_frame_blockage ());
5545 else
5547 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5548 emit_move_insn (size_rtx, size_int_rtx);
5549 emit_window_save (size_rtx);
5553 if (sparc_leaf_function_p)
5555 sparc_frame_base_reg = stack_pointer_rtx;
5556 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5558 else
5560 sparc_frame_base_reg = hard_frame_pointer_rtx;
5561 sparc_frame_base_offset = SPARC_STACK_BIAS;
5564 if (sparc_n_global_fp_regs > 0)
5565 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5566 sparc_frame_base_offset
5567 - sparc_apparent_frame_size,
5568 SORR_SAVE);
5570 /* Load the GOT register if needed. */
5571 if (crtl->uses_pic_offset_table)
5572 load_got_register ();
5574 /* Advertise that the data calculated just above are now valid. */
5575 sparc_prologue_data_valid_p = true;
5578 /* Expand the function prologue. The prologue is responsible for reserving
5579 storage for the frame, saving the call-saved registers and loading the
5580 GOT register if needed. */
5582 void
5583 sparc_flat_expand_prologue (void)
5585 HOST_WIDE_INT size;
5586 rtx_insn *insn;
5588 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5590 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5592 if (flag_stack_usage_info)
5593 current_function_static_stack_size = size;
5595 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5597 if (crtl->is_leaf && !cfun->calls_alloca)
5599 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5600 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5601 size - STACK_CHECK_PROTECT);
5603 else if (size > 0)
5604 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5607 if (sparc_save_local_in_regs_p)
5608 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5609 SORR_SAVE);
5611 if (size == 0)
5612 ; /* do nothing. */
5613 else
5615 rtx size_int_rtx, size_rtx;
5617 size_rtx = size_int_rtx = GEN_INT (-size);
5619 /* We establish the frame (i.e. decrement the stack pointer) first, even
5620 if we use a frame pointer, because we cannot clobber any call-saved
5621 registers, including the frame pointer, if we haven't created a new
5622 register save area, for the sake of compatibility with the ABI. */
5623 if (size <= 4096)
5624 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5625 else if (size <= 8192 && !frame_pointer_needed)
5627 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5628 RTX_FRAME_RELATED_P (insn) = 1;
5629 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5631 else
5633 size_rtx = gen_rtx_REG (Pmode, 1);
5634 emit_move_insn (size_rtx, size_int_rtx);
5635 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5636 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5637 gen_stack_pointer_inc (size_int_rtx));
5639 RTX_FRAME_RELATED_P (insn) = 1;
5641 /* Ensure nothing is scheduled until after the frame is established. */
5642 emit_insn (gen_blockage ());
5644 if (frame_pointer_needed)
5646 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5647 gen_rtx_MINUS (Pmode,
5648 stack_pointer_rtx,
5649 size_rtx)));
5650 RTX_FRAME_RELATED_P (insn) = 1;
5652 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5653 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5654 plus_constant (Pmode, stack_pointer_rtx,
5655 size)));
5658 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5660 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5661 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5663 insn = emit_move_insn (i7, o7);
5664 RTX_FRAME_RELATED_P (insn) = 1;
5666 add_reg_note (insn, REG_CFA_REGISTER,
5667 gen_rtx_SET (VOIDmode, i7, o7));
5669 /* Prevent this instruction from ever being considered dead,
5670 even if this function has no epilogue. */
5671 emit_use (i7);
5675 if (frame_pointer_needed)
5677 sparc_frame_base_reg = hard_frame_pointer_rtx;
5678 sparc_frame_base_offset = SPARC_STACK_BIAS;
5680 else
5682 sparc_frame_base_reg = stack_pointer_rtx;
5683 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5686 if (sparc_n_global_fp_regs > 0)
5687 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5688 sparc_frame_base_offset
5689 - sparc_apparent_frame_size,
5690 SORR_SAVE);
5692 /* Load the GOT register if needed. */
5693 if (crtl->uses_pic_offset_table)
5694 load_got_register ();
5696 /* Advertise that the data calculated just above are now valid. */
5697 sparc_prologue_data_valid_p = true;
5700 /* This function generates the assembly code for function entry, which boils
5701 down to emitting the necessary .register directives. */
5703 static void
5704 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5706 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5707 if (!TARGET_FLAT)
5708 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5710 sparc_output_scratch_registers (file);
5713 /* Expand the function epilogue, either normal or part of a sibcall.
5714 We emit all the instructions except the return or the call. */
5716 void
5717 sparc_expand_epilogue (bool for_eh)
5719 HOST_WIDE_INT size = sparc_frame_size;
5721 if (sparc_n_global_fp_regs > 0)
5722 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5723 sparc_frame_base_offset
5724 - sparc_apparent_frame_size,
5725 SORR_RESTORE);
5727 if (size == 0 || for_eh)
5728 ; /* do nothing. */
5729 else if (sparc_leaf_function_p)
5731 if (size <= 4096)
5732 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5733 else if (size <= 8192)
5735 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5736 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5738 else
5740 rtx reg = gen_rtx_REG (Pmode, 1);
5741 emit_move_insn (reg, GEN_INT (size));
5742 emit_insn (gen_stack_pointer_inc (reg));
5747 /* Expand the function epilogue, either normal or part of a sibcall.
5748 We emit all the instructions except the return or the call. */
5750 void
5751 sparc_flat_expand_epilogue (bool for_eh)
5753 HOST_WIDE_INT size = sparc_frame_size;
5755 if (sparc_n_global_fp_regs > 0)
5756 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5757 sparc_frame_base_offset
5758 - sparc_apparent_frame_size,
5759 SORR_RESTORE);
5761 /* If we have a frame pointer, we'll need both to restore it before the
5762 frame is destroyed and use its current value in destroying the frame.
5763 Since we don't have an atomic way to do that in the flat window model,
5764 we save the current value into a temporary register (%g1). */
5765 if (frame_pointer_needed && !for_eh)
5766 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5768 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5769 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5770 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5772 if (sparc_save_local_in_regs_p)
5773 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5774 sparc_frame_base_offset,
5775 SORR_RESTORE);
5777 if (size == 0 || for_eh)
5778 ; /* do nothing. */
5779 else if (frame_pointer_needed)
5781 /* Make sure the frame is destroyed after everything else is done. */
5782 emit_insn (gen_blockage ());
5784 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5786 else
5788 /* Likewise. */
5789 emit_insn (gen_blockage ());
5791 if (size <= 4096)
5792 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5793 else if (size <= 8192)
5795 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5796 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5798 else
5800 rtx reg = gen_rtx_REG (Pmode, 1);
5801 emit_move_insn (reg, GEN_INT (size));
5802 emit_insn (gen_stack_pointer_inc (reg));
5807 /* Return true if it is appropriate to emit `return' instructions in the
5808 body of a function. */
5810 bool
5811 sparc_can_use_return_insn_p (void)
5813 return sparc_prologue_data_valid_p
5814 && sparc_n_global_fp_regs == 0
5815 && TARGET_FLAT
5816 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5817 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5820 /* This function generates the assembly code for function exit. */
5822 static void
5823 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5825 /* If the last two instructions of a function are "call foo; dslot;"
5826 the return address might point to the first instruction in the next
5827 function and we have to output a dummy nop for the sake of sane
5828 backtraces in such cases. This is pointless for sibling calls since
5829 the return address is explicitly adjusted. */
5831 rtx insn, last_real_insn;
5833 insn = get_last_insn ();
5835 last_real_insn = prev_real_insn (insn);
5836 if (last_real_insn
5837 && NONJUMP_INSN_P (last_real_insn)
5838 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5839 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5841 if (last_real_insn
5842 && CALL_P (last_real_insn)
5843 && !SIBLING_CALL_P (last_real_insn))
5844 fputs("\tnop\n", file);
5846 sparc_output_deferred_case_vectors ();
5849 /* Output a 'restore' instruction. */
5851 static void
5852 output_restore (rtx pat)
5854 rtx operands[3];
5856 if (! pat)
5858 fputs ("\t restore\n", asm_out_file);
5859 return;
5862 gcc_assert (GET_CODE (pat) == SET);
5864 operands[0] = SET_DEST (pat);
5865 pat = SET_SRC (pat);
5867 switch (GET_CODE (pat))
5869 case PLUS:
5870 operands[1] = XEXP (pat, 0);
5871 operands[2] = XEXP (pat, 1);
5872 output_asm_insn (" restore %r1, %2, %Y0", operands);
5873 break;
5874 case LO_SUM:
5875 operands[1] = XEXP (pat, 0);
5876 operands[2] = XEXP (pat, 1);
5877 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5878 break;
5879 case ASHIFT:
5880 operands[1] = XEXP (pat, 0);
5881 gcc_assert (XEXP (pat, 1) == const1_rtx);
5882 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5883 break;
5884 default:
5885 operands[1] = pat;
5886 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5887 break;
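/* Illustrative mapping (not part of the original source): a SET whose
   source is a PLUS comes out as "restore rs1, rs2, rd", a LO_SUM as
   "restore rs1, %lo(sym), rd", the doubling ASHIFT-by-1 as
   "restore rs1, rs1, rd", and any other source as
   "restore %g0, src, rd". */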
5891 /* Output a return. */
5893 const char *
5894 output_return (rtx_insn *insn)
5896 if (crtl->calls_eh_return)
5898 /* If the function uses __builtin_eh_return, the eh_return
5899 machinery occupies the delay slot. */
5900 gcc_assert (!final_sequence);
5902 if (flag_delayed_branch)
5904 if (!TARGET_FLAT && TARGET_V9)
5905 fputs ("\treturn\t%i7+8\n", asm_out_file);
5906 else
5908 if (!TARGET_FLAT)
5909 fputs ("\trestore\n", asm_out_file);
5911 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5914 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5916 else
5918 if (!TARGET_FLAT)
5919 fputs ("\trestore\n", asm_out_file);
5921 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5922 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5925 else if (sparc_leaf_function_p || TARGET_FLAT)
5927 /* This is a leaf or flat function so we don't have to bother restoring
5928 the register window, which frees us from dealing with the convoluted
5929 semantics of restore/return. We simply output the jump to the
5930 return address and the insn in the delay slot (if any). */
5932 return "jmp\t%%o7+%)%#";
5934 else
5936 /* This is a regular function so we have to restore the register window.
5937 We may have a pending insn for the delay slot, which will be either
5938 combined with the 'restore' instruction or put in the delay slot of
5939 the 'return' instruction. */
5941 if (final_sequence)
5943 rtx delay, pat;
5945 delay = NEXT_INSN (insn);
5946 gcc_assert (delay);
5948 pat = PATTERN (delay);
5950 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5952 epilogue_renumber (&pat, 0);
5953 return "return\t%%i7+%)%#";
5955 else
5957 output_asm_insn ("jmp\t%%i7+%)", NULL);
5958 output_restore (pat);
5959 PATTERN (delay) = gen_blockage ();
5960 INSN_CODE (delay) = -1;
5963 else
5965 /* The delay slot is empty. */
5966 if (TARGET_V9)
5967 return "return\t%%i7+%)\n\t nop";
5968 else if (flag_delayed_branch)
5969 return "jmp\t%%i7+%)\n\t restore";
5970 else
5971 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5975 return "";
5978 /* Output a sibling call. */
5980 const char *
5981 output_sibcall (rtx_insn *insn, rtx call_operand)
5983 rtx operands[1];
5985 gcc_assert (flag_delayed_branch);
5987 operands[0] = call_operand;
5989 if (sparc_leaf_function_p || TARGET_FLAT)
5991 /* This is a leaf or flat function so we don't have to bother restoring
5992 the register window. We simply output the jump to the function and
5993 the insn in the delay slot (if any). */
5995 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5997 if (final_sequence)
5998 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5999 operands);
6000 else
6001 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6002 it into a branch if possible. */
6003 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6004 operands);
6006 else
6008 /* This is a regular function so we have to restore the register window.
6009 We may have a pending insn for the delay slot, which will be combined
6010 with the 'restore' instruction. */
6012 output_asm_insn ("call\t%a0, 0", operands);
6014 if (final_sequence)
6016 rtx_insn *delay = NEXT_INSN (insn);
6017 gcc_assert (delay);
6019 output_restore (PATTERN (delay));
6021 PATTERN (delay) = gen_blockage ();
6022 INSN_CODE (delay) = -1;
6024 else
6025 output_restore (NULL_RTX);
6028 return "";
6031 /* Functions for handling argument passing.
6033 For 32-bit, the first 6 args are normally in registers and the rest are
6034 pushed. Any arg that starts within the first 6 words is at least
6035 partially passed in a register unless its data type forbids it.
6037 For 64-bit, the argument registers are laid out as an array of 16 elements
6038 and arguments are added sequentially. The first 6 int args and up to the
6039 first 16 fp args (depending on size) are passed in regs.
6041    Slot  Stack      Integral  Float  Float in structure  Double  Long Double
6042    ----  -----      --------  -----  ------------------  ------  -----------
6043     15   [SP+248]             %f31       %f30,%f31        %d30
6044     14   [SP+240]             %f29       %f28,%f29        %d28       %q28
6045     13   [SP+232]             %f27       %f26,%f27        %d26
6046     12   [SP+224]             %f25       %f24,%f25        %d24       %q24
6047     11   [SP+216]             %f23       %f22,%f23        %d22
6048     10   [SP+208]             %f21       %f20,%f21        %d20       %q20
6049      9   [SP+200]             %f19       %f18,%f19        %d18
6050      8   [SP+192]             %f17       %f16,%f17        %d16       %q16
6051      7   [SP+184]             %f15       %f14,%f15        %d14
6052      6   [SP+176]             %f13       %f12,%f13        %d12       %q12
6053      5   [SP+168]    %o5      %f11       %f10,%f11        %d10
6054      4   [SP+160]    %o4      %f9        %f8,%f9          %d8        %q8
6055      3   [SP+152]    %o3      %f7        %f6,%f7          %d6
6056      2   [SP+144]    %o2      %f5        %f4,%f5          %d4        %q4
6057      1   [SP+136]    %o1      %f3        %f2,%f3          %d2
6058      0   [SP+128]    %o0      %f1        %f0,%f1          %d0        %q0
6060 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6062 Integral arguments are always passed as 64-bit quantities appropriately
6063 extended.
6065 Passing of floating point values is handled as follows.
6066 If a prototype is in scope:
6067 If the value is in a named argument (i.e. not a stdarg function or a
6068 value not part of the `...') then the value is passed in the appropriate
6069 fp reg.
6070 If the value is part of the `...' and is passed in one of the first 6
6071 slots then the value is passed in the appropriate int reg.
6072 If the value is part of the `...' and is not passed in one of the first 6
6073 slots then the value is passed in memory.
6074 If a prototype is not in scope:
6075 If the value is one of the first 6 arguments the value is passed in the
6076 appropriate integer reg and the appropriate fp reg.
6077 If the value is not one of the first 6 arguments the value is passed in
6078 the appropriate fp reg and in memory.
6081 Summary of the calling conventions implemented by GCC on the SPARC:
6083    32-bit ABI:
6084                                size     argument    return value
6086    small integer                 <4     int. reg.    int. reg.
6087    word                           4     int. reg.    int. reg.
6088    double word                    8     int. reg.    int. reg.
6090    _Complex small integer        <8     int. reg.    int. reg.
6091    _Complex word                  8     int. reg.    int. reg.
6092    _Complex double word          16     memory       int. reg.
6094    vector integer               <=8     int. reg.    FP reg.
6095    vector integer                >8     memory       memory
6097    float                          4     int. reg.    FP reg.
6098    double                         8     int. reg.    FP reg.
6099    long double                   16     memory       memory
6101    _Complex float                 8     memory       FP reg.
6102    _Complex double               16     memory       FP reg.
6103    _Complex long double          32     memory       FP reg.
6105    vector float                 any     memory       memory
6107    aggregate                    any     memory       memory
6111    64-bit ABI:
6112                                size     argument    return value
6114    small integer                 <8     int. reg.    int. reg.
6115    word                           8     int. reg.    int. reg.
6116    double word                   16     int. reg.    int. reg.
6118    _Complex small integer       <16     int. reg.    int. reg.
6119    _Complex word                 16     int. reg.    int. reg.
6120    _Complex double word          32     memory       int. reg.
6122    vector integer              <=16     FP reg.      FP reg.
6123    vector integer          16<s<=32     memory       FP reg.
6124    vector integer               >32     memory       memory
6126    float                          4     FP reg.      FP reg.
6127    double                         8     FP reg.      FP reg.
6128    long double                   16     FP reg.      FP reg.
6130    _Complex float                 8     FP reg.      FP reg.
6131    _Complex double               16     FP reg.      FP reg.
6132    _Complex long double          32     memory       FP reg.
6134    vector float                <=16     FP reg.      FP reg.
6135    vector float            16<s<=32     memory       FP reg.
6136    vector float                 >32     memory       memory
6138    aggregate                   <=16     reg.         reg.
6139    aggregate               16<s<=32     memory       reg.
6140    aggregate                    >32     memory       memory
6144 Note #1: complex floating-point types follow the extended SPARC ABIs as
6145 implemented by the Sun compiler.
6147 Note #2: integral vector types follow the scalar floating-point types
6148 conventions to match what is implemented by the Sun VIS SDK.
6150 Note #3: floating-point vector types follow the aggregate types
6151 conventions. */
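/* Illustrative reading of the tables above (not part of the original
   source): given the 64-bit ABI and a prototype in scope,

     extern double f (int a, double b, float c);

   'a' is passed in %o0 (slot 0), 'b' in %d2 (slot 1) and 'c' in %f5
   (slot 2, right-justified in the outer double register), while the
   result comes back in %d0. */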
6154 /* Maximum number of int regs for args. */
6155 #define SPARC_INT_ARG_MAX 6
6156 /* Maximum number of fp regs for args. */
6157 #define SPARC_FP_ARG_MAX 16
6159 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
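/* Worked example (illustrative, not part of the original source): with
   UNITS_PER_WORD == 8 in 64-bit mode, ROUND_ADVANCE rounds a byte size
   up to whole argument slots:

     ROUND_ADVANCE (1)  == (1 + 7) / 8  == 1
     ROUND_ADVANCE (8)  == (8 + 7) / 8  == 1
     ROUND_ADVANCE (12) == (12 + 7) / 8 == 2

   so a 12-byte object advances CUM->words by two slots. */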
6161 /* Handle the INIT_CUMULATIVE_ARGS macro.
6162 Initialize a variable CUM of type CUMULATIVE_ARGS
6163 for a call to a function whose data type is FNTYPE.
6164 For a library call, FNTYPE is 0. */
6166 void
6167 init_cumulative_args (struct sparc_args *cum, tree fntype,
6168 rtx libname ATTRIBUTE_UNUSED,
6169 tree fndecl ATTRIBUTE_UNUSED)
6171 cum->words = 0;
6172 cum->prototype_p = fntype && prototype_p (fntype);
6173 cum->libcall_p = fntype == 0;
6176 /* Handle promotion of pointer and integer arguments. */
6178 static enum machine_mode
6179 sparc_promote_function_mode (const_tree type,
6180 enum machine_mode mode,
6181 int *punsignedp,
6182 const_tree fntype ATTRIBUTE_UNUSED,
6183 int for_return ATTRIBUTE_UNUSED)
6185 if (type != NULL_TREE && POINTER_TYPE_P (type))
6187 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6188 return Pmode;
6191 /* Integral arguments are passed as full words, as per the ABI. */
6192 if (GET_MODE_CLASS (mode) == MODE_INT
6193 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6194 return word_mode;
6196 return mode;
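/* Illustrative sketch (not part of the original source): given

     extern int g (short s, char *p);

   the 2-byte 's' is widened to word_mode before being passed, and 'p'
   is extended per POINTERS_EXTEND_UNSIGNED and passed in Pmode, so each
   occupies one full argument slot. */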
6199 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6201 static bool
6202 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6204 return TARGET_ARCH64 ? true : false;
6207 /* Scan the record type TYPE and return the following predicates:
6208 - INTREGS_P: the record contains at least one field or sub-field
6209 that is eligible for promotion in integer registers.
6210 - FP_REGS_P: the record contains at least one field or sub-field
6211 that is eligible for promotion in floating-point registers.
6212 - PACKED_P: the record contains at least one field that is packed.
6214 Sub-fields are not taken into account for the PACKED_P predicate. */
6216 static void
6217 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6218 int *packed_p)
6220 tree field;
6222 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6224 if (TREE_CODE (field) == FIELD_DECL)
6226 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6227 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6228 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6229 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6230 && TARGET_FPU)
6231 *fpregs_p = 1;
6232 else
6233 *intregs_p = 1;
6235 if (packed_p && DECL_PACKED (field))
6236 *packed_p = 1;
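/* Illustrative sketch (not part of the original source): for

     struct s { int i; struct { double d; } inner; };

   scan_record_type sets *INTREGS_P because of 'i' and, by recursing
   into 'inner', sets *FPREGS_P because of 'd' (assuming TARGET_FPU).
   *PACKED_P would be set only if a field of 's' itself were packed,
   since the recursive calls pass a null PACKED_P pointer. */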
6241 /* Compute the slot number to pass an argument in.
6242 Return the slot number or -1 if passing on the stack.
6244 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6245 the preceding args and about the function being called.
6246 MODE is the argument's machine mode.
6247 TYPE is the data type of the argument (as a tree).
6248 This is null for libcalls where that information may
6249 not be available.
6250 NAMED is nonzero if this argument is a named parameter
6251 (otherwise it is an extra parameter matching an ellipsis).
6252 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6253 *PREGNO records the register number to use if scalar type.
6254 *PPADDING records the amount of padding needed in words. */
6256 static int
6257 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
6258 const_tree type, bool named, bool incoming_p,
6259 int *pregno, int *ppadding)
6261 int regbase = (incoming_p
6262 ? SPARC_INCOMING_INT_ARG_FIRST
6263 : SPARC_OUTGOING_INT_ARG_FIRST);
6264 int slotno = cum->words;
6265 enum mode_class mclass;
6266 int regno;
6268 *ppadding = 0;
6270 if (type && TREE_ADDRESSABLE (type))
6271 return -1;
6273 if (TARGET_ARCH32
6274 && mode == BLKmode
6275 && type
6276 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6277 return -1;
6279 /* For SPARC64, objects requiring 16-byte alignment get it. */
6280 if (TARGET_ARCH64
6281 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6282 && (slotno & 1) != 0)
6283 slotno++, *ppadding = 1;
6285 mclass = GET_MODE_CLASS (mode);
6286 if (type && TREE_CODE (type) == VECTOR_TYPE)
6288 /* Vector types deserve special treatment because they are
6289 polymorphic wrt their mode, depending upon whether VIS
6290 instructions are enabled. */
6291 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6293 /* The SPARC port defines no floating-point vector modes. */
6294 gcc_assert (mode == BLKmode);
6296 else
6298 /* Integral vector types should either have a vector
6299 mode or an integral mode, because we are guaranteed
6300 by pass_by_reference that their size is not greater
6301 than 16 bytes and TImode is 16-byte wide. */
6302 gcc_assert (mode != BLKmode);
6304 /* Vector integers are handled like floats according to
6305 the Sun VIS SDK. */
6306 mclass = MODE_FLOAT;
6310 switch (mclass)
6312 case MODE_FLOAT:
6313 case MODE_COMPLEX_FLOAT:
6314 case MODE_VECTOR_INT:
6315 if (TARGET_ARCH64 && TARGET_FPU && named)
6317 if (slotno >= SPARC_FP_ARG_MAX)
6318 return -1;
6319 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6320 /* Arguments filling only a single FP register are
6321 right-justified in the outer double FP register. */
6322 if (GET_MODE_SIZE (mode) <= 4)
6323 regno++;
6324 break;
6326 /* fallthrough */
6328 case MODE_INT:
6329 case MODE_COMPLEX_INT:
6330 if (slotno >= SPARC_INT_ARG_MAX)
6331 return -1;
6332 regno = regbase + slotno;
6333 break;
6335 case MODE_RANDOM:
6336 if (mode == VOIDmode)
6337 /* MODE is VOIDmode when generating the actual call. */
6338 return -1;
6340 gcc_assert (mode == BLKmode);
6342 if (TARGET_ARCH32
6343 || !type
6344 || (TREE_CODE (type) != VECTOR_TYPE
6345 && TREE_CODE (type) != RECORD_TYPE))
6347 if (slotno >= SPARC_INT_ARG_MAX)
6348 return -1;
6349 regno = regbase + slotno;
6351 else /* TARGET_ARCH64 && type */
6353 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6355 /* First see what kinds of registers we would need. */
6356 if (TREE_CODE (type) == VECTOR_TYPE)
6357 fpregs_p = 1;
6358 else
6359 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6361 /* The ABI obviously doesn't specify how packed structures
6362 are passed. These are defined to be passed in int regs
6363 if possible, otherwise memory. */
6364 if (packed_p || !named)
6365 fpregs_p = 0, intregs_p = 1;
6367 /* If all arg slots are filled, then must pass on stack. */
6368 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6369 return -1;
6371 /* If there are only int args and all int arg slots are filled,
6372 then must pass on stack. */
6373 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6374 return -1;
6376 /* Note that even if all int arg slots are filled, fp members may
6377 still be passed in regs if such regs are available.
6378 *PREGNO isn't set because there may be more than one; it's up
6379 to the caller to compute them. */
6380 return slotno;
6382 break;
6384 default:
6385 gcc_unreachable ();
6388 *pregno = regno;
6389 return slotno;
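/* Worked example (illustrative, not part of the original source): on
   TARGET_ARCH64, after an 'int' has taken slot 0, a 'long double'
   argument (16-byte alignment) finds (slotno & 1) != 0, so the slot is
   bumped: SLOTNO becomes 2, *PPADDING becomes 1 and the value lands in
   %q4, matching the slot table further up. */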
6392 /* Handle recursive register counting for structure field layout. */
6394 struct function_arg_record_value_parms
6396 rtx ret; /* return expression being built. */
6397 int slotno; /* slot number of the argument. */
6398 int named; /* whether the argument is named. */
6399 int regbase; /* regno of the base register. */
6400 int stack; /* 1 if part of the argument is on the stack. */
6401 int intoffset; /* offset of the first pending integer field. */
6402 unsigned int nregs; /* number of words passed in registers. */
6405 static void function_arg_record_value_3
6406 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6407 static void function_arg_record_value_2
6408 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6409 static void function_arg_record_value_1
6410 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6411 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6412 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6414 /* A subroutine of function_arg_record_value. Traverse the structure
6415 recursively and determine how many registers will be required. */
6417 static void
6418 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6419 struct function_arg_record_value_parms *parms,
6420 bool packed_p)
6422 tree field;
6424 /* We need to compute how many registers are needed so we can
6425 allocate the PARALLEL, but before we can do that we need to know
6426 whether there are any packed fields. The ABI obviously doesn't
6427 specify how structures are passed in this case, so they are
6428 defined to be passed in int regs if possible, otherwise memory,
6429 regardless of whether there are fp values present. */
6431 if (! packed_p)
6432 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6434 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6436 packed_p = true;
6437 break;
6441 /* Compute how many registers we need. */
6442 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6444 if (TREE_CODE (field) == FIELD_DECL)
6446 HOST_WIDE_INT bitpos = startbitpos;
6448 if (DECL_SIZE (field) != 0)
6450 if (integer_zerop (DECL_SIZE (field)))
6451 continue;
6453 if (tree_fits_uhwi_p (bit_position (field)))
6454 bitpos += int_bit_position (field);
6457 /* ??? FIXME: else assume zero offset. */
6459 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6460 function_arg_record_value_1 (TREE_TYPE (field),
6461 bitpos,
6462 parms,
6463 packed_p);
6464 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6465 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6466 && TARGET_FPU
6467 && parms->named
6468 && ! packed_p)
6470 if (parms->intoffset != -1)
6472 unsigned int startbit, endbit;
6473 int intslots, this_slotno;
6475 startbit = parms->intoffset & -BITS_PER_WORD;
6476 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6478 intslots = (endbit - startbit) / BITS_PER_WORD;
6479 this_slotno = parms->slotno + parms->intoffset
6480 / BITS_PER_WORD;
6482 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6484 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6485 /* We need to pass this field on the stack. */
6486 parms->stack = 1;
6489 parms->nregs += intslots;
6490 parms->intoffset = -1;
6493 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
6494 If it wasn't true we wouldn't be here. */
6495 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6496 && DECL_MODE (field) == BLKmode)
6497 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6498 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6499 parms->nregs += 2;
6500 else
6501 parms->nregs += 1;
6503 else
6505 if (parms->intoffset == -1)
6506 parms->intoffset = bitpos;
6512 /* A subroutine of function_arg_record_value. Assign the bits of the
6513 structure between parms->intoffset and bitpos to integer registers. */
6515 static void
6516 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6517 struct function_arg_record_value_parms *parms)
6519 enum machine_mode mode;
6520 unsigned int regno;
6521 unsigned int startbit, endbit;
6522 int this_slotno, intslots, intoffset;
6523 rtx reg;
6525 if (parms->intoffset == -1)
6526 return;
6528 intoffset = parms->intoffset;
6529 parms->intoffset = -1;
6531 startbit = intoffset & -BITS_PER_WORD;
6532 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6533 intslots = (endbit - startbit) / BITS_PER_WORD;
6534 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6536 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6537 if (intslots <= 0)
6538 return;
6540 /* If this is the trailing part of a word, only load that much into
6541 the register. Otherwise load the whole register. Note that in
6542 the latter case we may pick up unwanted bits. It's not a problem
6543 at the moment, but we may wish to revisit this. */
6545 if (intoffset % BITS_PER_WORD != 0)
6546 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6547 MODE_INT);
6548 else
6549 mode = word_mode;
6551 intoffset /= BITS_PER_UNIT;
6554 regno = parms->regbase + this_slotno;
6555 reg = gen_rtx_REG (mode, regno);
6556 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6557 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6559 this_slotno += 1;
6560 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6561 mode = word_mode;
6562 parms->nregs += 1;
6563 intslots -= 1;
6565 while (intslots > 0);
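/* Worked example (illustrative, not part of the original source): with
   BITS_PER_WORD == 64, integer fields spanning INTOFFSET == 32 up to
   BITPOS == 128 give

     startbit = 32 & -64         == 0
     endbit   = (128 + 63) & -64 == 128
     intslots = (128 - 0) / 64   == 2

   so two registers are emitted: the first in a 32-bit MODE_INT mode
   covering the trailing half of word 0, the second in full word_mode. */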
6568 /* A subroutine of function_arg_record_value. Traverse the structure
6569 recursively and assign bits to floating point registers. Track which
6570 bits in between need integer registers; invoke function_arg_record_value_3
6571 to make that happen. */
6573 static void
6574 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6575 struct function_arg_record_value_parms *parms,
6576 bool packed_p)
6578 tree field;
6580 if (! packed_p)
6581 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6583 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6585 packed_p = true;
6586 break;
6590 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6592 if (TREE_CODE (field) == FIELD_DECL)
6594 HOST_WIDE_INT bitpos = startbitpos;
6596 if (DECL_SIZE (field) != 0)
6598 if (integer_zerop (DECL_SIZE (field)))
6599 continue;
6601 if (tree_fits_uhwi_p (bit_position (field)))
6602 bitpos += int_bit_position (field);
6605 /* ??? FIXME: else assume zero offset. */
6607 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6608 function_arg_record_value_2 (TREE_TYPE (field),
6609 bitpos,
6610 parms,
6611 packed_p);
6612 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6613 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6614 && TARGET_FPU
6615 && parms->named
6616 && ! packed_p)
6618 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6619 int regno, nregs, pos;
6620 enum machine_mode mode = DECL_MODE (field);
6621 rtx reg;
6623 function_arg_record_value_3 (bitpos, parms);
6625 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6626 && mode == BLKmode)
6628 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6629 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6631 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6633 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6634 nregs = 2;
6636 else
6637 nregs = 1;
6639 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6640 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6641 regno++;
6642 reg = gen_rtx_REG (mode, regno);
6643 pos = bitpos / BITS_PER_UNIT;
6644 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6645 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6646 parms->nregs += 1;
6647 while (--nregs > 0)
6649 regno += GET_MODE_SIZE (mode) / 4;
6650 reg = gen_rtx_REG (mode, regno);
6651 pos += GET_MODE_SIZE (mode);
6652 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6653 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6654 parms->nregs += 1;
6657 else
6659 if (parms->intoffset == -1)
6660 parms->intoffset = bitpos;
6666 /* Used by function_arg and sparc_function_value_1 to implement the complex
6667 conventions of the 64-bit ABI for passing and returning structures.
6668 Return an expression valid as a return value for the FUNCTION_ARG
6669 and TARGET_FUNCTION_VALUE.
6671 TYPE is the data type of the argument (as a tree).
6672 This is null for libcalls where that information may
6673 not be available.
6674 MODE is the argument's machine mode.
6675 SLOTNO is the index number of the argument's slot in the parameter array.
6676 NAMED is nonzero if this argument is a named parameter
6677 (otherwise it is an extra parameter matching an ellipsis).
6678 REGBASE is the regno of the base register for the parameter array. */
6680 static rtx
6681 function_arg_record_value (const_tree type, enum machine_mode mode,
6682 int slotno, int named, int regbase)
6684 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6685 struct function_arg_record_value_parms parms;
6686 unsigned int nregs;
6688 parms.ret = NULL_RTX;
6689 parms.slotno = slotno;
6690 parms.named = named;
6691 parms.regbase = regbase;
6692 parms.stack = 0;
6694 /* Compute how many registers we need. */
6695 parms.nregs = 0;
6696 parms.intoffset = 0;
6697 function_arg_record_value_1 (type, 0, &parms, false);
6699 /* Take into account pending integer fields. */
6700 if (parms.intoffset != -1)
6702 unsigned int startbit, endbit;
6703 int intslots, this_slotno;
6705 startbit = parms.intoffset & -BITS_PER_WORD;
6706 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6707 intslots = (endbit - startbit) / BITS_PER_WORD;
6708 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6710 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6712 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6713 /* We need to pass this field on the stack. */
6714 parms.stack = 1;
6717 parms.nregs += intslots;
6719 nregs = parms.nregs;
6721 /* Allocate the vector and handle some annoying special cases. */
6722 if (nregs == 0)
6724 /* ??? Empty structure has no value? Duh? */
6725 if (typesize <= 0)
6727 /* Though there's nothing really to store, return a word register
6728 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6729 leads to breakage because there are zero bytes to
6730 load. */
6731 return gen_rtx_REG (mode, regbase);
6733 else
6735 /* ??? C++ has structures with no fields, and yet a size. Give up
6736 for now and pass everything back in integer registers. */
6737 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6739 if (nregs + slotno > SPARC_INT_ARG_MAX)
6740 nregs = SPARC_INT_ARG_MAX - slotno;
6742 gcc_assert (nregs != 0);
6744 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6746 /* If at least one field must be passed on the stack, generate
6747 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6748 also be passed on the stack. We can't do much better because the
6749 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6750 of structures for which the fields passed exclusively in registers
6751 are not at the beginning of the structure. */
6752 if (parms.stack)
6753 XVECEXP (parms.ret, 0, 0)
6754 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6756 /* Fill in the entries. */
6757 parms.nregs = 0;
6758 parms.intoffset = 0;
6759 function_arg_record_value_2 (type, 0, &parms, false);
6760 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6762 gcc_assert (parms.nregs == nregs);
6764 return parms.ret;
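/* Worked example (illustrative, not part of the original source): for a
   named 16-byte 'struct { double d; int i; }' starting in slot 0 with
   TARGET_FPU, the first pass counts one FP register for 'd' and one
   integer slot for 'i', and the PARALLEL built by the second pass is
   roughly

     (parallel [(expr_list (reg:DF %f0) (const_int 0))
                (expr_list (reg:DI %o1) (const_int 8))]) */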
6767 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6768 of the 64-bit ABI for passing and returning unions.
6769 Return an expression valid as a return value for the FUNCTION_ARG
6770 and TARGET_FUNCTION_VALUE.
6772 SIZE is the size in bytes of the union.
6773 MODE is the argument's machine mode.
6774 REGNO is the hard register the union will be passed in. */
6776 static rtx
6777 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6778 int regno)
6780 int nwords = ROUND_ADVANCE (size), i;
6781 rtx regs;
6783 /* See comment in previous function for empty structures. */
6784 if (nwords == 0)
6785 return gen_rtx_REG (mode, regno);
6787 if (slotno == SPARC_INT_ARG_MAX - 1)
6788 nwords = 1;
6790 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6792 for (i = 0; i < nwords; i++)
6794 /* Unions are passed left-justified. */
6795 XVECEXP (regs, 0, i)
6796 = gen_rtx_EXPR_LIST (VOIDmode,
6797 gen_rtx_REG (word_mode, regno),
6798 GEN_INT (UNITS_PER_WORD * i));
6799 regno++;
6802 return regs;
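/* Illustrative sketch (not part of the original source): a 12-byte
   union arriving in slot 0 gives nwords == 2, so the PARALLEL pairs
   (%o0, offset 0) with (%o1, offset 8); being left-justified on this
   big-endian target, the union's first byte sits in the most
   significant byte of %o0. */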
6805 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6806 for passing and returning large (BLKmode) vectors.
6807 Return an expression valid as a return value for the FUNCTION_ARG
6808 and TARGET_FUNCTION_VALUE.
6810 SIZE is the size in bytes of the vector (at least 8 bytes).
6811 REGNO is the FP hard register the vector will be passed in. */
6813 static rtx
6814 function_arg_vector_value (int size, int regno)
6816 int i, nregs = size / 8;
6817 rtx regs;
6819 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6821 for (i = 0; i < nregs; i++)
6823 XVECEXP (regs, 0, i)
6824 = gen_rtx_EXPR_LIST (VOIDmode,
6825 gen_rtx_REG (DImode, regno + 2*i),
6826 GEN_INT (i*8));
6829 return regs;
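/* Illustrative sketch (not part of the original source): a 16-byte
   vector passed starting at FP register REGNO yields nregs == 2 and a
   PARALLEL of two DImode registers, REGNO and REGNO + 2, at byte
   offsets 0 and 8; the step of 2 reflects that FP registers are
   numbered in 4-byte quantities. */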
6832 /* Determine where to put an argument to a function.
6833 Value is zero to push the argument on the stack,
6834 or a hard register in which to store the argument.
6836 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6837 the preceding args and about the function being called.
6838 MODE is the argument's machine mode.
6839 TYPE is the data type of the argument (as a tree).
6840 This is null for libcalls where that information may
6841 not be available.
6842 NAMED is true if this argument is a named parameter
6843 (otherwise it is an extra parameter matching an ellipsis).
6844 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6845 TARGET_FUNCTION_INCOMING_ARG. */
6847 static rtx
6848 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6849 const_tree type, bool named, bool incoming_p)
6851 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6853 int regbase = (incoming_p
6854 ? SPARC_INCOMING_INT_ARG_FIRST
6855 : SPARC_OUTGOING_INT_ARG_FIRST);
6856 int slotno, regno, padding;
6857 enum mode_class mclass = GET_MODE_CLASS (mode);
6859 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6860 &regno, &padding);
6861 if (slotno == -1)
6862 return 0;
6864 /* Vector types deserve special treatment because they are polymorphic wrt
6865 their mode, depending upon whether VIS instructions are enabled. */
6866 if (type && TREE_CODE (type) == VECTOR_TYPE)
6868 HOST_WIDE_INT size = int_size_in_bytes (type);
6869 gcc_assert ((TARGET_ARCH32 && size <= 8)
6870 || (TARGET_ARCH64 && size <= 16));
6872 if (mode == BLKmode)
6873 return function_arg_vector_value (size,
6874 SPARC_FP_ARG_FIRST + 2*slotno);
6875 else
6876 mclass = MODE_FLOAT;
6879 if (TARGET_ARCH32)
6880 return gen_rtx_REG (mode, regno);
6882 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6883 and are promoted to registers if possible. */
6884 if (type && TREE_CODE (type) == RECORD_TYPE)
6886 HOST_WIDE_INT size = int_size_in_bytes (type);
6887 gcc_assert (size <= 16);
6889 return function_arg_record_value (type, mode, slotno, named, regbase);
6892 /* Unions up to 16 bytes in size are passed in integer registers. */
6893 else if (type && TREE_CODE (type) == UNION_TYPE)
6895 HOST_WIDE_INT size = int_size_in_bytes (type);
6896 gcc_assert (size <= 16);
6898 return function_arg_union_value (size, mode, slotno, regno);
6901 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6902 but also have the slot allocated for them.
6903 If no prototype is in scope fp values in register slots get passed
6904 in two places, either fp regs and int regs or fp regs and memory. */
6905 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6906 && SPARC_FP_REG_P (regno))
6908 rtx reg = gen_rtx_REG (mode, regno);
6909 if (cum->prototype_p || cum->libcall_p)
6911 /* "* 2" because fp reg numbers are recorded in 4 byte
6912 quantities. */
6913 #if 0
6914 /* ??? This will cause the value to be passed in the fp reg and
6915 in the stack. When a prototype exists we want to pass the
6916 value in the reg but reserve space on the stack. That's an
6917 optimization, and is deferred [for a bit]. */
6918 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6919 return gen_rtx_PARALLEL (mode,
6920 gen_rtvec (2,
6921 gen_rtx_EXPR_LIST (VOIDmode,
6922 NULL_RTX, const0_rtx),
6923 gen_rtx_EXPR_LIST (VOIDmode,
6924 reg, const0_rtx)));
6925 else
6926 #else
6927 /* ??? It seems that passing back a register even when past
6928 the area declared by REG_PARM_STACK_SPACE will allocate
6929 space appropriately, and will not copy the data onto the
6930 stack, exactly as we desire.
6932 This is due to locate_and_pad_parm being called in
6933 expand_call whenever reg_parm_stack_space > 0, which
6934 while beneficial to our example here, would seem to be
6935 in error from what had been intended. Ho hum... -- r~ */
6936 #endif
6937 return reg;
6939 else
6941 rtx v0, v1;
6943 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6945 int intreg;
6947 /* On incoming, we don't need to know that the value
6948 is passed in %f0 and %i0, and it confuses other parts
6949 causing needless spillage even on the simplest cases. */
6950 if (incoming_p)
6951 return reg;
6953 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6954 + (regno - SPARC_FP_ARG_FIRST) / 2);
6956 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6957 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6958 const0_rtx);
6959 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6961 else
6963 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6964 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6965 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6970 /* All other aggregate types are passed in an integer register in a mode
6971 corresponding to the size of the type. */
6972 else if (type && AGGREGATE_TYPE_P (type))
6974 HOST_WIDE_INT size = int_size_in_bytes (type);
6975 gcc_assert (size <= 16);
6977 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6980 return gen_rtx_REG (mode, regno);
6983 /* Handle the TARGET_FUNCTION_ARG target hook. */
6985 static rtx
6986 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6987 const_tree type, bool named)
6989 return sparc_function_arg_1 (cum, mode, type, named, false);
6992 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6994 static rtx
6995 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6996 const_tree type, bool named)
6998 return sparc_function_arg_1 (cum, mode, type, named, true);
7001 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
7003 static unsigned int
7004 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
7006 return ((TARGET_ARCH64
7007 && (GET_MODE_ALIGNMENT (mode) == 128
7008 || (type && TYPE_ALIGN (type) == 128)))
7009 ? 128
7010 : PARM_BOUNDARY);
7013 /* For an arg passed partly in registers and partly in memory,
7014 this is the number of bytes of registers used.
7015 For args passed entirely in registers or entirely in memory, zero.
7017 Any arg that starts in the first 6 regs but won't entirely fit in them
7018 needs partial registers on v8. On v9, structures with integer
7019 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7020 values that begin in the last fp reg [where "last fp reg" varies with the
7021 mode] will be split between that reg and memory. */
7023 static int
7024 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
7025 tree type, bool named)
7027 int slotno, regno, padding;
7029 /* We pass false for incoming_p here; it doesn't matter. */
7030 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7031 false, &regno, &padding);
7033 if (slotno == -1)
7034 return 0;
7036 if (TARGET_ARCH32)
7038 if ((slotno + (mode == BLKmode
7039 ? ROUND_ADVANCE (int_size_in_bytes (type))
7040 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
7041 > SPARC_INT_ARG_MAX)
7042 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7044 else
7046 /* We are guaranteed by pass_by_reference that the size of the
7047 argument is not greater than 16 bytes, so we only need to return
7048 one word if the argument is partially passed in registers. */
7050 if (type && AGGREGATE_TYPE_P (type))
7052 int size = int_size_in_bytes (type);
7054 if (size > UNITS_PER_WORD
7055 && slotno == SPARC_INT_ARG_MAX - 1)
7056 return UNITS_PER_WORD;
7058 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7059 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7060 && ! (TARGET_FPU && named)))
7062 /* The complex types are passed as packed types. */
7063 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7064 && slotno == SPARC_INT_ARG_MAX - 1)
7065 return UNITS_PER_WORD;
7067 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7069 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7070 > SPARC_FP_ARG_MAX)
7071 return UNITS_PER_WORD;
7075 return 0;
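/* Worked example (illustrative, not part of the original source): on
   TARGET_ARCH32 with UNITS_PER_WORD == 4, a 'double' starting in slot 5
   has 5 + ROUND_ADVANCE (8) == 7 > SPARC_INT_ARG_MAX, so
   (6 - 5) * 4 == 4 bytes travel in %o5 and the other 4 on the stack. */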
7078 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7079 Specify whether to pass the argument by reference. */
7081 static bool
7082 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7083 enum machine_mode mode, const_tree type,
7084 bool named ATTRIBUTE_UNUSED)
7086 if (TARGET_ARCH32)
7087 /* Original SPARC 32-bit ABI says that structures and unions,
7088 and quad-precision floats are passed by reference. For Pascal,
7089 also pass arrays by reference. All other base types are passed
7090 in registers.
7092 Extended ABI (as implemented by the Sun compiler) says that all
7093 complex floats are passed by reference. Pass complex integers
7094 in registers up to 8 bytes. More generally, enforce the 2-word
7095 cap for passing arguments in registers.
7097 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7098 integers are passed like floats of the same size, that is in
7099 registers up to 8 bytes. Pass all vector floats by reference
7100 like structure and unions. */
7101 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7102 || mode == SCmode
7103 /* Catch CDImode, TFmode, DCmode and TCmode. */
7104 || GET_MODE_SIZE (mode) > 8
7105 || (type
7106 && TREE_CODE (type) == VECTOR_TYPE
7107 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7108 else
7109 /* Original SPARC 64-bit ABI says that structures and unions
7110 smaller than 16 bytes are passed in registers, as well as
7111 all other base types.
7113 Extended ABI (as implemented by the Sun compiler) says that
7114 complex floats are passed in registers up to 16 bytes. Pass
7115 all complex integers in registers up to 16 bytes. More generally,
7116 enforce the 2-word cap for passing arguments in registers.
7118 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7119 integers are passed like floats of the same size, that is in
7120 registers (up to 16 bytes). Pass all vector floats like structure
7121 and unions. */
7122 return ((type
7123 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7124 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7125 /* Catch CTImode and TCmode. */
7126 || GET_MODE_SIZE (mode) > 16);
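/* Illustrative summary of the rules above (not part of the original
   source):

     struct { int i; }     32-bit: reference   64-bit: value
     _Complex float        32-bit: reference   64-bit: value
     long double           32-bit: reference   64-bit: value
     _Complex long double  32-bit: reference   64-bit: reference */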
7129 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7130 Update the data in CUM to advance over an argument
7131 of mode MODE and data type TYPE.
7132 TYPE is null for libcalls where that information may not be available. */
7134 static void
7135 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7136 const_tree type, bool named)
7138 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7139 int regno, padding;
7141 /* We pass false for incoming_p here; it doesn't matter. */
7142 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7144 /* If argument requires leading padding, add it. */
7145 cum->words += padding;
7147 if (TARGET_ARCH32)
7149 cum->words += (mode != BLKmode
7150 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7151 : ROUND_ADVANCE (int_size_in_bytes (type)));
7153 else
7155 if (type && AGGREGATE_TYPE_P (type))
7157 int size = int_size_in_bytes (type);
7159 if (size <= 8)
7160 ++cum->words;
7161 else if (size <= 16)
7162 cum->words += 2;
7163 else /* passed by reference */
7164 ++cum->words;
7166 else
7168 cum->words += (mode != BLKmode
7169 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7170 : ROUND_ADVANCE (int_size_in_bytes (type)));
7175 /* Handle the FUNCTION_ARG_PADDING macro.
7176 For the 64 bit ABI structs are always stored left shifted in their
7177 argument slot. */
7179 enum direction
7180 function_arg_padding (enum machine_mode mode, const_tree type)
7182 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7183 return upward;
7185 /* Fall back to the default. */
7186 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7189 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7190 Specify whether to return the return value in memory. */
7192 static bool
7193 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7195 if (TARGET_ARCH32)
7196 /* Original SPARC 32-bit ABI says that structures and unions,
7197 and quad-precision floats are returned in memory. All other
7198 base types are returned in registers.
7200 Extended ABI (as implemented by the Sun compiler) says that
7201 all complex floats are returned in registers (8 FP registers
7202 at most for '_Complex long double'). Return all complex integers
7203 in registers (4 at most for '_Complex long long').
7205 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7206 integers are returned like floats of the same size, that is in
7207 registers up to 8 bytes and in memory otherwise. Return all
7208 vector floats in memory like structure and unions; note that
7209 they always have BLKmode like the latter. */
7210 return (TYPE_MODE (type) == BLKmode
7211 || TYPE_MODE (type) == TFmode
7212 || (TREE_CODE (type) == VECTOR_TYPE
7213 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7214 else
7215 /* Original SPARC 64-bit ABI says that structures and unions
7216 smaller than 32 bytes are returned in registers, as well as
7217 all other base types.
7219 Extended ABI (as implemented by the Sun compiler) says that all
7220 complex floats are returned in registers (8 FP registers at most
7221 for '_Complex long double'). Return all complex integers in
7222 registers (4 at most for '_Complex TItype').
7224 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7225 integers are returned like floats of the same size, that is in
7226 registers. Return all vector floats like structure and unions;
7227 note that they always have BLKmode like the latter. */
7228 return (TYPE_MODE (type) == BLKmode
7229 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
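/* Illustrative consequence (not part of the original source): a 24-byte
   struct has BLKmode, so it is returned in memory under the 32-bit ABI
   but in registers under the 64-bit ABI, where only BLKmode objects
   larger than 32 bytes go to memory. */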
7232 /* Handle the TARGET_STRUCT_VALUE target hook.
7233 Return where to find the structure return value address. */
7235 static rtx
7236 sparc_struct_value_rtx (tree fndecl, int incoming)
7238 if (TARGET_ARCH64)
7239 return 0;
7240 else
7242 rtx mem;
7244 if (incoming)
7245 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7246 STRUCT_VALUE_OFFSET));
7247 else
7248 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7249 STRUCT_VALUE_OFFSET));
7251 /* Only follow the SPARC ABI for fixed-size structure returns.
7252 Variable size structure returns are handled per the normal
7253 procedures in GCC. This is enabled by -mstd-struct-return. */
7254 if (incoming == 2
7255 && sparc_std_struct_return
7256 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7257 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7259 /* We must check and adjust the return address, as it is
7260 optional as to whether the return object is really
7261 provided. */
7262 rtx ret_reg = gen_rtx_REG (Pmode, 31);
7263 rtx scratch = gen_reg_rtx (SImode);
7264 rtx_code_label *endlab = gen_label_rtx ();
7266 /* Calculate the return object size */
7267 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7268 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7269 /* Construct a temporary return value */
7270 rtx temp_val
7271 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7273 /* Implement SPARC 32-bit psABI callee return struct checking:
7275 Fetch the instruction where we will return to and see if
7276 it's an unimp instruction (the most significant 10 bits
7277 will be zero). */
7278 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7279 plus_constant (Pmode,
7280 ret_reg, 8)));
7281 /* Assume the size is valid and pre-adjust */
7282 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7283 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7284 0, endlab);
7285 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7286 /* Write the address of the memory pointed to by temp_val into
7287 the memory pointed to by mem */
7288 emit_move_insn (mem, XEXP (temp_val, 0));
7289 emit_label (endlab);
7292 return mem;
7296 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7297 For v9, function return values are subject to the same rules as arguments,
7298 except that up to 32 bytes may be returned in registers. */
7300 static rtx
7301 sparc_function_value_1 (const_tree type, enum machine_mode mode,
7302 bool outgoing)
7304 /* Beware that the two values are swapped here wrt function_arg. */
7305 int regbase = (outgoing
7306 ? SPARC_INCOMING_INT_ARG_FIRST
7307 : SPARC_OUTGOING_INT_ARG_FIRST);
7308 enum mode_class mclass = GET_MODE_CLASS (mode);
7309 int regno;
7311 /* Vector types deserve special treatment because they are polymorphic wrt
7312 their mode, depending upon whether VIS instructions are enabled. */
7313 if (type && TREE_CODE (type) == VECTOR_TYPE)
7315 HOST_WIDE_INT size = int_size_in_bytes (type);
7316 gcc_assert ((TARGET_ARCH32 && size <= 8)
7317 || (TARGET_ARCH64 && size <= 32));
7319 if (mode == BLKmode)
7320 return function_arg_vector_value (size,
7321 SPARC_FP_ARG_FIRST);
7322 else
7323 mclass = MODE_FLOAT;
7326 if (TARGET_ARCH64 && type)
7328 /* Structures up to 32 bytes in size are returned in registers. */
7329 if (TREE_CODE (type) == RECORD_TYPE)
7331 HOST_WIDE_INT size = int_size_in_bytes (type);
7332 gcc_assert (size <= 32);
7334 return function_arg_record_value (type, mode, 0, 1, regbase);
7337 /* Unions up to 32 bytes in size are returned in integer registers. */
7338 else if (TREE_CODE (type) == UNION_TYPE)
7340 HOST_WIDE_INT size = int_size_in_bytes (type);
7341 gcc_assert (size <= 32);
7343 return function_arg_union_value (size, mode, 0, regbase);
7346 /* Objects that require it are returned in FP registers. */
7347 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7350 /* All other aggregate types are returned in an integer register in a
7351 mode corresponding to the size of the type. */
7352 else if (AGGREGATE_TYPE_P (type))
7356 HOST_WIDE_INT size = int_size_in_bytes (type);
7357 gcc_assert (size <= 32);
7359 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7361 /* ??? We probably should have made the same ABI change in
7362 3.4.0 as the one we made for unions. The latter was
7363 required by the SCD though, while the former is not
7364 specified, so we favored compatibility and efficiency.
7366 Now we're stuck for aggregates larger than 16 bytes,
7367 because OImode vanished in the meantime. Let's not
7368 try to be unduly clever, and simply follow the ABI
7369 for unions in that case. */
7370 if (mode == BLKmode)
7371 return function_arg_union_value (size, mode, 0, regbase);
7372 else
7373 mclass = MODE_INT;
7376 /* We should only have pointer and integer types at this point. This
7377 must match sparc_promote_function_mode. */
7378 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7379 mode = word_mode;
7382 /* We should only have pointer and integer types at this point. This must
7383 match sparc_promote_function_mode. */
7384 else if (TARGET_ARCH32
7385 && mclass == MODE_INT
7386 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7387 mode = word_mode;
7389 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7390 regno = SPARC_FP_ARG_FIRST;
7391 else
7392 regno = regbase;
7394 return gen_rtx_REG (mode, regno);
7397 /* Handle TARGET_FUNCTION_VALUE.
7398 On the SPARC, the value is found in the first "output" register, but the
7399 called function leaves it in the first "input" register. */
7401 static rtx
7402 sparc_function_value (const_tree valtype,
7403 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7404 bool outgoing)
7406 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7409 /* Handle TARGET_LIBCALL_VALUE. */
7411 static rtx
7412 sparc_libcall_value (enum machine_mode mode,
7413 const_rtx fun ATTRIBUTE_UNUSED)
7415 return sparc_function_value_1 (NULL_TREE, mode, false);
7418 /* Handle FUNCTION_VALUE_REGNO_P.
7419 On the SPARC, the first "output" reg is used for integer values, and the
7420 first floating point register is used for floating point values. */
7422 static bool
7423 sparc_function_value_regno_p (const unsigned int regno)
7425 return (regno == 8 || regno == 32);
7428 /* Do what is necessary for `va_start'. We look at the current function
7429 to determine if stdarg or varargs is used and return the address of
7430 the first unnamed parameter. */
7432 static rtx
7433 sparc_builtin_saveregs (void)
7435 int first_reg = crtl->args.info.words;
7436 rtx address;
7437 int regno;
7439 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7440 emit_move_insn (gen_rtx_MEM (word_mode,
7441 gen_rtx_PLUS (Pmode,
7442 frame_pointer_rtx,
7443 GEN_INT (FIRST_PARM_OFFSET (0)
7444 + (UNITS_PER_WORD
7445 * regno)))),
7446 gen_rtx_REG (word_mode,
7447 SPARC_INCOMING_INT_ARG_FIRST + regno));
7449 address = gen_rtx_PLUS (Pmode,
7450 frame_pointer_rtx,
7451 GEN_INT (FIRST_PARM_OFFSET (0)
7452 + UNITS_PER_WORD * first_reg));
7454 return address;
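/* Illustrative sketch (not part of the original source): for

     int sum (int n, ...);

   crtl->args.info.words is 1 after the named 'n', so the loop above
   stores the remaining incoming argument registers (%i1..%i5 in the
   standard register-window model) into their reserved stack slots and
   returns the address of the first of them for va_arg to scan from. */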
7457 /* Implement `va_start' for stdarg. */
7459 static void
7460 sparc_va_start (tree valist, rtx nextarg)
7462 nextarg = expand_builtin_saveregs ();
7463 std_expand_builtin_va_start (valist, nextarg);
7466 /* Implement `va_arg' for stdarg. */
7468 static tree
7469 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7470 gimple_seq *post_p)
7472 HOST_WIDE_INT size, rsize, align;
7473 tree addr, incr;
7474 bool indirect;
7475 tree ptrtype = build_pointer_type (type);
7477 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7479 indirect = true;
7480 size = rsize = UNITS_PER_WORD;
7481 align = 0;
7483 else
7485 indirect = false;
7486 size = int_size_in_bytes (type);
7487 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7488 align = 0;
7490 if (TARGET_ARCH64)
7492 /* For SPARC64, objects requiring 16-byte alignment get it. */
7493 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7494 align = 2 * UNITS_PER_WORD;
7496 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7497 are left-justified in their slots. */
7498 if (AGGREGATE_TYPE_P (type))
7500 if (size == 0)
7501 size = rsize = UNITS_PER_WORD;
7502 else
7503 size = rsize;
7508 incr = valist;
7509 if (align)
7511 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7512 incr = fold_convert (sizetype, incr);
7513 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7514 size_int (-align));
7515 incr = fold_convert (ptr_type_node, incr);
7518 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7519 addr = incr;
7521 if (BYTES_BIG_ENDIAN && size < rsize)
7522 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7524 if (indirect)
7526 addr = fold_convert (build_pointer_type (ptrtype), addr);
7527 addr = build_va_arg_indirect_ref (addr);
7530 /* If the address isn't aligned properly for the type, we need a temporary.
7531 FIXME: This is inefficient; usually we can do this in registers. */
7532 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7534 tree tmp = create_tmp_var (type, "va_arg_tmp");
7535 tree dest_addr = build_fold_addr_expr (tmp);
7536 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7537 3, dest_addr, addr, size_int (rsize));
7538 TREE_ADDRESSABLE (tmp) = 1;
7539 gimplify_and_add (copy, pre_p);
7540 addr = dest_addr;
7543 else
7544 addr = fold_convert (ptrtype, addr);
7546 incr = fold_build_pointer_plus_hwi (incr, rsize);
7547 gimplify_assign (valist, incr, post_p);
7549 return build_va_arg_indirect_ref (addr);
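/* Illustrative sketch (not part of the original source): for
   'va_arg (ap, double)' in 64-bit mode the expansion above amounts to

     addr = ap;                    (no extra alignment needed)
     ap   = ap + 8;                (rsize)
     result = *(double *) addr;

   while a 16-byte-aligned type first rounds 'ap' up using the
   BIT_AND_EXPR sequence. */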
7552 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7553 Specify whether the vector mode is supported by the hardware. */
7555 static bool
7556 sparc_vector_mode_supported_p (enum machine_mode mode)
7558 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7561 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7563 static enum machine_mode
7564 sparc_preferred_simd_mode (enum machine_mode mode)
7566 if (TARGET_VIS)
7567 switch (mode)
7569 case SImode:
7570 return V2SImode;
7571 case HImode:
7572 return V4HImode;
7573 case QImode:
7574 return V8QImode;
7576 default:;
7579 return word_mode;
7582 /* Return the string to output an unconditional branch to LABEL, which is
7583 the operand number of the label.
7585 DEST is the destination insn (i.e. the label), INSN is the source. */
7587 const char *
7588 output_ubranch (rtx dest, rtx_insn *insn)
7590 static char string[64];
7591 bool v9_form = false;
7592 int delta;
7593 char *p;
7595 /* Even if we are trying to use cbcond for this, evaluate
7596 whether we can use V9 branches as our backup plan. */
7598 delta = 5000000;
7599 if (INSN_ADDRESSES_SET_P ())
7600 delta = (INSN_ADDRESSES (INSN_UID (dest))
7601 - INSN_ADDRESSES (INSN_UID (insn)));
7603 /* Leave some instructions for "slop". */
7604 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7605 v9_form = true;
7607 if (TARGET_CBCOND)
7609 bool emit_nop = emit_cbcond_nop (insn);
7610 bool far = false;
7611 const char *rval;
7613 if (delta < -500 || delta > 500)
7614 far = true;
7616 if (far)
7618 if (v9_form)
7619 rval = "ba,a,pt\t%%xcc, %l0";
7620 else
7621 rval = "b,a\t%l0";
7623 else
7625 if (emit_nop)
7626 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7627 else
7628 rval = "cwbe\t%%g0, %%g0, %l0";
7630 return rval;
7633 if (v9_form)
7634 strcpy (string, "ba%*,pt\t%%xcc, ");
7635 else
7636 strcpy (string, "b%*\t");
7638 p = strchr (string, '\0');
7639 *p++ = '%';
7640 *p++ = 'l';
7641 *p++ = '0';
7642 *p++ = '%';
7643 *p++ = '(';
7644 *p = '\0';
7646 return string;
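/* Worked example (illustrative, not part of the original source): with
   INSN_ADDRESSES available and delta == 2000, the cbcond path treats
   the branch as far (|delta| > 500) and falls back to
   "ba,a,pt %xcc, %l0" since the V9 form is in range, while delta == 300
   stays short and yields "cwbe %g0, %g0, %l0". */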
7649 /* Return the string to output a conditional branch to LABEL, which is
7650 the operand number of the label. OP is the conditional expression.
7651 XEXP (OP, 0) is assumed to be a condition code register (integer or
7652 floating point) and its mode specifies what kind of comparison we made.
7654 DEST is the destination insn (i.e. the label), INSN is the source.
7656 REVERSED is nonzero if we should reverse the sense of the comparison.
7658 ANNUL is nonzero if we should generate an annulling branch. */
7660 const char *
7661 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7662 rtx_insn *insn)
7664 static char string[64];
7665 enum rtx_code code = GET_CODE (op);
7666 rtx cc_reg = XEXP (op, 0);
7667 enum machine_mode mode = GET_MODE (cc_reg);
7668 const char *labelno, *branch;
7669 int spaces = 8, far;
7670 char *p;
7672 /* v9 branches are limited to +-1MB. If it is too far away,
7673 change
7675 bne,pt %xcc, .LC30
7677 to
7679 be,pn %xcc, .+12
7680  nop
7681 ba .LC30
7683 and
7685 fbne,a,pn %fcc2, .LC29
7687 to
7689 fbe,pt %fcc2, .+16
7690  nop
7691 ba .LC29 */
7693 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7694 if (reversed ^ far)
7696 /* Reversal of FP compares takes care -- an ordered compare
7697 becomes an unordered compare and vice versa. */
7698 if (mode == CCFPmode || mode == CCFPEmode)
7699 code = reverse_condition_maybe_unordered (code);
7700 else
7701 code = reverse_condition (code);
7704 /* Start by writing the branch condition. */
7705 if (mode == CCFPmode || mode == CCFPEmode)
7707 switch (code)
7709 case NE:
7710 branch = "fbne";
7711 break;
7712 case EQ:
7713 branch = "fbe";
7714 break;
7715 case GE:
7716 branch = "fbge";
7717 break;
7718 case GT:
7719 branch = "fbg";
7720 break;
7721 case LE:
7722 branch = "fble";
7723 break;
7724 case LT:
7725 branch = "fbl";
7726 break;
7727 case UNORDERED:
7728 branch = "fbu";
7729 break;
7730 case ORDERED:
7731 branch = "fbo";
7732 break;
7733 case UNGT:
7734 branch = "fbug";
7735 break;
7736 case UNLT:
7737 branch = "fbul";
7738 break;
7739 case UNEQ:
7740 branch = "fbue";
7741 break;
7742 case UNGE:
7743 branch = "fbuge";
7744 break;
7745 case UNLE:
7746 branch = "fbule";
7747 break;
7748 case LTGT:
7749 branch = "fblg";
7750 break;
7752 default:
7753 gcc_unreachable ();
7756 /* ??? !v9: FP branches cannot be preceded by another floating point
7757 insn. Because there is currently no concept of pre-delay slots,
7758 we can fix this only by always emitting a nop before a floating
7759 point branch. */
7761 string[0] = '\0';
7762 if (! TARGET_V9)
7763 strcpy (string, "nop\n\t");
7764 strcat (string, branch);
7766 else
7768 switch (code)
7770 case NE:
7771 branch = "bne";
7772 break;
7773 case EQ:
7774 branch = "be";
7775 break;
7776 case GE:
7777 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7778 branch = "bpos";
7779 else
7780 branch = "bge";
7781 break;
7782 case GT:
7783 branch = "bg";
7784 break;
7785 case LE:
7786 branch = "ble";
7787 break;
7788 case LT:
7789 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7790 branch = "bneg";
7791 else
7792 branch = "bl";
7793 break;
7794 case GEU:
7795 branch = "bgeu";
7796 break;
7797 case GTU:
7798 branch = "bgu";
7799 break;
7800 case LEU:
7801 branch = "bleu";
7802 break;
7803 case LTU:
7804 branch = "blu";
7805 break;
7807 default:
7808 gcc_unreachable ();
7810 strcpy (string, branch);
7812 spaces -= strlen (branch);
7813 p = strchr (string, '\0');
7815 /* Now add the annulling, the label, and a possible nop. */
7816 if (annul && ! far)
7818 strcpy (p, ",a");
7819 p += 2;
7820 spaces -= 2;
7823 if (TARGET_V9)
7825 rtx note;
7826 int v8 = 0;
7828 if (! far && insn && INSN_ADDRESSES_SET_P ())
7830 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7831 - INSN_ADDRESSES (INSN_UID (insn)));
7832 /* Leave some instructions for "slop". */
7833 if (delta < -260000 || delta >= 260000)
7834 v8 = 1;
7837 if (mode == CCFPmode || mode == CCFPEmode)
7839 static char v9_fcc_labelno[] = "%%fccX, ";
7840 /* Set the char indicating the number of the fcc reg to use. */
7841 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7842 labelno = v9_fcc_labelno;
7843 if (v8)
7845 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7846 labelno = "";
7849 else if (mode == CCXmode || mode == CCX_NOOVmode)
7851 labelno = "%%xcc, ";
7852 gcc_assert (! v8);
7854 else
7856 labelno = "%%icc, ";
7857 if (v8)
7858 labelno = "";
7861 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7863 strcpy (p,
7864 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7865 ? ",pt" : ",pn");
7866 p += 3;
7867 spaces -= 3;
7870 else
7871 labelno = "";
7873 if (spaces > 0)
7874 *p++ = '\t';
7875 else
7876 *p++ = ' ';
7877 strcpy (p, labelno);
7878 p = strchr (p, '\0');
7879 if (far)
7881 strcpy (p, ".+12\n\t nop\n\tb\t");
7882 /* Skip the next insn if requested or
7883 if we know that it will be a nop. */
7884 if (annul || ! final_sequence)
7885 p[3] = '6';
7886 p += 14;
7888 *p++ = '%';
7889 *p++ = 'l';
7890 *p++ = label + '0';
7891 *p++ = '%';
7892 *p++ = '#';
7893 *p = '\0';
7895 return string;
7898 /* Emit a library call comparison between floating point X and Y.
7899 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7900 Return the new operator to be used in the comparison sequence.
7902 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7903 values as arguments instead of the TFmode registers themselves,
7904 that's why we cannot call emit_float_lib_cmp. */
7906 rtx
7907 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7909 const char *qpfunc;
7910 rtx slot0, slot1, result, tem, tem2, libfunc;
7911 enum machine_mode mode;
7912 enum rtx_code new_comparison;
7914 switch (comparison)
7916 case EQ:
7917 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7918 break;
7920 case NE:
7921 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7922 break;
7924 case GT:
7925 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7926 break;
7928 case GE:
7929 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7930 break;
7932 case LT:
7933 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7934 break;
7936 case LE:
7937 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7938 break;
7940 case ORDERED:
7941 case UNORDERED:
7942 case UNGT:
7943 case UNLT:
7944 case UNEQ:
7945 case UNGE:
7946 case UNLE:
7947 case LTGT:
7948 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7949 break;
7951 default:
7952 gcc_unreachable ();
7955 if (TARGET_ARCH64)
7957 if (MEM_P (x))
7959 tree expr = MEM_EXPR (x);
7960 if (expr)
7961 mark_addressable (expr);
7962 slot0 = x;
7964 else
7966 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7967 emit_move_insn (slot0, x);
7970 if (MEM_P (y))
7972 tree expr = MEM_EXPR (y);
7973 if (expr)
7974 mark_addressable (expr);
7975 slot1 = y;
7977 else
7979 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7980 emit_move_insn (slot1, y);
7983 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7984 emit_library_call (libfunc, LCT_NORMAL,
7985 DImode, 2,
7986 XEXP (slot0, 0), Pmode,
7987 XEXP (slot1, 0), Pmode);
7988 mode = DImode;
7990 else
7992 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7993 emit_library_call (libfunc, LCT_NORMAL,
7994 SImode, 2,
7995 x, TFmode, y, TFmode);
7996 mode = SImode;
8000 /* Immediately move the result of the libcall into a pseudo
8001 register so reload doesn't clobber the value if it needs
8002 the return register for a spill reg. */
8003 result = gen_reg_rtx (mode);
8004 emit_move_insn (result, hard_libcall_value (mode, libfunc));
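  /* A decoding note added for clarity: the _Q_cmp/_Qp_cmp libcalls
     return an fcc-style value -- 0 for equal, 1 for less, 2 for
     greater, 3 for unordered -- so each unordered comparison below is
     rewritten as an integer test against that encoding (e.g. UNGT
     holds exactly when the result is greater than 1, i.e. greater or
     unordered).  */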
8006 switch (comparison)
8008 default:
8009 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8010 case ORDERED:
8011 case UNORDERED:
8012 new_comparison = (comparison == UNORDERED ? EQ : NE);
8013 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8014 case UNGT:
8015 case UNGE:
8016 new_comparison = (comparison == UNGT ? GT : NE);
8017 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8018 case UNLE:
8019 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8020 case UNLT:
8021 tem = gen_reg_rtx (mode);
8022 if (TARGET_ARCH32)
8023 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8024 else
8025 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8026 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8027 case UNEQ:
8028 case LTGT:
8029 tem = gen_reg_rtx (mode);
8030 if (TARGET_ARCH32)
8031 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8032 else
8033 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8034 tem2 = gen_reg_rtx (mode);
8035 if (TARGET_ARCH32)
8036 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8037 else
8038 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8039 new_comparison = (comparison == UNEQ ? EQ : NE);
8040 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8043 gcc_unreachable ();
8046 /* Generate an unsigned DImode to FP conversion. This is the same code
8047 optabs would emit if we didn't have TFmode patterns. */
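/* A rough C equivalent of the sequence emitted below, shown for a
   double result -- an illustrative sketch, not part of the emitted RTL:

	double floatunsdi (unsigned long long x)
	{
	  if ((long long) x >= 0)
	    return (double) (long long) x;
	  unsigned long long half = (x >> 1) | (x & 1);
	  double f = (double) (long long) half;
	  return f + f;
	}

   Halving the value while folding the low bit back in keeps rounding
   correct, and doubling the converted result restores the magnitude.  */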
8049 void
8050 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
8052 rtx i0, i1, f0, in, out;
8054 out = operands[0];
8055 in = force_reg (DImode, operands[1]);
8056 rtx_code_label *neglab = gen_label_rtx ();
8057 rtx_code_label *donelab = gen_label_rtx ();
8058 i0 = gen_reg_rtx (DImode);
8059 i1 = gen_reg_rtx (DImode);
8060 f0 = gen_reg_rtx (mode);
8062 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8064 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
8065 emit_jump_insn (gen_jump (donelab));
8066 emit_barrier ();
8068 emit_label (neglab);
8070 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8071 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8072 emit_insn (gen_iordi3 (i0, i0, i1));
8073 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
8074 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
8076 emit_label (donelab);
8079 /* Generate an FP to unsigned DImode conversion. This is the same code
8080 optabs would emit if we didn't have TFmode patterns. */
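/* A rough C equivalent of the sequence emitted below, shown for a
   double operand -- an illustrative sketch, not part of the emitted RTL:

	unsigned long long fixunsdi (double x)
	{
	  const double limit = 9223372036854775808.0;
	  if (x < limit)
	    return (unsigned long long) (long long) x;
	  return (unsigned long long) (long long) (x - limit)
		 ^ (1ULL << 63);
	}

   The constant is 2**63: values below it fit a signed conversion;
   larger ones are biased down by 2**63, converted, and the top bit is
   put back with the xor.  */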
8082 void
8083 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
8085 rtx i0, i1, f0, in, out, limit;
8087 out = operands[0];
8088 in = force_reg (mode, operands[1]);
8089 rtx_code_label *neglab = gen_label_rtx ();
8090 rtx_code_label *donelab = gen_label_rtx ();
8091 i0 = gen_reg_rtx (DImode);
8092 i1 = gen_reg_rtx (DImode);
8093 limit = gen_reg_rtx (mode);
8094 f0 = gen_reg_rtx (mode);
8096 emit_move_insn (limit,
8097 CONST_DOUBLE_FROM_REAL_VALUE (
8098 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8099 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8101 emit_insn (gen_rtx_SET (VOIDmode,
8102 out,
8103 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8104 emit_jump_insn (gen_jump (donelab));
8105 emit_barrier ();
8107 emit_label (neglab);
8109 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
8110 emit_insn (gen_rtx_SET (VOIDmode,
8111 i0,
8112 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8113 emit_insn (gen_movdi (i1, const1_rtx));
8114 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8115 emit_insn (gen_xordi3 (out, i0, i1));
8117 emit_label (donelab);
8120 /* Return the string to output a compare and branch instruction to DEST.
8121 DEST is the destination insn (i.e. the label), INSN is the source,
8122 and OP is the conditional expression. */
8124 const char *
8125 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8127 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8128 enum rtx_code code = GET_CODE (op);
8129 const char *cond_str, *tmpl;
8130 int far, emit_nop, len;
8131 static char string[64];
8132 char size_char;
8134 /* Compare and Branch is limited to +-2KB. If it is too far away,
8135 change
8137 cxbne X, Y, .LC30
8139 to
8141 cxbe X, Y, .+16
8142 nop
8143 ba,pt xcc, .LC30
8144 nop */
8146 len = get_attr_length (insn);
8148 far = len == 4;
8149 emit_nop = len == 2;
8151 if (far)
8152 code = reverse_condition (code);
8154 size_char = ((mode == SImode) ? 'w' : 'x');
8156 switch (code)
8158 case NE:
8159 cond_str = "ne";
8160 break;
8162 case EQ:
8163 cond_str = "e";
8164 break;
8166 case GE:
8167 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8168 cond_str = "pos";
8169 else
8170 cond_str = "ge";
8171 break;
8173 case GT:
8174 cond_str = "g";
8175 break;
8177 case LE:
8178 cond_str = "le";
8179 break;
8181 case LT:
8182 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8183 cond_str = "neg";
8184 else
8185 cond_str = "l";
8186 break;
8188 case GEU:
8189 cond_str = "cc";
8190 break;
8192 case GTU:
8193 cond_str = "gu";
8194 break;
8196 case LEU:
8197 cond_str = "leu";
8198 break;
8200 case LTU:
8201 cond_str = "cs";
8202 break;
8204 default:
8205 gcc_unreachable ();
8208 if (far)
8210 int veryfar = 1, delta;
8212 if (INSN_ADDRESSES_SET_P ())
8214 delta = (INSN_ADDRESSES (INSN_UID (dest))
8215 - INSN_ADDRESSES (INSN_UID (insn)));
8216 /* Leave some instructions for "slop". */
8217 if (delta >= -260000 && delta < 260000)
8218 veryfar = 0;
8221 if (veryfar)
8222 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8223 else
8224 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8226 else
8228 if (emit_nop)
8229 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8230 else
8231 tmpl = "c%cb%s\t%%1, %%2, %%3";
8234 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8236 return string;
8239 /* Return the string to output a conditional branch to LABEL, testing
8240 register REG. LABEL is the operand number of the label; REG is the
8241 operand number of the reg. OP is the conditional expression. The mode
8242 of REG says what kind of comparison we made.
8244 DEST is the destination insn (i.e. the label), INSN is the source.
8246 REVERSED is nonzero if we should reverse the sense of the comparison.
8248 ANNUL is nonzero if we should generate an annulling branch. */
8250 const char *
8251 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8252 int annul, rtx_insn *insn)
8254 static char string[64];
8255 enum rtx_code code = GET_CODE (op);
8256 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8257 rtx note;
8258 int far;
8259 char *p;
8261 /* Branches on registers are limited to +-128KB. If it is too far away,
8262 change
8264 brnz,pt %g1, .LC30
8266 to
8268 brz,pn %g1, .+12
8269 nop
8270 ba,pt %xcc, .LC30
8272 and
8274 brgez,a,pn %o1, .LC29
8276 to
8278 brlz,pt %o1, .+16
8279 nop
8280 ba,pt %xcc, .LC29 */
8282 far = get_attr_length (insn) >= 3;
8284 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8285 if (reversed ^ far)
8286 code = reverse_condition (code);
8288 /* Only 64 bit versions of these instructions exist. */
8289 gcc_assert (mode == DImode);
8291 /* Start by writing the branch condition. */
8293 switch (code)
8295 case NE:
8296 strcpy (string, "brnz");
8297 break;
8299 case EQ:
8300 strcpy (string, "brz");
8301 break;
8303 case GE:
8304 strcpy (string, "brgez");
8305 break;
8307 case LT:
8308 strcpy (string, "brlz");
8309 break;
8311 case LE:
8312 strcpy (string, "brlez");
8313 break;
8315 case GT:
8316 strcpy (string, "brgz");
8317 break;
8319 default:
8320 gcc_unreachable ();
8323 p = strchr (string, '\0');
8325 /* Now add the annulling, reg, label, and nop. */
8326 if (annul && ! far)
8328 strcpy (p, ",a");
8329 p += 2;
8332 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8334 strcpy (p,
8335 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8336 ? ",pt" : ",pn");
8337 p += 3;
8340 *p = p < string + 8 ? '\t' : ' ';
8341 p++;
8342 *p++ = '%';
8343 *p++ = '0' + reg;
8344 *p++ = ',';
8345 *p++ = ' ';
8346 if (far)
8348 int veryfar = 1, delta;
8350 if (INSN_ADDRESSES_SET_P ())
8352 delta = (INSN_ADDRESSES (INSN_UID (dest))
8353 - INSN_ADDRESSES (INSN_UID (insn)));
8354 /* Leave some instructions for "slop". */
8355 if (delta >= -260000 && delta < 260000)
8356 veryfar = 0;
8359 strcpy (p, ".+12\n\t nop\n\t");
8360 /* Skip the next insn if requested or
8361 if we know that it will be a nop. */
8362 if (annul || ! final_sequence)
8363 p[3] = '6';
8364 p += 12;
8365 if (veryfar)
8367 strcpy (p, "b\t");
8368 p += 2;
8370 else
8372 strcpy (p, "ba,pt\t%%xcc, ");
8373 p += 13;
8376 *p++ = '%';
8377 *p++ = 'l';
8378 *p++ = '0' + label;
8379 *p++ = '%';
8380 *p++ = '#';
8381 *p = '\0';
8383 return string;
8386 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8387 Such instructions cannot be used in the delay slot of return insn on v9.
8388 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8389 */
8391 static int
8392 epilogue_renumber (register rtx *where, int test)
8394 register const char *fmt;
8395 register int i;
8396 register enum rtx_code code;
8398 if (*where == 0)
8399 return 0;
8401 code = GET_CODE (*where);
8403 switch (code)
8405 case REG:
8406 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8407 return 1;
8408 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8409 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
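      /* FALLTHRU */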
8410 case SCRATCH:
8411 case CC0:
8412 case PC:
8413 case CONST_INT:
8414 case CONST_DOUBLE:
8415 return 0;
8417 /* Do not replace the frame pointer with the stack pointer because
8418 it can cause the delayed instruction to load below the stack.
8419 This occurs when instructions like:
8421 (set (reg/i:SI 24 %i0)
8422 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8423 (const_int -20 [0xffffffec])) 0))
8425 are in the return delayed slot. */
8426 case PLUS:
8427 if (GET_CODE (XEXP (*where, 0)) == REG
8428 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8429 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8430 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8431 return 1;
8432 break;
8434 case MEM:
8435 if (SPARC_STACK_BIAS
8436 && GET_CODE (XEXP (*where, 0)) == REG
8437 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8438 return 1;
8439 break;
8441 default:
8442 break;
8445 fmt = GET_RTX_FORMAT (code);
8447 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8449 if (fmt[i] == 'E')
8451 register int j;
8452 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8453 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8454 return 1;
8456 else if (fmt[i] == 'e'
8457 && epilogue_renumber (&(XEXP (*where, i)), test))
8458 return 1;
8460 return 0;
8463 /* Leaf functions and non-leaf functions have different needs. */
8465 static const int
8466 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8468 static const int
8469 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8471 static const int *const reg_alloc_orders[] = {
8472 reg_leaf_alloc_order,
8473 reg_nonleaf_alloc_order};
8475 void
8476 order_regs_for_local_alloc (void)
8478 static int last_order_nonleaf = 1;
8480 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8482 last_order_nonleaf = !last_order_nonleaf;
8483 memcpy ((char *) reg_alloc_order,
8484 (const char *) reg_alloc_orders[last_order_nonleaf],
8485 FIRST_PSEUDO_REGISTER * sizeof (int));
8489 /* Return 1 if REG and MEM are legitimate enough to allow the various
8490 mem<-->reg splits to be run. */
8492 int
8493 sparc_splitdi_legitimate (rtx reg, rtx mem)
8495 /* Punt if we are here by mistake. */
8496 gcc_assert (reload_completed);
8498 /* We must have an offsettable memory reference. */
8499 if (! offsettable_memref_p (mem))
8500 return 0;
8502 /* If we have legitimate args for ldd/std, we do not want
8503 the split to happen. */
8504 if ((REGNO (reg) % 2) == 0
8505 && mem_min_alignment (mem, 8))
8506 return 0;
8508 /* Success. */
8509 return 1;
8512 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8514 int
8515 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8517 int regno1, regno2;
8519 if (GET_CODE (reg1) == SUBREG)
8520 reg1 = SUBREG_REG (reg1);
8521 if (GET_CODE (reg1) != REG)
8522 return 0;
8523 regno1 = REGNO (reg1);
8525 if (GET_CODE (reg2) == SUBREG)
8526 reg2 = SUBREG_REG (reg2);
8527 if (GET_CODE (reg2) != REG)
8528 return 0;
8529 regno2 = REGNO (reg2);
8531 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8532 return 1;
8534 if (TARGET_VIS3)
8536 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8537 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8538 return 1;
8541 return 0;
8544 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8545 This makes them candidates for using ldd and std insns.
8547 Note reg1 and reg2 *must* be hard registers. */
8549 int
8550 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8552 /* We might have been passed a SUBREG. */
8553 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8554 return 0;
8556 if (REGNO (reg1) % 2 != 0)
8557 return 0;
8559 /* Integer ldd is deprecated in SPARC V9. */
8560 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8561 return 0;
8563 return (REGNO (reg1) == REGNO (reg2) - 1);
8566 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8567 an ldd or std insn.
8569 This can only happen when addr1 and addr2, the addresses in mem1
8570 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8571 addr1 must also be aligned on a 64-bit boundary.
8573 Also, if dependent_reg_rtx is not null, it should not be used to
8574 compute the address for mem1, i.e. we cannot optimize a sequence
8575 like:
8576 ld [%o0], %o0
8577 ld [%o0 + 4], %o1
8578 to
8579 ldd [%o0], %o0
8580 nor:
8581 ld [%g3 + 4], %g3
8582 ld [%g3], %g2
8583 to
8584 ldd [%g3], %g2
8586 But, note that the transformation from:
8587 ld [%g2 + 4], %g3
8588 ld [%g2], %g2
8589 to
8590 ldd [%g2], %g2
8591 is perfectly fine. Thus, the peephole2 patterns always pass us
8592 the destination register of the first load, never the second one.
8594 For stores we don't have a similar problem, so dependent_reg_rtx is
8595 NULL_RTX. */
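/* For example (an illustrative note): ld [%o0 + 8] followed by
   ld [%o0 + 12] qualifies when %o0 is 8-byte aligned, whereas offsets
   4 and 8 do not, since the first offset must be a multiple of 8.  */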
8597 int
8598 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8600 rtx addr1, addr2;
8601 unsigned int reg1;
8602 HOST_WIDE_INT offset1;
8604 /* The mems cannot be volatile. */
8605 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8606 return 0;
8608 /* MEM1 should be aligned on a 64-bit boundary. */
8609 if (MEM_ALIGN (mem1) < 64)
8610 return 0;
8612 addr1 = XEXP (mem1, 0);
8613 addr2 = XEXP (mem2, 0);
8615 /* Extract a register number and offset (if used) from the first addr. */
8616 if (GET_CODE (addr1) == PLUS)
8618 /* If not a REG, return zero. */
8619 if (GET_CODE (XEXP (addr1, 0)) != REG)
8620 return 0;
8621 else
8623 reg1 = REGNO (XEXP (addr1, 0));
8624 /* The offset must be constant! */
8625 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8626 return 0;
8627 offset1 = INTVAL (XEXP (addr1, 1));
8630 else if (GET_CODE (addr1) != REG)
8631 return 0;
8632 else
8634 reg1 = REGNO (addr1);
8635 /* This was a simple (mem (reg)) expression. Offset is 0. */
8636 offset1 = 0;
8639 /* Make sure the second address is a (mem (plus (reg) (const_int). */
8640 if (GET_CODE (addr2) != PLUS)
8641 return 0;
8643 if (GET_CODE (XEXP (addr2, 0)) != REG
8644 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8645 return 0;
8647 if (reg1 != REGNO (XEXP (addr2, 0)))
8648 return 0;
8650 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8651 return 0;
8653 /* The first offset must be evenly divisible by 8 to ensure the
8654 address is 64 bit aligned. */
8655 if (offset1 % 8 != 0)
8656 return 0;
8658 /* The offset for the second addr must be 4 more than the first addr. */
8659 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8660 return 0;
8662 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8663 instructions. */
8664 return 1;
8667 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8669 rtx
8670 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, enum machine_mode mode)
8672 rtx x = widen_memory_access (mem1, mode, 0);
8673 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8674 return x;
8677 /* Return 1 if reg is a pseudo, or is the first register in
8678 a hard register pair. This makes it suitable for use in
8679 ldd and std insns. */
8681 int
8682 register_ok_for_ldd (rtx reg)
8684 /* We might have been passed a SUBREG. */
8685 if (!REG_P (reg))
8686 return 0;
8688 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8689 return (REGNO (reg) % 2 == 0);
8691 return 1;
8694 /* Return 1 if OP, a MEM, has an address which is known to be
8695 aligned to an 8-byte boundary. */
8697 int
8698 memory_ok_for_ldd (rtx op)
8700 /* In 64-bit mode, we assume that the address is word-aligned. */
8701 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8702 return 0;
8704 if (! can_create_pseudo_p ()
8705 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8706 return 0;
8708 return 1;
8711 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8713 static bool
8714 sparc_print_operand_punct_valid_p (unsigned char code)
8716 if (code == '#'
8717 || code == '*'
8718 || code == '('
8719 || code == ')'
8720 || code == '_'
8721 || code == '&')
8722 return true;
8724 return false;
8727 /* Implement TARGET_PRINT_OPERAND.
8728 Print operand X (an rtx) in assembler syntax to file FILE.
8729 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8730 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8732 static void
8733 sparc_print_operand (FILE *file, rtx x, int code)
8735 switch (code)
8737 case '#':
8738 /* Output an insn in a delay slot. */
8739 if (final_sequence)
8740 sparc_indent_opcode = 1;
8741 else
8742 fputs ("\n\t nop", file);
8743 return;
8744 case '*':
8745 /* Output an annul flag if there's nothing for the delay slot and we
8746 are optimizing. This is always used with '(' below.
8747 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8748 this is a dbx bug. So, we only do this when optimizing.
8749 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8750 Always emit a nop in case the next instruction is a branch. */
8751 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8752 fputs (",a", file);
8753 return;
8754 case '(':
8755 /* Output a 'nop' if there's nothing for the delay slot and we are
8756 not optimizing. This is always used with '*' above. */
8757 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8758 fputs ("\n\t nop", file);
8759 else if (final_sequence)
8760 sparc_indent_opcode = 1;
8761 return;
8762 case ')':
8763 /* Output the right displacement from the saved PC on function return.
8764 The caller may have placed an "unimp" insn immediately after the call
8765 so we have to account for it. This insn is used in the 32-bit ABI
8766 when calling a function that returns a non zero-sized structure. The
8767 64-bit ABI doesn't have it. Be careful to have this test be the same
8768 as that for the call. The exception is when sparc_std_struct_return
8769 is enabled, the psABI is followed exactly and the adjustment is made
8770 by the code in sparc_struct_value_rtx. The call emitted is the same
8771 when sparc_std_struct_return is enabled. */
8772 if (!TARGET_ARCH64
8773 && cfun->returns_struct
8774 && !sparc_std_struct_return
8775 && DECL_SIZE (DECL_RESULT (current_function_decl))
8776 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8777 == INTEGER_CST
8778 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8779 fputs ("12", file);
8780 else
8781 fputc ('8', file);
8782 return;
8783 case '_':
8784 /* Output the Embedded Medium/Anywhere code model base register. */
8785 fputs (EMBMEDANY_BASE_REG, file);
8786 return;
8787 case '&':
8788 /* Print some local dynamic TLS name. */
8789 if (const char *name = get_some_local_dynamic_name ())
8790 assemble_name (file, name);
8791 else
8792 output_operand_lossage ("'%%&' used without any "
8793 "local dynamic TLS references");
8794 return;
8796 case 'Y':
8797 /* Adjust the operand to take into account a RESTORE operation. */
8798 if (GET_CODE (x) == CONST_INT)
8799 break;
8800 else if (GET_CODE (x) != REG)
8801 output_operand_lossage ("invalid %%Y operand");
8802 else if (REGNO (x) < 8)
8803 fputs (reg_names[REGNO (x)], file);
8804 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8805 fputs (reg_names[REGNO (x)-16], file);
8806 else
8807 output_operand_lossage ("invalid %%Y operand");
8808 return;
8809 case 'L':
8810 /* Print out the low order register name of a register pair. */
8811 if (WORDS_BIG_ENDIAN)
8812 fputs (reg_names[REGNO (x)+1], file);
8813 else
8814 fputs (reg_names[REGNO (x)], file);
8815 return;
8816 case 'H':
8817 /* Print out the high order register name of a register pair. */
8818 if (WORDS_BIG_ENDIAN)
8819 fputs (reg_names[REGNO (x)], file);
8820 else
8821 fputs (reg_names[REGNO (x)+1], file);
8822 return;
8823 case 'R':
8824 /* Print out the second register name of a register pair or quad.
8825 I.e., R (%o0) => %o1. */
8826 fputs (reg_names[REGNO (x)+1], file);
8827 return;
8828 case 'S':
8829 /* Print out the third register name of a register quad.
8830 I.e., S (%o0) => %o2. */
8831 fputs (reg_names[REGNO (x)+2], file);
8832 return;
8833 case 'T':
8834 /* Print out the fourth register name of a register quad.
8835 I.e., T (%o0) => %o3. */
8836 fputs (reg_names[REGNO (x)+3], file);
8837 return;
8838 case 'x':
8839 /* Print a condition code register. */
8840 if (REGNO (x) == SPARC_ICC_REG)
8842 /* We don't handle CC[X]_NOOVmode because they're not supposed
8843 to occur here. */
8844 if (GET_MODE (x) == CCmode)
8845 fputs ("%icc", file);
8846 else if (GET_MODE (x) == CCXmode)
8847 fputs ("%xcc", file);
8848 else
8849 gcc_unreachable ();
8851 else
8852 /* %fccN register */
8853 fputs (reg_names[REGNO (x)], file);
8854 return;
8855 case 'm':
8856 /* Print the operand's address only. */
8857 output_address (XEXP (x, 0));
8858 return;
8859 case 'r':
8860 /* In this case we need a register. Use %g0 if the
8861 operand is const0_rtx. */
8862 if (x == const0_rtx
8863 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8865 fputs ("%g0", file);
8866 return;
8868 else
8869 break;
8871 case 'A':
8872 switch (GET_CODE (x))
8874 case IOR: fputs ("or", file); break;
8875 case AND: fputs ("and", file); break;
8876 case XOR: fputs ("xor", file); break;
8877 default: output_operand_lossage ("invalid %%A operand");
8879 return;
8881 case 'B':
8882 switch (GET_CODE (x))
8884 case IOR: fputs ("orn", file); break;
8885 case AND: fputs ("andn", file); break;
8886 case XOR: fputs ("xnor", file); break;
8887 default: output_operand_lossage ("invalid %%B operand");
8889 return;
8891 /* This is used by the conditional move instructions. */
8892 case 'C':
8894 enum rtx_code rc = GET_CODE (x);
8896 switch (rc)
8898 case NE: fputs ("ne", file); break;
8899 case EQ: fputs ("e", file); break;
8900 case GE: fputs ("ge", file); break;
8901 case GT: fputs ("g", file); break;
8902 case LE: fputs ("le", file); break;
8903 case LT: fputs ("l", file); break;
8904 case GEU: fputs ("geu", file); break;
8905 case GTU: fputs ("gu", file); break;
8906 case LEU: fputs ("leu", file); break;
8907 case LTU: fputs ("lu", file); break;
8908 case LTGT: fputs ("lg", file); break;
8909 case UNORDERED: fputs ("u", file); break;
8910 case ORDERED: fputs ("o", file); break;
8911 case UNLT: fputs ("ul", file); break;
8912 case UNLE: fputs ("ule", file); break;
8913 case UNGT: fputs ("ug", file); break;
8914 case UNGE: fputs ("uge", file); break;
8915 case UNEQ: fputs ("ue", file); break;
8916 default: output_operand_lossage ("invalid %%C operand");
8918 return;
8921 /* These are used by the movr instruction pattern. */
8922 case 'D':
8924 enum rtx_code rc = GET_CODE (x);
8925 switch (rc)
8927 case NE: fputs ("ne", file); break;
8928 case EQ: fputs ("e", file); break;
8929 case GE: fputs ("gez", file); break;
8930 case LT: fputs ("lz", file); break;
8931 case LE: fputs ("lez", file); break;
8932 case GT: fputs ("gz", file); break;
8933 default: output_operand_lossage ("invalid %%D operand");
8935 return;
8938 case 'b':
8940 /* Print a sign-extended character. */
8941 int i = trunc_int_for_mode (INTVAL (x), QImode);
8942 fprintf (file, "%d", i);
8943 return;
8946 case 'f':
8947 /* Operand must be a MEM; write its address. */
8948 if (GET_CODE (x) != MEM)
8949 output_operand_lossage ("invalid %%f operand");
8950 output_address (XEXP (x, 0));
8951 return;
8953 case 's':
8955 /* Print a sign-extended 32-bit value. */
8956 HOST_WIDE_INT i;
8957 if (GET_CODE(x) == CONST_INT)
8958 i = INTVAL (x);
8959 else if (GET_CODE(x) == CONST_DOUBLE)
8960 i = CONST_DOUBLE_LOW (x);
8961 else
8963 output_operand_lossage ("invalid %%s operand");
8964 return;
8966 i = trunc_int_for_mode (i, SImode);
8967 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8968 return;
8971 case 0:
8972 /* Do nothing special. */
8973 break;
8975 default:
8976 /* Undocumented flag. */
8977 output_operand_lossage ("invalid operand output code");
8980 if (GET_CODE (x) == REG)
8981 fputs (reg_names[REGNO (x)], file);
8982 else if (GET_CODE (x) == MEM)
8984 fputc ('[', file);
8985 /* Poor Sun assembler doesn't understand absolute addressing. */
8986 if (CONSTANT_P (XEXP (x, 0)))
8987 fputs ("%g0+", file);
8988 output_address (XEXP (x, 0));
8989 fputc (']', file);
8991 else if (GET_CODE (x) == HIGH)
8993 fputs ("%hi(", file);
8994 output_addr_const (file, XEXP (x, 0));
8995 fputc (')', file);
8997 else if (GET_CODE (x) == LO_SUM)
8999 sparc_print_operand (file, XEXP (x, 0), 0);
9000 if (TARGET_CM_MEDMID)
9001 fputs ("+%l44(", file);
9002 else
9003 fputs ("+%lo(", file);
9004 output_addr_const (file, XEXP (x, 1));
9005 fputc (')', file);
9007 else if (GET_CODE (x) == CONST_DOUBLE
9008 && (GET_MODE (x) == VOIDmode
9009 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
9011 if (CONST_DOUBLE_HIGH (x) == 0)
9012 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
9013 else if (CONST_DOUBLE_HIGH (x) == -1
9014 && CONST_DOUBLE_LOW (x) < 0)
9015 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
9016 else
9017 output_operand_lossage ("long long constant not a valid immediate operand");
9019 else if (GET_CODE (x) == CONST_DOUBLE)
9020 output_operand_lossage ("floating point constant not a valid immediate operand");
9021 else { output_addr_const (file, x); }
9024 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9026 static void
9027 sparc_print_operand_address (FILE *file, rtx x)
9029 register rtx base, index = 0;
9030 int offset = 0;
9031 register rtx addr = x;
9033 if (REG_P (addr))
9034 fputs (reg_names[REGNO (addr)], file);
9035 else if (GET_CODE (addr) == PLUS)
9037 if (CONST_INT_P (XEXP (addr, 0)))
9038 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9039 else if (CONST_INT_P (XEXP (addr, 1)))
9040 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9041 else
9042 base = XEXP (addr, 0), index = XEXP (addr, 1);
9043 if (GET_CODE (base) == LO_SUM)
9045 gcc_assert (USE_AS_OFFSETABLE_LO10
9046 && TARGET_ARCH64
9047 && ! TARGET_CM_MEDMID);
9048 output_operand (XEXP (base, 0), 0);
9049 fputs ("+%lo(", file);
9050 output_address (XEXP (base, 1));
9051 fprintf (file, ")+%d", offset);
9053 else
9055 fputs (reg_names[REGNO (base)], file);
9056 if (index == 0)
9057 fprintf (file, "%+d", offset);
9058 else if (REG_P (index))
9059 fprintf (file, "+%s", reg_names[REGNO (index)]);
9060 else if (GET_CODE (index) == SYMBOL_REF
9061 || GET_CODE (index) == LABEL_REF
9062 || GET_CODE (index) == CONST)
9063 fputc ('+', file), output_addr_const (file, index);
9064 else gcc_unreachable ();
9067 else if (GET_CODE (addr) == MINUS
9068 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9070 output_addr_const (file, XEXP (addr, 0));
9071 fputs ("-(", file);
9072 output_addr_const (file, XEXP (addr, 1));
9073 fputs ("-.)", file);
9075 else if (GET_CODE (addr) == LO_SUM)
9077 output_operand (XEXP (addr, 0), 0);
9078 if (TARGET_CM_MEDMID)
9079 fputs ("+%l44(", file);
9080 else
9081 fputs ("+%lo(", file);
9082 output_address (XEXP (addr, 1));
9083 fputc (')', file);
9085 else if (flag_pic
9086 && GET_CODE (addr) == CONST
9087 && GET_CODE (XEXP (addr, 0)) == MINUS
9088 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9089 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9090 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9092 addr = XEXP (addr, 0);
9093 output_addr_const (file, XEXP (addr, 0));
9094 /* Group the args of the second CONST in parenthesis. */
9095 fputs ("-(", file);
9096 /* Skip past the second CONST--it does nothing for us. */
9097 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9098 /* Close the parenthesis. */
9099 fputc (')', file);
9101 else
9103 output_addr_const (file, addr);
9107 /* Target hook for assembling integer objects. The sparc version has
9108 special handling for aligned DI-mode objects. */
9110 static bool
9111 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9113 /* ??? We only output .xword's for symbols and only then in environments
9114 where the assembler can handle them. */
9115 if (aligned_p && size == 8
9116 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
9118 if (TARGET_V9)
9120 assemble_integer_with_op ("\t.xword\t", x);
9121 return true;
9123 else
9125 assemble_aligned_integer (4, const0_rtx);
9126 assemble_aligned_integer (4, x);
9127 return true;
9130 return default_assemble_integer (x, size, aligned_p);
9133 /* Return the value of a code used in the .proc pseudo-op that says
9134 what kind of result this function returns. For non-C types, we pick
9135 the closest C type. */
9137 #ifndef SHORT_TYPE_SIZE
9138 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9139 #endif
9141 #ifndef INT_TYPE_SIZE
9142 #define INT_TYPE_SIZE BITS_PER_WORD
9143 #endif
9145 #ifndef LONG_TYPE_SIZE
9146 #define LONG_TYPE_SIZE BITS_PER_WORD
9147 #endif
9149 #ifndef LONG_LONG_TYPE_SIZE
9150 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9151 #endif
9153 #ifndef FLOAT_TYPE_SIZE
9154 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9155 #endif
9157 #ifndef DOUBLE_TYPE_SIZE
9158 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9159 #endif
9161 #ifndef LONG_DOUBLE_TYPE_SIZE
9162 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9163 #endif
9165 unsigned long
9166 sparc_type_code (register tree type)
9168 register unsigned long qualifiers = 0;
9169 register unsigned shift;
9171 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9172 setting more, since some assemblers will give an error for this. Also,
9173 we must be careful to avoid shifts of 32 bits or more to avoid getting
9174 unpredictable results. */
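  /* A worked example added for illustration: for an array of pointers
     to int, the loop sets (3 << 6) for the array, then (1 << 8) for
     the pointer, and returns with base code 4 for int, i.e.
     (3 << 6) | (1 << 8) | 4 == 0x1c4.  */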
9176 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9178 switch (TREE_CODE (type))
9180 case ERROR_MARK:
9181 return qualifiers;
9183 case ARRAY_TYPE:
9184 qualifiers |= (3 << shift);
9185 break;
9187 case FUNCTION_TYPE:
9188 case METHOD_TYPE:
9189 qualifiers |= (2 << shift);
9190 break;
9192 case POINTER_TYPE:
9193 case REFERENCE_TYPE:
9194 case OFFSET_TYPE:
9195 qualifiers |= (1 << shift);
9196 break;
9198 case RECORD_TYPE:
9199 return (qualifiers | 8);
9201 case UNION_TYPE:
9202 case QUAL_UNION_TYPE:
9203 return (qualifiers | 9);
9205 case ENUMERAL_TYPE:
9206 return (qualifiers | 10);
9208 case VOID_TYPE:
9209 return (qualifiers | 16);
9211 case INTEGER_TYPE:
9212 /* If this is a range type, consider it to be the underlying
9213 type. */
9214 if (TREE_TYPE (type) != 0)
9215 break;
9217 /* Carefully distinguish all the standard types of C,
9218 without messing up if the language is not C. We do this by
9219 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9220 look at both the names and the above fields, but that's redundant.
9221 Any type whose size is between two C types will be considered
9222 to be the wider of the two types. Also, we do not have a
9223 special code to use for "long long", so anything wider than
9224 long is treated the same. Note that we can't distinguish
9225 between "int" and "long" in this code if they are the same
9226 size, but that's fine, since neither can the assembler. */
9228 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9229 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9231 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9232 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9234 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9235 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9237 else
9238 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9240 case REAL_TYPE:
9241 /* If this is a range type, consider it to be the underlying
9242 type. */
9243 if (TREE_TYPE (type) != 0)
9244 break;
9246 /* Carefully distinguish all the standard types of C,
9247 without messing up if the language is not C. */
9249 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9250 return (qualifiers | 6);
9252 else
9253 return (qualifiers | 7);
9255 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9256 /* ??? We need to distinguish between double and float complex types,
9257 but I don't know how yet because I can't reach this code from
9258 existing front-ends. */
9259 return (qualifiers | 7); /* Who knows? */
9261 case VECTOR_TYPE:
9262 case BOOLEAN_TYPE: /* Boolean truth value type. */
9263 case LANG_TYPE:
9264 case NULLPTR_TYPE:
9265 return qualifiers;
9267 default:
9268 gcc_unreachable (); /* Not a type! */
9272 return qualifiers;
9275 /* Nested function support. */
9277 /* Emit RTL insns to initialize the variable parts of a trampoline.
9278 FNADDR is an RTX for the address of the function's pure code.
9279 CXT is an RTX for the static chain value for the function.
9281 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9282 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9283 (to store insns). This is a bit excessive. Perhaps a different
9284 mechanism would be better here.
9286 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9288 static void
9289 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9291 /* SPARC 32-bit trampoline:
9293 sethi %hi(fn), %g1
9294 sethi %hi(static), %g2
9295 jmp %g1+%lo(fn)
9296 or %g2, %lo(static), %g2
9298 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9299 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9301 */
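  /* Decoded per the formats above (a note added for clarity), the four
     opcode constants below are:
	0x03000000	sethi	%hi(0), %g1
	0x05000000	sethi	%hi(0), %g2
	0x81c06000	jmpl	%g1 + 0, %g0
	0x8410a000	or	%g2, 0, %g2
     The fnaddr/cxt bits are merged into the immediate fields by the
     expand_shift/expand_and/ior operations below.  */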
9302 emit_move_insn
9303 (adjust_address (m_tramp, SImode, 0),
9304 expand_binop (SImode, ior_optab,
9305 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9306 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9307 NULL_RTX, 1, OPTAB_DIRECT));
9309 emit_move_insn
9310 (adjust_address (m_tramp, SImode, 4),
9311 expand_binop (SImode, ior_optab,
9312 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9313 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9314 NULL_RTX, 1, OPTAB_DIRECT));
9316 emit_move_insn
9317 (adjust_address (m_tramp, SImode, 8),
9318 expand_binop (SImode, ior_optab,
9319 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9320 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9321 NULL_RTX, 1, OPTAB_DIRECT));
9323 emit_move_insn
9324 (adjust_address (m_tramp, SImode, 12),
9325 expand_binop (SImode, ior_optab,
9326 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9327 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9328 NULL_RTX, 1, OPTAB_DIRECT));
9330 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9331 aligned on a 16 byte boundary so one flush clears it all. */
9332 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9333 if (sparc_cpu != PROCESSOR_ULTRASPARC
9334 && sparc_cpu != PROCESSOR_ULTRASPARC3
9335 && sparc_cpu != PROCESSOR_NIAGARA
9336 && sparc_cpu != PROCESSOR_NIAGARA2
9337 && sparc_cpu != PROCESSOR_NIAGARA3
9338 && sparc_cpu != PROCESSOR_NIAGARA4)
9339 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9341 /* Call __enable_execute_stack after writing onto the stack to make sure
9342 the stack address is accessible. */
9343 #ifdef HAVE_ENABLE_EXECUTE_STACK
9344 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9345 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9346 #endif
9350 /* The 64-bit version is simpler because it makes more sense to load the
9351 values as "immediate" data out of the trampoline. It's also easier since
9352 we can read the PC without clobbering a register. */
9354 static void
9355 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9357 /* SPARC 64-bit trampoline:
9359 rd %pc, %g1
9360 ldx [%g1+24], %g5
9361 jmp %g5
9362 ldx [%g1+16], %g5
9363 +16 bytes data
9365 */
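  /* Decoded (a note added for clarity), the four fixed instruction
     words below are:
	0x83414000	rd	%pc, %g1
	0xca586018	ldx	[%g1 + 24], %g5
	0x81c14000	jmpl	%g5 + %g0, %g0
	0xca586010	ldx	[%g1 + 16], %g5  */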
9366 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9367 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9368 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9369 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9370 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9371 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9372 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9373 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9374 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9375 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9376 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9378 if (sparc_cpu != PROCESSOR_ULTRASPARC
9379 && sparc_cpu != PROCESSOR_ULTRASPARC3
9380 && sparc_cpu != PROCESSOR_NIAGARA
9381 && sparc_cpu != PROCESSOR_NIAGARA2
9382 && sparc_cpu != PROCESSOR_NIAGARA3
9383 && sparc_cpu != PROCESSOR_NIAGARA4)
9384 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9386 /* Call __enable_execute_stack after writing onto the stack to make sure
9387 the stack address is accessible. */
9388 #ifdef HAVE_ENABLE_EXECUTE_STACK
9389 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9390 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9391 #endif
9394 /* Worker for TARGET_TRAMPOLINE_INIT. */
9396 static void
9397 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9399 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9400 cxt = force_reg (Pmode, cxt);
9401 if (TARGET_ARCH64)
9402 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9403 else
9404 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9407 /* Adjust the cost of a scheduling dependency. Return the new cost of
9408 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9410 static int
9411 supersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9413 enum attr_type insn_type;
9415 if (! recog_memoized (insn))
9416 return 0;
9418 insn_type = get_attr_type (insn);
9420 if (REG_NOTE_KIND (link) == 0)
9422 /* Data dependency; DEP_INSN writes a register that INSN reads some
9423 cycles later. */
9425 /* if a load, then the dependence must be on the memory address;
9426 add an extra "cycle". Note that the cost could be two cycles
9427 if the reg was written late in an instruction group; we cannot tell
9428 here. */
9429 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9430 return cost + 3;
9432 /* Get the delay only if the address of the store is the dependence. */
9433 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9435 rtx pat = PATTERN(insn);
9436 rtx dep_pat = PATTERN (dep_insn);
9438 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9439 return cost; /* This should not happen! */
9441 /* The dependency between the two instructions was on the data that
9442 is being stored. Assume that this implies that the address of the
9443 store is not dependent. */
9444 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9445 return cost;
9447 return cost + 3; /* An approximation. */
9450 /* A shift instruction cannot receive its data from an instruction
9451 in the same cycle; add a one cycle penalty. */
9452 if (insn_type == TYPE_SHIFT)
9453 return cost + 3; /* Split before cascade into shift. */
9455 else
9457 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9458 INSN writes some cycles later. */
9460 /* These are only significant for the fpu unit; writing a fp reg before
9461 the fpu has finished with it stalls the processor. */
9463 /* Reusing an integer register causes no problems. */
9464 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9465 return 0;
9468 return cost;
9471 static int
9472 hypersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9474 enum attr_type insn_type, dep_type;
9475 rtx pat = PATTERN(insn);
9476 rtx dep_pat = PATTERN (dep_insn);
9478 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9479 return cost;
9481 insn_type = get_attr_type (insn);
9482 dep_type = get_attr_type (dep_insn);
9484 switch (REG_NOTE_KIND (link))
9486 case 0:
9487 /* Data dependency; DEP_INSN writes a register that INSN reads some
9488 cycles later. */
9490 switch (insn_type)
9492 case TYPE_STORE:
9493 case TYPE_FPSTORE:
9494 /* Get the delay only if the address of the store is the dependence. */
9495 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9496 return cost;
9498 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9499 return cost;
9500 return cost + 3;
9502 case TYPE_LOAD:
9503 case TYPE_SLOAD:
9504 case TYPE_FPLOAD:
9505 /* If a load, then the dependence must be on the memory address. If
9506 the addresses aren't equal, then it might be a false dependency. */
9507 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9509 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9510 || GET_CODE (SET_DEST (dep_pat)) != MEM
9511 || GET_CODE (SET_SRC (pat)) != MEM
9512 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9513 XEXP (SET_SRC (pat), 0)))
9514 return cost + 2;
9516 return cost + 8;
9518 break;
9520 case TYPE_BRANCH:
9521 /* Compare to branch latency is 0. There is no benefit from
9522 separating compare and branch. */
9523 if (dep_type == TYPE_COMPARE)
9524 return 0;
9525 /* Floating point compare to branch latency is less than
9526 compare to conditional move. */
9527 if (dep_type == TYPE_FPCMP)
9528 return cost - 1;
9529 break;
9530 default:
9531 break;
9533 break;
9535 case REG_DEP_ANTI:
9536 /* Anti-dependencies only penalize the fpu unit. */
9537 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9538 return 0;
9539 break;
9541 default:
9542 break;
9545 return cost;
9548 static int
9549 sparc_adjust_cost(rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
9551 switch (sparc_cpu)
9553 case PROCESSOR_SUPERSPARC:
9554 cost = supersparc_adjust_cost (insn, link, dep, cost);
9555 break;
9556 case PROCESSOR_HYPERSPARC:
9557 case PROCESSOR_SPARCLITE86X:
9558 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9559 break;
9560 default:
9561 break;
9563 return cost;
9566 static void
9567 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9568 int sched_verbose ATTRIBUTE_UNUSED,
9569 int max_ready ATTRIBUTE_UNUSED)
9572 static int
9573 sparc_use_sched_lookahead (void)
9575 if (sparc_cpu == PROCESSOR_NIAGARA
9576 || sparc_cpu == PROCESSOR_NIAGARA2
9577 || sparc_cpu == PROCESSOR_NIAGARA3)
9578 return 0;
9579 if (sparc_cpu == PROCESSOR_NIAGARA4)
9580 return 2;
9581 if (sparc_cpu == PROCESSOR_ULTRASPARC
9582 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9583 return 4;
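  /* A compact membership test (a note added for clarity): the mask
     below is nonzero exactly when sparc_cpu is SUPERSPARC, HYPERSPARC
     or SPARCLITE86X.  */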
9584 if ((1 << sparc_cpu) &
9585 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9586 (1 << PROCESSOR_SPARCLITE86X)))
9587 return 3;
9588 return 0;
9591 static int
9592 sparc_issue_rate (void)
9594 switch (sparc_cpu)
9596 case PROCESSOR_NIAGARA:
9597 case PROCESSOR_NIAGARA2:
9598 case PROCESSOR_NIAGARA3:
9599 default:
9600 return 1;
9601 case PROCESSOR_NIAGARA4:
9602 case PROCESSOR_V9:
9603 /* Assume V9 processors are capable of at least dual-issue. */
9604 return 2;
9605 case PROCESSOR_SUPERSPARC:
9606 return 3;
9607 case PROCESSOR_HYPERSPARC:
9608 case PROCESSOR_SPARCLITE86X:
9609 return 2;
9610 case PROCESSOR_ULTRASPARC:
9611 case PROCESSOR_ULTRASPARC3:
9612 return 4;
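/* Return 1 if the destination of the single SET in INSN is known to
   have its high 32 bits zero, -1 if it is known to be sign-extended
   from SImode, and 0 if nothing is known -- a summary comment added
   for clarity; see sparc_check_64 below.  */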
9616 static int
9617 set_extends (rtx_insn *insn)
9619 register rtx pat = PATTERN (insn);
9621 switch (GET_CODE (SET_SRC (pat)))
9623 /* Load and some shift instructions zero extend. */
9624 case MEM:
9625 case ZERO_EXTEND:
9626 /* sethi clears the high bits */
9627 case HIGH:
9628 /* LO_SUM is used with sethi. sethi cleared the high
9629 bits and the values used with lo_sum are positive */
9630 case LO_SUM:
9631 /* Store flag stores 0 or 1 */
9632 case LT: case LTU:
9633 case GT: case GTU:
9634 case LE: case LEU:
9635 case GE: case GEU:
9636 case EQ:
9637 case NE:
9638 return 1;
9639 case AND:
9641 rtx op0 = XEXP (SET_SRC (pat), 0);
9642 rtx op1 = XEXP (SET_SRC (pat), 1);
9643 if (GET_CODE (op1) == CONST_INT)
9644 return INTVAL (op1) >= 0;
9645 if (GET_CODE (op0) != REG)
9646 return 0;
9647 if (sparc_check_64 (op0, insn) == 1)
9648 return 1;
9649 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9651 case IOR:
9652 case XOR:
9654 rtx op0 = XEXP (SET_SRC (pat), 0);
9655 rtx op1 = XEXP (SET_SRC (pat), 1);
9656 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9657 return 0;
9658 if (GET_CODE (op1) == CONST_INT)
9659 return INTVAL (op1) >= 0;
9660 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9662 case LSHIFTRT:
9663 return GET_MODE (SET_SRC (pat)) == SImode;
9664 /* Positive integers leave the high bits zero. */
9665 case CONST_DOUBLE:
9666 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9667 case CONST_INT:
9668 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9669 case ASHIFTRT:
9670 case SIGN_EXTEND:
9671 return - (GET_MODE (SET_SRC (pat)) == SImode);
9672 case REG:
9673 return sparc_check_64 (SET_SRC (pat), insn);
9674 default:
9675 return 0;
9679 /* We _ought_ to have only one kind per function, but... */
9680 static GTY(()) rtx sparc_addr_diff_list;
9681 static GTY(()) rtx sparc_addr_list;
9683 void
9684 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9686 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9687 if (diff)
9688 sparc_addr_diff_list
9689 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9690 else
9691 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9694 static void
9695 sparc_output_addr_vec (rtx vec)
9697 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9698 int idx, vlen = XVECLEN (body, 0);
9700 #ifdef ASM_OUTPUT_ADDR_VEC_START
9701 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9702 #endif
9704 #ifdef ASM_OUTPUT_CASE_LABEL
9705 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9706 NEXT_INSN (lab));
9707 #else
9708 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9709 #endif
9711 for (idx = 0; idx < vlen; idx++)
9713 ASM_OUTPUT_ADDR_VEC_ELT
9714 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9717 #ifdef ASM_OUTPUT_ADDR_VEC_END
9718 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9719 #endif
9722 static void
9723 sparc_output_addr_diff_vec (rtx vec)
9725 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9726 rtx base = XEXP (XEXP (body, 0), 0);
9727 int idx, vlen = XVECLEN (body, 1);
9729 #ifdef ASM_OUTPUT_ADDR_VEC_START
9730 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9731 #endif
9733 #ifdef ASM_OUTPUT_CASE_LABEL
9734 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9735 NEXT_INSN (lab));
9736 #else
9737 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9738 #endif
9740 for (idx = 0; idx < vlen; idx++)
9742 ASM_OUTPUT_ADDR_DIFF_ELT
9743 (asm_out_file,
9744 body,
9745 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9746 CODE_LABEL_NUMBER (base));
9749 #ifdef ASM_OUTPUT_ADDR_VEC_END
9750 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9751 #endif
9754 static void
9755 sparc_output_deferred_case_vectors (void)
9757 rtx t;
9758 int align;
9760 if (sparc_addr_list == NULL_RTX
9761 && sparc_addr_diff_list == NULL_RTX)
9762 return;
9764 /* Align to cache line in the function's code section. */
9765 switch_to_section (current_function_section ());
9767 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9768 if (align > 0)
9769 ASM_OUTPUT_ALIGN (asm_out_file, align);
9771 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9772 sparc_output_addr_vec (XEXP (t, 0));
9773 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9774 sparc_output_addr_diff_vec (XEXP (t, 0));
9776 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9779 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9780 unknown. Return 1 if the high bits are zero, -1 if the register is
9781 sign extended. */
9782 int
9783 sparc_check_64 (rtx x, rtx_insn *insn)
9785 /* If a register is set only once it is safe to ignore insns this
9786 code does not know how to handle. The loop will either recognize
9787 the single set and return the correct value or fail to recognize
9788 it and return 0. */
9789 int set_once = 0;
9790 rtx y = x;
9792 gcc_assert (GET_CODE (x) == REG);
9794 if (GET_MODE (x) == DImode)
9795 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9797 if (flag_expensive_optimizations
9798 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9799 set_once = 1;
9801 if (insn == 0)
9803 if (set_once)
9804 insn = get_last_insn_anywhere ();
9805 else
9806 return 0;
9809 while ((insn = PREV_INSN (insn)))
9811 switch (GET_CODE (insn))
9813 case JUMP_INSN:
9814 case NOTE:
9815 break;
9816 case CODE_LABEL:
9817 case CALL_INSN:
9818 default:
9819 if (! set_once)
9820 return 0;
9821 break;
9822 case INSN:
9824 rtx pat = PATTERN (insn);
9825 if (GET_CODE (pat) != SET)
9826 return 0;
9827 if (rtx_equal_p (x, SET_DEST (pat)))
9828 return set_extends (insn);
9829 if (y && rtx_equal_p (y, SET_DEST (pat)))
9830 return set_extends (insn);
9831 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9832 return 0;
9836 return 0;
9839 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9840 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
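/* For the scratch-register alternative, the code below emits a
   sequence along these lines (an illustrative sketch, with sllx
   standing in for OPCODE):

	sllx	%H1, 32, %3	! high word into the upper half
	srl	%L1, 0, %L1	! zero-extend the low word if needed
	or	%L1, %3, %3	! %3 = reassembled 64-bit value
	sllx	%3, %2, %3	! the shift itself
	srlx	%3, 32, %H0	! high half of the result
	mov	%3, %L0		! low half of the result  */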
9842 const char *
9843 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9845 static char asm_code[60];
9847 /* The scratch register is only required when the destination
9848 register is not a 64-bit global or out register. */
9849 if (which_alternative != 2)
9850 operands[3] = operands[0];
9852 /* We can only shift by constants <= 63. */
9853 if (GET_CODE (operands[2]) == CONST_INT)
9854 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9856 if (GET_CODE (operands[1]) == CONST_INT)
9858 output_asm_insn ("mov\t%1, %3", operands);
9860 else
9862 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9863 if (sparc_check_64 (operands[1], insn) <= 0)
9864 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9865 output_asm_insn ("or\t%L1, %3, %3", operands);
9868 strcpy (asm_code, opcode);
9870 if (which_alternative != 2)
9871 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9872 else
9873 return
9874 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
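/* Illustrative sketch: for OPCODE == "sllx" with the 64-bit input
   split across the %H1/%L1 register pair, the sequence emitted above
   is roughly

     sllx  %H1, 32, %3     ! high word into the upper half of %3
     srl   %L1, 0, %L1     ! zero-extend the low word (if needed)
     or    %L1, %3, %3     ! %3 = full 64-bit input
     sllx  %3, %2, %L0     ! the shift proper (alternatives 0/1: %3 == %0)
     srlx  %L0, 32, %H0    ! split the result back into a pair

   where the srl is skipped when sparc_check_64 proves the high bits
   of %L1 are already zero.  */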
9877 /* Output rtl to increment the profiler label LABELNO
9878 for profiling a function entry. */
9880 void
9881 sparc_profile_hook (int labelno)
9883 char buf[32];
9884 rtx lab, fun;
9886 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9887 if (NO_PROFILE_COUNTERS)
9889 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9891 else
9893 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9894 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9895 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9899 #ifdef TARGET_SOLARIS
9900 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9902 static void
9903 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9904 tree decl ATTRIBUTE_UNUSED)
9906 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9908 solaris_elf_asm_comdat_section (name, flags, decl);
9909 return;
9912 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9914 if (!(flags & SECTION_DEBUG))
9915 fputs (",#alloc", asm_out_file);
9916 if (flags & SECTION_WRITE)
9917 fputs (",#write", asm_out_file);
9918 if (flags & SECTION_TLS)
9919 fputs (",#tls", asm_out_file);
9920 if (flags & SECTION_CODE)
9921 fputs (",#execinstr", asm_out_file);
9923 /* Sun as only supports #nobits/#progbits since Solaris 10. */
9924 if (HAVE_AS_SPARC_NOBITS)
9926 if (flags & SECTION_BSS)
9927 fputs (",#nobits", asm_out_file);
9928 else
9929 fputs (",#progbits", asm_out_file);
9932 fputc ('\n', asm_out_file);
9934 #endif /* TARGET_SOLARIS */
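/* Illustrative sketch: under the assumptions above, a writable TLS
   data section comes out as

     .section ".tdata",#alloc,#write,#tls,#progbits

   with the trailing #progbits/#nobits suffix omitted for Sun as
   releases predating Solaris 10.  */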
9936 /* We do not allow indirect calls to be optimized into sibling calls.
9938 We cannot use sibling calls when delayed branches are disabled
9939 because they will likely require the call delay slot to be filled.
9941 Also, on SPARC 32-bit we cannot emit a sibling call when the
9942 current function returns a structure. This is because the "unimp
9943 after call" convention would cause the callee to return to the
9944 wrong place. The generic code already disallows cases where the
9945 function being called returns a structure.
9947 It may seem strange how this last case could occur. Usually there
9948 is code after the call which jumps to epilogue code which dumps the
9949 return value into the struct return area. That ought to invalidate
9950 the sibling call right? Well, in the C++ case we can end up passing
9951 the pointer to the struct return area to a constructor (which returns
9952 void) and then nothing else happens. Such a sibling call would look
9953 valid without the added check here.
9955 VxWorks PIC PLT entries require the global pointer to be initialized
9956 on entry. We therefore can't emit sibling calls to them. */
9957 static bool
9958 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9960 return (decl
9961 && flag_delayed_branch
9962 && (TARGET_ARCH64 || ! cfun->returns_struct)
9963 && !(TARGET_VXWORKS_RTP
9964 && flag_pic
9965 && !targetm.binds_local_p (decl)));
9968 /* libfunc renaming. */
9970 static void
9971 sparc_init_libfuncs (void)
9973 if (TARGET_ARCH32)
9975 /* Use the subroutines that Sun's library provides for integer
9976 multiply and divide. The `*' prevents an underscore from
9977 being prepended by the compiler. .umul is a little faster
9978 than .mul. */
9979 set_optab_libfunc (smul_optab, SImode, "*.umul");
9980 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9981 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9982 set_optab_libfunc (smod_optab, SImode, "*.rem");
9983 set_optab_libfunc (umod_optab, SImode, "*.urem");
9985 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
9986 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9987 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9988 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9989 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9990 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9992 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9993 is because with soft-float, the SFmode and DFmode sqrt
9994 instructions will be absent, and the compiler will notice and
9995 try to use the TFmode sqrt instruction for calls to the
9996 builtin function sqrt, but this fails. */
9997 if (TARGET_FPU)
9998 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10000 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10001 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10002 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10003 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10004 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10005 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10007 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10008 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10009 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10010 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10012 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10013 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10014 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10015 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10017 if (DITF_CONVERSION_LIBFUNCS)
10019 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10020 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10021 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10022 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10025 if (SUN_CONVERSION_LIBFUNCS)
10027 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10028 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10029 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10030 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10033 if (TARGET_ARCH64)
10035 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10036 do not exist in the library. Make sure the compiler does not
10037 emit calls to them by accident. (It should always use the
10038 hardware instructions.) */
10039 set_optab_libfunc (smul_optab, SImode, 0);
10040 set_optab_libfunc (sdiv_optab, SImode, 0);
10041 set_optab_libfunc (udiv_optab, SImode, 0);
10042 set_optab_libfunc (smod_optab, SImode, 0);
10043 set_optab_libfunc (umod_optab, SImode, 0);
10045 if (SUN_INTEGER_MULTIPLY_64)
10047 set_optab_libfunc (smul_optab, DImode, "__mul64");
10048 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10049 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10050 set_optab_libfunc (smod_optab, DImode, "__rem64");
10051 set_optab_libfunc (umod_optab, DImode, "__urem64");
10054 if (SUN_CONVERSION_LIBFUNCS)
10056 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10057 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10058 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10059 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
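/* Illustrative sketch (user-level code, not part of the compiler):
   on a 32-bit target, the libfuncs registered above are what calls
   like these may resolve to:  */
#if 0
int
quotient (int a, int b)
{
  return a / b;     /* may expand to "call .div" via sdiv_optab */
}

long double
qadd (long double x, long double y)
{
  return x + y;     /* "call _Q_add", assuming 128-bit long double */
}
#endif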
10064 /* SPARC builtins. */
10065 enum sparc_builtins
10067 /* FPU builtins. */
10068 SPARC_BUILTIN_LDFSR,
10069 SPARC_BUILTIN_STFSR,
10071 /* VIS 1.0 builtins. */
10072 SPARC_BUILTIN_FPACK16,
10073 SPARC_BUILTIN_FPACK32,
10074 SPARC_BUILTIN_FPACKFIX,
10075 SPARC_BUILTIN_FEXPAND,
10076 SPARC_BUILTIN_FPMERGE,
10077 SPARC_BUILTIN_FMUL8X16,
10078 SPARC_BUILTIN_FMUL8X16AU,
10079 SPARC_BUILTIN_FMUL8X16AL,
10080 SPARC_BUILTIN_FMUL8SUX16,
10081 SPARC_BUILTIN_FMUL8ULX16,
10082 SPARC_BUILTIN_FMULD8SUX16,
10083 SPARC_BUILTIN_FMULD8ULX16,
10084 SPARC_BUILTIN_FALIGNDATAV4HI,
10085 SPARC_BUILTIN_FALIGNDATAV8QI,
10086 SPARC_BUILTIN_FALIGNDATAV2SI,
10087 SPARC_BUILTIN_FALIGNDATADI,
10088 SPARC_BUILTIN_WRGSR,
10089 SPARC_BUILTIN_RDGSR,
10090 SPARC_BUILTIN_ALIGNADDR,
10091 SPARC_BUILTIN_ALIGNADDRL,
10092 SPARC_BUILTIN_PDIST,
10093 SPARC_BUILTIN_EDGE8,
10094 SPARC_BUILTIN_EDGE8L,
10095 SPARC_BUILTIN_EDGE16,
10096 SPARC_BUILTIN_EDGE16L,
10097 SPARC_BUILTIN_EDGE32,
10098 SPARC_BUILTIN_EDGE32L,
10099 SPARC_BUILTIN_FCMPLE16,
10100 SPARC_BUILTIN_FCMPLE32,
10101 SPARC_BUILTIN_FCMPNE16,
10102 SPARC_BUILTIN_FCMPNE32,
10103 SPARC_BUILTIN_FCMPGT16,
10104 SPARC_BUILTIN_FCMPGT32,
10105 SPARC_BUILTIN_FCMPEQ16,
10106 SPARC_BUILTIN_FCMPEQ32,
10107 SPARC_BUILTIN_FPADD16,
10108 SPARC_BUILTIN_FPADD16S,
10109 SPARC_BUILTIN_FPADD32,
10110 SPARC_BUILTIN_FPADD32S,
10111 SPARC_BUILTIN_FPSUB16,
10112 SPARC_BUILTIN_FPSUB16S,
10113 SPARC_BUILTIN_FPSUB32,
10114 SPARC_BUILTIN_FPSUB32S,
10115 SPARC_BUILTIN_ARRAY8,
10116 SPARC_BUILTIN_ARRAY16,
10117 SPARC_BUILTIN_ARRAY32,
10119 /* VIS 2.0 builtins. */
10120 SPARC_BUILTIN_EDGE8N,
10121 SPARC_BUILTIN_EDGE8LN,
10122 SPARC_BUILTIN_EDGE16N,
10123 SPARC_BUILTIN_EDGE16LN,
10124 SPARC_BUILTIN_EDGE32N,
10125 SPARC_BUILTIN_EDGE32LN,
10126 SPARC_BUILTIN_BMASK,
10127 SPARC_BUILTIN_BSHUFFLEV4HI,
10128 SPARC_BUILTIN_BSHUFFLEV8QI,
10129 SPARC_BUILTIN_BSHUFFLEV2SI,
10130 SPARC_BUILTIN_BSHUFFLEDI,
10132 /* VIS 3.0 builtins. */
10133 SPARC_BUILTIN_CMASK8,
10134 SPARC_BUILTIN_CMASK16,
10135 SPARC_BUILTIN_CMASK32,
10136 SPARC_BUILTIN_FCHKSM16,
10137 SPARC_BUILTIN_FSLL16,
10138 SPARC_BUILTIN_FSLAS16,
10139 SPARC_BUILTIN_FSRL16,
10140 SPARC_BUILTIN_FSRA16,
10141 SPARC_BUILTIN_FSLL32,
10142 SPARC_BUILTIN_FSLAS32,
10143 SPARC_BUILTIN_FSRL32,
10144 SPARC_BUILTIN_FSRA32,
10145 SPARC_BUILTIN_PDISTN,
10146 SPARC_BUILTIN_FMEAN16,
10147 SPARC_BUILTIN_FPADD64,
10148 SPARC_BUILTIN_FPSUB64,
10149 SPARC_BUILTIN_FPADDS16,
10150 SPARC_BUILTIN_FPADDS16S,
10151 SPARC_BUILTIN_FPSUBS16,
10152 SPARC_BUILTIN_FPSUBS16S,
10153 SPARC_BUILTIN_FPADDS32,
10154 SPARC_BUILTIN_FPADDS32S,
10155 SPARC_BUILTIN_FPSUBS32,
10156 SPARC_BUILTIN_FPSUBS32S,
10157 SPARC_BUILTIN_FUCMPLE8,
10158 SPARC_BUILTIN_FUCMPNE8,
10159 SPARC_BUILTIN_FUCMPGT8,
10160 SPARC_BUILTIN_FUCMPEQ8,
10161 SPARC_BUILTIN_FHADDS,
10162 SPARC_BUILTIN_FHADDD,
10163 SPARC_BUILTIN_FHSUBS,
10164 SPARC_BUILTIN_FHSUBD,
10165 SPARC_BUILTIN_FNHADDS,
10166 SPARC_BUILTIN_FNHADDD,
10167 SPARC_BUILTIN_UMULXHI,
10168 SPARC_BUILTIN_XMULX,
10169 SPARC_BUILTIN_XMULXHI,
10171 SPARC_BUILTIN_MAX
10174 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10175 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10177 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10178 function decl or NULL_TREE if the builtin was not added. */
10180 static tree
10181 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10182 tree type)
10184 tree t
10185 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10187 if (t)
10189 sparc_builtins[code] = t;
10190 sparc_builtins_icode[code] = icode;
10193 return t;
10196 /* Likewise, but also marks the function as "const". */
10198 static tree
10199 def_builtin_const (const char *name, enum insn_code icode,
10200 enum sparc_builtins code, tree type)
10202 tree t = def_builtin (name, icode, code, type);
10204 if (t)
10205 TREE_READONLY (t) = 1;
10207 return t;
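/* Illustrative sketch: because the decl is marked TREE_READONLY, two
   calls to a "const" builtin with identical arguments can be merged
   by the optimizers (user-level code, assumes -mvis3):  */
#if 0
long long
twice (long long x, long long y)
{
  /* Both calls may be CSEd into a single fpadd64.  */
  return __builtin_vis_fpadd64 (x, y) + __builtin_vis_fpadd64 (x, y);
}
#endif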
10210 /* Implement the TARGET_INIT_BUILTINS target hook.
10211 Create builtin functions for special SPARC instructions. */
10213 static void
10214 sparc_init_builtins (void)
10216 if (TARGET_FPU)
10217 sparc_fpu_init_builtins ();
10219 if (TARGET_VIS)
10220 sparc_vis_init_builtins ();
10223 /* Create builtin functions for FPU instructions. */
10225 static void
10226 sparc_fpu_init_builtins (void)
10228 tree ftype
10229 = build_function_type_list (void_type_node,
10230 build_pointer_type (unsigned_type_node), 0);
10231 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10232 SPARC_BUILTIN_LDFSR, ftype);
10233 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10234 SPARC_BUILTIN_STFSR, ftype);
10237 /* Create builtin functions for VIS instructions. */
10239 static void
10240 sparc_vis_init_builtins (void)
10242 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10243 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10244 tree v4hi = build_vector_type (intHI_type_node, 4);
10245 tree v2hi = build_vector_type (intHI_type_node, 2);
10246 tree v2si = build_vector_type (intSI_type_node, 2);
10247 tree v1si = build_vector_type (intSI_type_node, 1);
10249 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10250 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10251 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10252 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10253 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10254 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10255 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10256 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10257 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10258 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10259 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10260 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10261 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10262 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10263 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10264 v8qi, v8qi,
10265 intDI_type_node, 0);
10266 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10267 v8qi, v8qi, 0);
10268 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10269 v8qi, v8qi, 0);
10270 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10271 intDI_type_node,
10272 intDI_type_node, 0);
10273 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10274 intSI_type_node,
10275 intSI_type_node, 0);
10276 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10277 ptr_type_node,
10278 intSI_type_node, 0);
10279 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10280 ptr_type_node,
10281 intDI_type_node, 0);
10282 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10283 ptr_type_node,
10284 ptr_type_node, 0);
10285 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10286 ptr_type_node,
10287 ptr_type_node, 0);
10288 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10289 v4hi, v4hi, 0);
10290 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10291 v2si, v2si, 0);
10292 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10293 v4hi, v4hi, 0);
10294 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10295 v2si, v2si, 0);
10296 tree void_ftype_di = build_function_type_list (void_type_node,
10297 intDI_type_node, 0);
10298 tree di_ftype_void = build_function_type_list (intDI_type_node,
10299 void_type_node, 0);
10300 tree void_ftype_si = build_function_type_list (void_type_node,
10301 intSI_type_node, 0);
10302 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10303 float_type_node,
10304 float_type_node, 0);
10305 tree df_ftype_df_df = build_function_type_list (double_type_node,
10306 double_type_node,
10307 double_type_node, 0);
10309 /* Packing and expanding vectors. */
10310 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10311 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10312 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10313 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10314 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10315 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10316 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10317 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10318 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10319 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10321 /* Multiplications. */
10322 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10323 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10324 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10325 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10326 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10327 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10328 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10329 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10330 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10331 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10332 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10333 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10334 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10335 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10337 /* Data aligning. */
10338 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10339 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10340 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10341 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10342 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10343 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10344 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10345 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10347 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10348 SPARC_BUILTIN_WRGSR, void_ftype_di);
10349 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10350 SPARC_BUILTIN_RDGSR, di_ftype_void);
10352 if (TARGET_ARCH64)
10354 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10355 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10356 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10357 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10359 else
10361 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10362 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10363 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10364 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10367 /* Pixel distance. */
10368 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10369 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10371 /* Edge handling. */
10372 if (TARGET_ARCH64)
10374 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10375 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10376 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10377 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10378 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10379 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10380 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10381 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10382 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10383 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10384 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10385 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10387 else
10389 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10390 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10391 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10392 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10393 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10394 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10395 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10396 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10397 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10398 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10399 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10400 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10403 /* Pixel compare. */
10404 if (TARGET_ARCH64)
10406 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10407 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10408 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10409 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10410 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10411 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10412 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10413 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10414 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10415 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10416 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10417 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10418 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10419 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10420 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10421 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10423 else
10425 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10426 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10427 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10428 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10429 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10430 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10431 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10432 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10433 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10434 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10435 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10436 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10437 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10438 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10439 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10440 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10443 /* Addition and subtraction. */
10444 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10445 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10446 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10447 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10448 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10449 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10450 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10451 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10452 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10453 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10454 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10455 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10456 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10457 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10458 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10459 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10461 /* Three-dimensional array addressing. */
10462 if (TARGET_ARCH64)
10464 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10465 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10466 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10467 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10468 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10469 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10471 else
10473 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10474 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10475 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10476 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10477 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10478 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10481 if (TARGET_VIS2)
10483 /* Edge handling. */
10484 if (TARGET_ARCH64)
10486 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10487 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10488 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10489 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10490 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10491 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10492 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10493 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10494 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10495 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10496 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10497 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10499 else
10501 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10502 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10503 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10504 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10505 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10506 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10507 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10508 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10509 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10510 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10511 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10512 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10515 /* Byte mask and shuffle. */
10516 if (TARGET_ARCH64)
10517 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10518 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10519 else
10520 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10521 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10522 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10523 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10524 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10525 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10526 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10527 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10528 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10529 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10532 if (TARGET_VIS3)
10534 if (TARGET_ARCH64)
10536 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10537 SPARC_BUILTIN_CMASK8, void_ftype_di);
10538 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10539 SPARC_BUILTIN_CMASK16, void_ftype_di);
10540 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10541 SPARC_BUILTIN_CMASK32, void_ftype_di);
10543 else
10545 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10546 SPARC_BUILTIN_CMASK8, void_ftype_si);
10547 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10548 SPARC_BUILTIN_CMASK16, void_ftype_si);
10549 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10550 SPARC_BUILTIN_CMASK32, void_ftype_si);
10553 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10554 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10556 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10557 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10558 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10559 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10560 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10561 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10562 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10563 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10564 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10565 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10566 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10567 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10568 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10569 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10570 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10571 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10573 if (TARGET_ARCH64)
10574 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10575 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10576 else
10577 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10578 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10580 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10581 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10582 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10583 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10584 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10585 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10587 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10588 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10589 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10590 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10591 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10592 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10593 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10594 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10595 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10596 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10597 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10598 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10599 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10600 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10601 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10602 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10604 if (TARGET_ARCH64)
10606 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10607 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10608 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10609 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10610 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10611 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10612 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10613 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10615 else
10617 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10618 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10619 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10620 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10621 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10622 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10623 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10624 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10627 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10628 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10629 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10630 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10631 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10632 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10633 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10634 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10635 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10636 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10637 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10638 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10640 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10641 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10642 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10643 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10644 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10645 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
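/* Illustrative sketch (user-level code, assumes -mvis): the builtins
   registered above pair with GCC vector types, e.g. saturating pixel
   packing through fpack16 (scaled by the GSR scale factor):  */
#if 0
typedef short v4hi __attribute__ ((vector_size (8)));
typedef unsigned char v4qi __attribute__ ((vector_size (4)));

v4qi
pack_pixels (v4hi v)
{
  return __builtin_vis_fpack16 (v);     /* single fpack16 insn */
}
#endif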
10649 /* Implement TARGET_BUILTIN_DECL hook. */
10651 static tree
10652 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10654 if (code >= SPARC_BUILTIN_MAX)
10655 return error_mark_node;
10657 return sparc_builtins[code];
10660 /* Implement TARGET_EXPAND_BUILTIN hook. */
10662 static rtx
10663 sparc_expand_builtin (tree exp, rtx target,
10664 rtx subtarget ATTRIBUTE_UNUSED,
10665 enum machine_mode tmode ATTRIBUTE_UNUSED,
10666 int ignore ATTRIBUTE_UNUSED)
10668 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10669 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10670 enum insn_code icode = sparc_builtins_icode[code];
10671 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10672 call_expr_arg_iterator iter;
10673 int arg_count = 0;
10674 rtx pat, op[4];
10675 tree arg;
10677 if (nonvoid)
10679 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10680 if (!target
10681 || GET_MODE (target) != tmode
10682 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10683 op[0] = gen_reg_rtx (tmode);
10684 else
10685 op[0] = target;
10688 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10690 const struct insn_operand_data *insn_op;
10691 int idx;
10693 if (arg == error_mark_node)
10694 return NULL_RTX;
10696 arg_count++;
10697 idx = arg_count - !nonvoid;
10698 insn_op = &insn_data[icode].operand[idx];
10699 op[arg_count] = expand_normal (arg);
10701 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10703 if (!address_operand (op[arg_count], SImode))
10705 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10706 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10708 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10711 else if (insn_op->mode == V1DImode
10712 && GET_MODE (op[arg_count]) == DImode)
10713 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10715 else if (insn_op->mode == V1SImode
10716 && GET_MODE (op[arg_count]) == SImode)
10717 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10719 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10720 insn_op->mode))
10721 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10724 switch (arg_count)
10726 case 0:
10727 pat = GEN_FCN (icode) (op[0]);
10728 break;
10729 case 1:
10730 if (nonvoid)
10731 pat = GEN_FCN (icode) (op[0], op[1]);
10732 else
10733 pat = GEN_FCN (icode) (op[1]);
10734 break;
10735 case 2:
10736 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10737 break;
10738 case 3:
10739 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10740 break;
10741 default:
10742 gcc_unreachable ();
10745 if (!pat)
10746 return NULL_RTX;
10748 emit_insn (pat);
10750 return (nonvoid ? op[0] : const0_rtx);
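/* Illustrative sketch: a user-level call such as

     unsigned int fsr;
     __builtin_store_fsr (&fsr);

   arrives here with a single address argument; the LDFSR/STFSR
   special case above copies it into a register if necessary and
   wraps it in an SImode MEM before the stfsr pattern is generated.  */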
10753 /* Return the upper 16 bits of the 8x16 multiplication. */
10755 static int
10756 sparc_vis_mul8x16 (int e8, int e16)
10758 return (e8 * e16 + 128) / 256;
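/* For example, with e8 == 3 and e16 == 300 the raw product is 900;
   truncating 900 / 256 would give 3, but (900 + 128) / 256 == 4,
   matching round-to-nearest of 3.52: the +128 bias turns the
   truncating division into a rounding one.  */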
10761 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10762 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10764 static void
10765 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10766 tree inner_type, tree cst0, tree cst1)
10768 unsigned i, num = VECTOR_CST_NELTS (cst0);
10769 int scale;
10771 switch (fncode)
10773 case SPARC_BUILTIN_FMUL8X16:
10774 for (i = 0; i < num; ++i)
10776 int val
10777 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10778 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10779 n_elts[i] = build_int_cst (inner_type, val);
10781 break;
10783 case SPARC_BUILTIN_FMUL8X16AU:
10784 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10786 for (i = 0; i < num; ++i)
10788 int val
10789 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10790 scale);
10791 n_elts[i] = build_int_cst (inner_type, val);
10793 break;
10795 case SPARC_BUILTIN_FMUL8X16AL:
10796 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10798 for (i = 0; i < num; ++i)
10800 int val
10801 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10802 scale);
10803 n_elts[i] = build_int_cst (inner_type, val);
10805 break;
10807 default:
10808 gcc_unreachable ();
10812 /* Implement TARGET_FOLD_BUILTIN hook.
10814 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10815 result of the function call is ignored. NULL_TREE is returned if the
10816 function could not be folded. */
10818 static tree
10819 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10820 tree *args, bool ignore)
10822 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10823 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10824 tree arg0, arg1, arg2;
10826 if (ignore)
10827 switch (code)
10829 case SPARC_BUILTIN_LDFSR:
10830 case SPARC_BUILTIN_STFSR:
10831 case SPARC_BUILTIN_ALIGNADDR:
10832 case SPARC_BUILTIN_WRGSR:
10833 case SPARC_BUILTIN_BMASK:
10834 case SPARC_BUILTIN_CMASK8:
10835 case SPARC_BUILTIN_CMASK16:
10836 case SPARC_BUILTIN_CMASK32:
10837 break;
10839 default:
10840 return build_zero_cst (rtype);
10843 switch (code)
10845 case SPARC_BUILTIN_FEXPAND:
10846 arg0 = args[0];
10847 STRIP_NOPS (arg0);
10849 if (TREE_CODE (arg0) == VECTOR_CST)
10851 tree inner_type = TREE_TYPE (rtype);
10852 tree *n_elts;
10853 unsigned i;
10855 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10856 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10857 n_elts[i] = build_int_cst (inner_type,
10858 TREE_INT_CST_LOW
10859 (VECTOR_CST_ELT (arg0, i)) << 4);
10860 return build_vector (rtype, n_elts);
10862 break;
10864 case SPARC_BUILTIN_FMUL8X16:
10865 case SPARC_BUILTIN_FMUL8X16AU:
10866 case SPARC_BUILTIN_FMUL8X16AL:
10867 arg0 = args[0];
10868 arg1 = args[1];
10869 STRIP_NOPS (arg0);
10870 STRIP_NOPS (arg1);
10872 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10874 tree inner_type = TREE_TYPE (rtype);
10875 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10876 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10877 return build_vector (rtype, n_elts);
10879 break;
10881 case SPARC_BUILTIN_FPMERGE:
10882 arg0 = args[0];
10883 arg1 = args[1];
10884 STRIP_NOPS (arg0);
10885 STRIP_NOPS (arg1);
10887 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10889 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10890 unsigned i;
10891 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10893 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10894 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10897 return build_vector (rtype, n_elts);
10899 break;
10901 case SPARC_BUILTIN_PDIST:
10902 case SPARC_BUILTIN_PDISTN:
10903 arg0 = args[0];
10904 arg1 = args[1];
10905 STRIP_NOPS (arg0);
10906 STRIP_NOPS (arg1);
10907 if (code == SPARC_BUILTIN_PDIST)
10909 arg2 = args[2];
10910 STRIP_NOPS (arg2);
10912 else
10913 arg2 = integer_zero_node;
10915 if (TREE_CODE (arg0) == VECTOR_CST
10916 && TREE_CODE (arg1) == VECTOR_CST
10917 && TREE_CODE (arg2) == INTEGER_CST)
10919 bool overflow = false;
10920 widest_int result = wi::to_widest (arg2);
10921 widest_int tmp;
10922 unsigned i;
10924 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10926 tree e0 = VECTOR_CST_ELT (arg0, i);
10927 tree e1 = VECTOR_CST_ELT (arg1, i);
10929 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10931 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
10932 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
10933 if (wi::neg_p (tmp))
10934 tmp = wi::neg (tmp, &neg2_ovf);
10935 else
10936 neg2_ovf = false;
10937 result = wi::add (result, tmp, SIGNED, &add2_ovf);
10938 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10941 gcc_assert (!overflow);
10943 return wide_int_to_tree (rtype, result);
10946 default:
10947 break;
10950 return NULL_TREE;
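/* Illustrative sketch (not part of the compiler): the PDIST folding
   above accumulates the same sum of absolute byte differences the
   hardware instruction computes; PDISTN is the acc == 0 case.  */
#if 0
#include <stdint.h>

static int64_t
pdist_sketch (const uint8_t a[8], const uint8_t b[8], int64_t acc)
{
  int i;
  for (i = 0; i < 8; i++)
    acc += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
  return acc;
}
#endif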
10953 /* ??? This duplicates information provided to the compiler by the
10954 ??? scheduler description. Some day, teach genautomata to output
10955 ??? the latencies and then CSE will just use that. */
10957 static bool
10958 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10959 int *total, bool speed ATTRIBUTE_UNUSED)
10961 enum machine_mode mode = GET_MODE (x);
10962 bool float_mode_p = FLOAT_MODE_P (mode);
10964 switch (code)
10966 case CONST_INT:
10967 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10969 *total = 0;
10970 return true;
10972 /* FALLTHRU */
10974 case HIGH:
10975 *total = 2;
10976 return true;
10978 case CONST:
10979 case LABEL_REF:
10980 case SYMBOL_REF:
10981 *total = 4;
10982 return true;
10984 case CONST_DOUBLE:
10985 if (GET_MODE (x) == VOIDmode
10986 && ((CONST_DOUBLE_HIGH (x) == 0
10987 && CONST_DOUBLE_LOW (x) < 0x1000)
10988 || (CONST_DOUBLE_HIGH (x) == -1
10989 && CONST_DOUBLE_LOW (x) < 0
10990 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10991 *total = 0;
10992 else
10993 *total = 8;
10994 return true;
10996 case MEM:
10997 /* If outer-code was a sign or zero extension, a cost
10998 of COSTS_N_INSNS (1) was already added in. This is
10999 why we are subtracting it back out. */
11000 if (outer_code == ZERO_EXTEND)
11002 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11004 else if (outer_code == SIGN_EXTEND)
11006 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11008 else if (float_mode_p)
11010 *total = sparc_costs->float_load;
11012 else
11014 *total = sparc_costs->int_load;
11017 return true;
11019 case PLUS:
11020 case MINUS:
11021 if (float_mode_p)
11022 *total = sparc_costs->float_plusminus;
11023 else
11024 *total = COSTS_N_INSNS (1);
11025 return false;
11027 case FMA:
11029 rtx sub;
11031 gcc_assert (float_mode_p);
11032 *total = sparc_costs->float_mul;
11034 sub = XEXP (x, 0);
11035 if (GET_CODE (sub) == NEG)
11036 sub = XEXP (sub, 0);
11037 *total += rtx_cost (sub, FMA, 0, speed);
11039 sub = XEXP (x, 2);
11040 if (GET_CODE (sub) == NEG)
11041 sub = XEXP (sub, 0);
11042 *total += rtx_cost (sub, FMA, 2, speed);
11043 return true;
11046 case MULT:
11047 if (float_mode_p)
11048 *total = sparc_costs->float_mul;
11049 else if (! TARGET_HARD_MUL)
11050 *total = COSTS_N_INSNS (25);
11051 else
11053 int bit_cost;
11055 bit_cost = 0;
11056 if (sparc_costs->int_mul_bit_factor)
11058 int nbits;
11060 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11062 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11063 for (nbits = 0; value != 0; value &= value - 1)
11064 nbits++;
11066 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
11067 && GET_MODE (XEXP (x, 1)) == VOIDmode)
11069 rtx x1 = XEXP (x, 1);
11070 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
11071 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
11073 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
11074 nbits++;
11075 for (; value2 != 0; value2 &= value2 - 1)
11076 nbits++;
11078 else
11079 nbits = 7;
11081 if (nbits < 3)
11082 nbits = 3;
11083 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11084 bit_cost = COSTS_N_INSNS (bit_cost);
11087 if (mode == DImode)
11088 *total = sparc_costs->int_mulX + bit_cost;
11089 else
11090 *total = sparc_costs->int_mul + bit_cost;
11092 return false;
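/* For example, a multiply by the constant 0x0f0f0f0f has nbits == 16,
   so with a hypothetical int_mul_bit_factor of 2 the extra cost is
   COSTS_N_INSNS ((16 - 3) / 2) == COSTS_N_INSNS (6) on top of the
   base int_mul/int_mulX cost.  */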
11094 case ASHIFT:
11095 case ASHIFTRT:
11096 case LSHIFTRT:
11097 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11098 return false;
11100 case DIV:
11101 case UDIV:
11102 case MOD:
11103 case UMOD:
11104 if (float_mode_p)
11106 if (mode == DFmode)
11107 *total = sparc_costs->float_div_df;
11108 else
11109 *total = sparc_costs->float_div_sf;
11111 else
11113 if (mode == DImode)
11114 *total = sparc_costs->int_divX;
11115 else
11116 *total = sparc_costs->int_div;
11118 return false;
11120 case NEG:
11121 if (! float_mode_p)
11123 *total = COSTS_N_INSNS (1);
11124 return false;
11126 /* FALLTHRU */
11128 case ABS:
11129 case FLOAT:
11130 case UNSIGNED_FLOAT:
11131 case FIX:
11132 case UNSIGNED_FIX:
11133 case FLOAT_EXTEND:
11134 case FLOAT_TRUNCATE:
11135 *total = sparc_costs->float_move;
11136 return false;
11138 case SQRT:
11139 if (mode == DFmode)
11140 *total = sparc_costs->float_sqrt_df;
11141 else
11142 *total = sparc_costs->float_sqrt_sf;
11143 return false;
11145 case COMPARE:
11146 if (float_mode_p)
11147 *total = sparc_costs->float_cmp;
11148 else
11149 *total = COSTS_N_INSNS (1);
11150 return false;
11152 case IF_THEN_ELSE:
11153 if (float_mode_p)
11154 *total = sparc_costs->float_cmove;
11155 else
11156 *total = sparc_costs->int_cmove;
11157 return false;
11159 case IOR:
11160 /* Handle the NAND vector patterns. */
11161 if (sparc_vector_mode_supported_p (GET_MODE (x))
11162 && GET_CODE (XEXP (x, 0)) == NOT
11163 && GET_CODE (XEXP (x, 1)) == NOT)
11165 *total = COSTS_N_INSNS (1);
11166 return true;
11168 else
11169 return false;
11171 default:
11172 return false;
11176 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11178 static inline bool
11179 general_or_i64_p (reg_class_t rclass)
11181 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11184 /* Implement TARGET_REGISTER_MOVE_COST. */
11186 static int
11187 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11188 reg_class_t from, reg_class_t to)
11190 bool need_memory = false;
11192 if (from == FPCC_REGS || to == FPCC_REGS)
11193 need_memory = true;
11194 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11195 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11197 if (TARGET_VIS3)
11199 int size = GET_MODE_SIZE (mode);
11200 if (size == 8 || size == 4)
11202 if (! TARGET_ARCH32 || size == 4)
11203 return 4;
11204 else
11205 return 6;
11208 need_memory = true;
11211 if (need_memory)
11213 if (sparc_cpu == PROCESSOR_ULTRASPARC
11214 || sparc_cpu == PROCESSOR_ULTRASPARC3
11215 || sparc_cpu == PROCESSOR_NIAGARA
11216 || sparc_cpu == PROCESSOR_NIAGARA2
11217 || sparc_cpu == PROCESSOR_NIAGARA3
11218 || sparc_cpu == PROCESSOR_NIAGARA4)
11219 return 12;
11221 return 6;
11224 return 2;
11227 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11228 This is achieved by means of a manual dynamic stack space allocation in
11229 the current frame. We make the assumption that SEQ doesn't contain any
11230 function calls, with the possible exception of calls to the GOT helper. */
11232 static void
11233 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11235 /* We must preserve the lowest 16 words for the register save area. */
11236 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11237 /* We really need only 2 words of fresh stack space. */
11238 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11240 rtx slot
11241 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11242 SPARC_STACK_BIAS + offset));
11244 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11245 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
11246 if (reg2)
11247 emit_insn (gen_rtx_SET (VOIDmode,
11248 adjust_address (slot, word_mode, UNITS_PER_WORD),
11249 reg2));
11250 emit_insn (seq);
11251 if (reg2)
11252 emit_insn (gen_rtx_SET (VOIDmode,
11253 reg2,
11254 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11255 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
11256 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11259 /* Output the assembler code for a thunk function. THUNK_DECL is the
11260 declaration for the thunk function itself, FUNCTION is the decl for
11261 the target function. DELTA is an immediate constant offset to be
11262 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11263 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11265 static void
11266 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11267 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11268 tree function)
11270 rtx this_rtx, funexp;
11271 rtx_insn *insn;
11272 unsigned int int_arg_first;
11274 reload_completed = 1;
11275 epilogue_completed = 1;
11277 emit_note (NOTE_INSN_PROLOGUE_END);
11279 if (TARGET_FLAT)
11281 sparc_leaf_function_p = 1;
11283 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11285 else if (flag_delayed_branch)
11287 /* We will emit a regular sibcall below, so we need to instruct
11288 output_sibcall that we are in a leaf function. */
11289 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11291 /* This will cause final.c to invoke leaf_renumber_regs so we
11292 must behave as if we were in a not-yet-leafified function. */
11293 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11295 else
11297 /* We will emit the sibcall manually below, so we will need to
11298 manually spill non-leaf registers. */
11299 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11301 /* We really are in a leaf function. */
11302 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11305 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11306 returns a structure, the structure return pointer is there instead. */
11307 if (TARGET_ARCH64
11308 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11309 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11310 else
11311 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11313 /* Add DELTA. When possible use a plain add, otherwise load it into
11314 a register first. */
11315 if (delta)
11317 rtx delta_rtx = GEN_INT (delta);
11319 if (! SPARC_SIMM13_P (delta))
11321 rtx scratch = gen_rtx_REG (Pmode, 1);
11322 emit_move_insn (scratch, delta_rtx);
11323 delta_rtx = scratch;
11326 /* THIS_RTX += DELTA. */
11327 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11330 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11331 if (vcall_offset)
11333 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11334 rtx scratch = gen_rtx_REG (Pmode, 1);
11336 gcc_assert (vcall_offset < 0);
11338 /* SCRATCH = *THIS_RTX. */
11339 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11341 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11342 may not have any available scratch register at this point. */
11343 if (SPARC_SIMM13_P (vcall_offset))
11345 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11346 else if (! fixed_regs[5]
11347 /* The below sequence is made up of at least 2 insns,
11348 while the default method may need only one. */
11349 && vcall_offset < -8192)
11351 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11352 emit_move_insn (scratch2, vcall_offset_rtx);
11353 vcall_offset_rtx = scratch2;
11355 else
11357 rtx increment = GEN_INT (-4096);
11359 /* VCALL_OFFSET is a negative number whose typical range can be
11360 estimated as -32768..0 in 32-bit mode. In almost all cases
11361 it is therefore cheaper to emit multiple add insns than
11362 spilling and loading the constant into a register (at least
11363 6 insns). */
11364 while (! SPARC_SIMM13_P (vcall_offset))
11366 emit_insn (gen_add2_insn (scratch, increment));
11367 vcall_offset += 4096;
11369 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11372 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11373 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11374 gen_rtx_PLUS (Pmode,
11375 scratch,
11376 vcall_offset_rtx)));
11378 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11379 emit_insn (gen_add2_insn (this_rtx, scratch));
11382 /* Generate a tail call to the target function. */
11383 if (! TREE_USED (function))
11385 assemble_external (function);
11386 TREE_USED (function) = 1;
11388 funexp = XEXP (DECL_RTL (function), 0);
11390 if (flag_delayed_branch)
11392 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11393 insn = emit_call_insn (gen_sibcall (funexp));
11394 SIBLING_CALL_P (insn) = 1;
11396 else
11398 /* The hoops we have to jump through in order to generate a sibcall
11399 without using delay slots... */
11400 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11402 if (flag_pic)
11404 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11405 start_sequence ();
11406 load_got_register (); /* clobbers %o7 */
11407 scratch = sparc_legitimize_pic_address (funexp, scratch);
11408 seq = get_insns ();
11409 end_sequence ();
11410 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11412 else if (TARGET_ARCH32)
11414 emit_insn (gen_rtx_SET (VOIDmode,
11415 scratch,
11416 gen_rtx_HIGH (SImode, funexp)));
11417 emit_insn (gen_rtx_SET (VOIDmode,
11418 scratch,
11419 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11421 else /* TARGET_ARCH64 */
11423 switch (sparc_cmodel)
11425 case CM_MEDLOW:
11426 case CM_MEDMID:
11427 /* The destination can serve as a temporary. */
11428 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11429 break;
11431 case CM_MEDANY:
11432 case CM_EMBMEDANY:
11433 /* The destination cannot serve as a temporary. */
11434 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11435 start_sequence ();
11436 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11437 seq = get_insns ();
11438 end_sequence ();
11439 emit_and_preserve (seq, spill_reg, 0);
11440 break;
11442 default:
11443 gcc_unreachable ();
11447 emit_jump_insn (gen_indirect_jump (scratch));
11450 emit_barrier ();
11452 /* Run just enough of rest_of_compilation to get the insns emitted.
11453 There's not really enough bulk here to make other passes such as
11454 instruction scheduling worth while. Note that use_thunk calls
11455 assemble_start_function and assemble_end_function. */
11456 insn = get_insns ();
11457 shorten_branches (insn);
11458 final_start_function (insn, file, 1);
11459 final (insn, file, 1);
11460 final_end_function ();
11462 reload_completed = 0;
11463 epilogue_completed = 0;
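/* Illustrative sketch (hypothetical names and constants, not part of
   the compiler): semantically the emitted thunk behaves like this C
   function, with DELTA and VCALL_OFFSET folded in as constants:  */
#if 0
#define DELTA 8             /* hypothetical this-pointer adjustment */
#define VCALL_OFFSET (-16)  /* hypothetical vtable slot offset */

extern void target_function (void *);

void
thunk_sketch (char *this_ptr)
{
  this_ptr += DELTA;                      /* THIS += DELTA */
  if (VCALL_OFFSET)                       /* THIS += *(*THIS + VCALL_OFFSET) */
    this_ptr += *(long *) (*(char **) this_ptr + VCALL_OFFSET);
  target_function (this_ptr);             /* emitted as a sibling call */
}
#endif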
11466 /* Return true if sparc_output_mi_thunk would be able to output the
11467 assembler code for the thunk function specified by the arguments
11468 it is passed, and false otherwise. */
11469 static bool
11470 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11471 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11472 HOST_WIDE_INT vcall_offset,
11473 const_tree function ATTRIBUTE_UNUSED)
11475 /* Bound the loop used in the default method above. */
11476 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11479 /* How to allocate a 'struct machine_function'. */
11481 static struct machine_function *
11482 sparc_init_machine_status (void)
11484 return ggc_cleared_alloc<machine_function> ();
11487 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11488 We need to emit DTP-relative relocations. */
11490 static void
11491 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11493 switch (size)
11495 case 4:
11496 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11497 break;
11498 case 8:
11499 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11500 break;
11501 default:
11502 gcc_unreachable ();
11504 output_addr_const (file, x);
11505 fputs (")", file);
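/* For example, on a 64-bit target a TLS variable "v" (hypothetical
   name) yields

     .xword %r_tls_dtpoff64(v)

   in the debug info, from which the debugger computes the
   DTP-relative address.  */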
11508 /* Do whatever processing is required at the end of a file. */
11510 static void
11511 sparc_file_end (void)
11513 /* If we need to emit the special GOT helper function, do so now. */
11514 if (got_helper_rtx)
11516 const char *name = XSTR (got_helper_rtx, 0);
11517 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11518 #ifdef DWARF2_UNWIND_INFO
11519 bool do_cfi;
11520 #endif
11522 if (USE_HIDDEN_LINKONCE)
11524 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11525 get_identifier (name),
11526 build_function_type_list (void_type_node,
11527 NULL_TREE));
11528 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11529 NULL_TREE, void_type_node);
11530 TREE_PUBLIC (decl) = 1;
11531 TREE_STATIC (decl) = 1;
11532 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11533 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11534 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11535 resolve_unique_section (decl, 0, flag_function_sections);
11536 allocate_struct_function (decl, true);
11537 cfun->is_thunk = 1;
11538 current_function_decl = decl;
11539 init_varasm_status ();
11540 assemble_start_function (decl, name);
11542 else
11544 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11545 switch_to_section (text_section);
11546 if (align > 0)
11547 ASM_OUTPUT_ALIGN (asm_out_file, align);
11548 ASM_OUTPUT_LABEL (asm_out_file, name);
11551 #ifdef DWARF2_UNWIND_INFO
11552 do_cfi = dwarf2out_do_cfi_asm ();
11553 if (do_cfi)
11554 fprintf (asm_out_file, "\t.cfi_startproc\n");
11555 #endif
11556 if (flag_delayed_branch)
11557 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11558 reg_name, reg_name);
11559 else
11560 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11561 reg_name, reg_name);
11562 #ifdef DWARF2_UNWIND_INFO
11563 if (do_cfi)
11564 fprintf (asm_out_file, "\t.cfi_endproc\n");
11565 #endif
11568 if (NEED_INDICATE_EXEC_STACK)
11569 file_end_indicate_exec_stack ();
11571 #ifdef TARGET_SOLARIS
11572 solaris_file_end ();
11573 #endif
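/* Illustrative sketch: for a delayed-branch target with the GOT
   register in %l7, the helper body emitted above is

     jmp  %o7+8
      add %o7, %l7, %l7

   i.e. it returns while adding the PC in %o7 to the GOT offset
   already loaded into %l7 (the helper's name and call sites are set
   up elsewhere in this file).  */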
11576 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11577 /* Implement TARGET_MANGLE_TYPE. */
11579 static const char *
11580 sparc_mangle_type (const_tree type)
11582 if (!TARGET_64BIT
11583 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11584 && TARGET_LONG_DOUBLE_128)
11585 return "g";
11587 /* For all other types, use normal C++ mangling. */
11588 return NULL;
11590 #endif
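/* For example, with -m32 and 128-bit long double the C++ signature
   void f (long double) mangles as _Z1fg instead of the default
   _Z1fe.  */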
11592 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11593 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
11594 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11596 void
11597 sparc_emit_membar_for_model (enum memmodel model,
11598 int load_store, int before_after)
11600 /* Bits for the MEMBAR mmask field. */
11601 const int LoadLoad = 1;
11602 const int StoreLoad = 2;
11603 const int LoadStore = 4;
11604 const int StoreStore = 8;
11606 int mm = 0, implied = 0;
11608 switch (sparc_memory_model)
11610 case SMM_SC:
11611 /* Sequential Consistency. All memory transactions are immediately
11612 visible in sequential execution order. No barriers needed. */
11613 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11614 break;
11616 case SMM_TSO:
11617 /* Total Store Ordering: all memory transactions with store semantics
11618 are followed by an implied StoreStore. */
11619 implied |= StoreStore;
11621 /* If we're not looking for a raw barrier (before+after), then atomic
11622 operations get the benefit of being both load and store. */
11623 if (load_store == 3 && before_after == 1)
11624 implied |= StoreLoad;
11625 /* FALLTHRU */
11627 case SMM_PSO:
11628 /* Partial Store Ordering: all memory transactions with load semantics
11629 are followed by an implied LoadLoad | LoadStore. */
11630 implied |= LoadLoad | LoadStore;
11632 /* If we're not looking for a raw barrier (before+after), then atomic
11633 operations get the benefit of being both load and store. */
11634 if (load_store == 3 && before_after == 2)
11635 implied |= StoreLoad | StoreStore;
11636 /* FALLTHRU */
11638 case SMM_RMO:
11639 /* Relaxed Memory Ordering: no implicit bits. */
11640 break;
11642 default:
11643 gcc_unreachable ();
11646 if (before_after & 1)
11648 if (model == MEMMODEL_RELEASE
11649 || model == MEMMODEL_ACQ_REL
11650 || model == MEMMODEL_SEQ_CST)
11652 if (load_store & 1)
11653 mm |= LoadLoad | StoreLoad;
11654 if (load_store & 2)
11655 mm |= LoadStore | StoreStore;
11658 if (before_after & 2)
11660 if (model == MEMMODEL_ACQUIRE
11661 || model == MEMMODEL_ACQ_REL
11662 || model == MEMMODEL_SEQ_CST)
11664 if (load_store & 1)
11665 mm |= LoadLoad | LoadStore;
11666 if (load_store & 2)
11667 mm |= StoreLoad | StoreStore;
11671 /* Remove the bits implied by the system memory model. */
11672 mm &= ~implied;
11674 /* For raw barriers (before+after), always emit a barrier.
11675 This will become a compile-time barrier if needed. */
11676 if (mm || before_after == 3)
11677 emit_insn (gen_membar (GEN_INT (mm)));
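/* Worked example: a full __ATOMIC_SEQ_CST barrier (LOAD_STORE == 3,
   BEFORE_AFTER == 3) requests all four mmask bits both before and
   after.  Under SMM_TSO (the usual default) the implied set is
   LoadLoad | LoadStore | StoreStore, so after masking only

	membar	#StoreLoad

   is emitted, the classic TSO fence.  */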
11680 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing 32-bit
11681 compare and swap on the word containing the byte or half-word. */
11683 static void
11684 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11685 rtx oldval, rtx newval)
11687 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11688 rtx addr = gen_reg_rtx (Pmode);
11689 rtx off = gen_reg_rtx (SImode);
11690 rtx oldv = gen_reg_rtx (SImode);
11691 rtx newv = gen_reg_rtx (SImode);
11692 rtx oldvalue = gen_reg_rtx (SImode);
11693 rtx newvalue = gen_reg_rtx (SImode);
11694 rtx res = gen_reg_rtx (SImode);
11695 rtx resv = gen_reg_rtx (SImode);
11696 rtx memsi, val, mask, cc;
11698 emit_insn (gen_rtx_SET (VOIDmode, addr,
11699 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11701 if (Pmode != SImode)
11702 addr1 = gen_lowpart (SImode, addr1);
11703 emit_insn (gen_rtx_SET (VOIDmode, off,
11704 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11706 memsi = gen_rtx_MEM (SImode, addr);
11707 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11708 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11710 val = copy_to_reg (memsi);
11712 emit_insn (gen_rtx_SET (VOIDmode, off,
11713 gen_rtx_XOR (SImode, off,
11714 GEN_INT (GET_MODE (mem) == QImode
11715 ? 3 : 2))));
11717 emit_insn (gen_rtx_SET (VOIDmode, off,
11718 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11720 if (GET_MODE (mem) == QImode)
11721 mask = force_reg (SImode, GEN_INT (0xff));
11722 else
11723 mask = force_reg (SImode, GEN_INT (0xffff));
11725 emit_insn (gen_rtx_SET (VOIDmode, mask,
11726 gen_rtx_ASHIFT (SImode, mask, off)));
11728 emit_insn (gen_rtx_SET (VOIDmode, val,
11729 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11730 val)));
11732 oldval = gen_lowpart (SImode, oldval);
11733 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11734 gen_rtx_ASHIFT (SImode, oldval, off)));
11736 newval = gen_lowpart_common (SImode, newval);
11737 emit_insn (gen_rtx_SET (VOIDmode, newv,
11738 gen_rtx_ASHIFT (SImode, newval, off)));
11740 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11741 gen_rtx_AND (SImode, oldv, mask)));
11743 emit_insn (gen_rtx_SET (VOIDmode, newv,
11744 gen_rtx_AND (SImode, newv, mask)));
11746 rtx_code_label *end_label = gen_label_rtx ();
11747 rtx_code_label *loop_label = gen_label_rtx ();
11748 emit_label (loop_label);
11750 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11751 gen_rtx_IOR (SImode, oldv, val)));
11753 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11754 gen_rtx_IOR (SImode, newv, val)));
11756 emit_move_insn (bool_result, const1_rtx);
11758 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11760 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11762 emit_insn (gen_rtx_SET (VOIDmode, resv,
11763 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11764 res)));
11766 emit_move_insn (bool_result, const0_rtx);
11768 cc = gen_compare_reg_1 (NE, resv, val);
11769 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11771 /* Use cbranchcc4 to separate the compare and branch! */
11772 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11773 cc, const0_rtx, loop_label));
11775 emit_label (end_label);
11777 emit_insn (gen_rtx_SET (VOIDmode, res,
11778 gen_rtx_AND (SImode, res, mask)));
11780 emit_insn (gen_rtx_SET (VOIDmode, res,
11781 gen_rtx_LSHIFTRT (SImode, res, off)));
11783 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
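/* In outline the expansion above performs, in hypothetical C (QImode
   case, big-endian byte numbering, CAS standing for the 32-bit
   atomic_compare_and_swapsi_1 pattern):

     uint32_t *wp  = (uint32_t *) ((uintptr_t) p & -4);
     int shift     = (((uintptr_t) p & 3) ^ 3) << 3;
     uint32_t mask = 0xff << shift;
     uint32_t rest = *wp & ~mask;
     for (;;)
       {
	 uint32_t o = rest | ((uint32_t) oldval << shift);
	 uint32_t n = rest | ((uint32_t) newval << shift);
	 uint32_t r = CAS (wp, o, n);
	 if (r == o)
	   break;                 success: BOOL_RESULT is 1
	 if ((r & ~mask) == rest)
	   break;                 real mismatch: BOOL_RESULT is 0
	 rest = r & ~mask;        unrelated bytes changed, retry
       }
     RESULT = (r & mask) >> shift;  */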
11786 /* Expand code to perform a compare-and-swap. */
11788 void
11789 sparc_expand_compare_and_swap (rtx operands[])
11791 rtx bval, retval, mem, oldval, newval;
11792 enum machine_mode mode;
11793 enum memmodel model;
11795 bval = operands[0];
11796 retval = operands[1];
11797 mem = operands[2];
11798 oldval = operands[3];
11799 newval = operands[4];
11800 model = (enum memmodel) INTVAL (operands[6]);
11801 mode = GET_MODE (mem);
11803 sparc_emit_membar_for_model (model, 3, 1);
11805 if (reg_overlap_mentioned_p (retval, oldval))
11806 oldval = copy_to_reg (oldval);
11808 if (mode == QImode || mode == HImode)
11809 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11810 else
11812 rtx (*gen) (rtx, rtx, rtx, rtx);
11813 rtx x;
11815 if (mode == SImode)
11816 gen = gen_atomic_compare_and_swapsi_1;
11817 else
11818 gen = gen_atomic_compare_and_swapdi_1;
11819 emit_insn (gen (retval, mem, oldval, newval));
11821 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11822 if (x != bval)
11823 convert_move (bval, x, 1);
11826 sparc_emit_membar_for_model (model, 3, 2);
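/* So e.g. __atomic_compare_exchange_n on a char or short object goes
   through the subword routine above, while SImode and DImode accesses
   map directly onto the atomic_compare_and_swap{si,di}_1 patterns.  */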
11829 void
11830 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11832 rtx t_1, t_2, t_3;
11834 sel = gen_lowpart (DImode, sel);
11835 switch (vmode)
11837 case V2SImode:
11838 /* inp = xxxxxxxAxxxxxxxB */
11839 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11840 NULL_RTX, 1, OPTAB_DIRECT);
11841 /* t_1 = ....xxxxxxxAxxx. */
11842 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11843 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11844 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11845 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11846 /* sel = .......B */
11847 /* t_1 = ...A.... */
11848 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11849 /* sel = ...A...B */
11850 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11851 /* sel = AAAABBBB * 4 */
11852 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11853 /* sel = { A*4, A*4+1, A*4+2, ... } */
11854 break;
11856 case V4HImode:
11857 /* inp = xxxAxxxBxxxCxxxD */
11858 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11859 NULL_RTX, 1, OPTAB_DIRECT);
11860 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11861 NULL_RTX, 1, OPTAB_DIRECT);
11862 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11863 NULL_RTX, 1, OPTAB_DIRECT);
11864 /* t_1 = ..xxxAxxxBxxxCxx */
11865 /* t_2 = ....xxxAxxxBxxxC */
11866 /* t_3 = ......xxxAxxxBxx */
11867 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11868 GEN_INT (0x07),
11869 NULL_RTX, 1, OPTAB_DIRECT);
11870 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11871 GEN_INT (0x0700),
11872 NULL_RTX, 1, OPTAB_DIRECT);
11873 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11874 GEN_INT (0x070000),
11875 NULL_RTX, 1, OPTAB_DIRECT);
11876 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11877 GEN_INT (0x07000000),
11878 NULL_RTX, 1, OPTAB_DIRECT);
11879 /* sel = .......D */
11880 /* t_1 = .....C.. */
11881 /* t_2 = ...B.... */
11882 /* t_3 = .A...... */
11883 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11884 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11885 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11886 /* sel = .A.B.C.D */
11887 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11888 /* sel = AABBCCDD * 2 */
11889 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11890 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11891 break;
11893 case V8QImode:
11894 /* input = xAxBxCxDxExFxGxH */
11895 sel = expand_simple_binop (DImode, AND, sel,
11896 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11897 | 0x0f0f0f0f),
11898 NULL_RTX, 1, OPTAB_DIRECT);
11899 /* sel = .A.B.C.D.E.F.G.H */
11900 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11901 NULL_RTX, 1, OPTAB_DIRECT);
11902 /* t_1 = ..A.B.C.D.E.F.G. */
11903 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11904 NULL_RTX, 1, OPTAB_DIRECT);
11905 /* sel = .AABBCCDDEEFFGGH */
11906 sel = expand_simple_binop (DImode, AND, sel,
11907 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11908 | 0xff00ff),
11909 NULL_RTX, 1, OPTAB_DIRECT);
11910 /* sel = ..AB..CD..EF..GH */
11911 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11912 NULL_RTX, 1, OPTAB_DIRECT);
11913 /* t_1 = ....AB..CD..EF.. */
11914 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11915 NULL_RTX, 1, OPTAB_DIRECT);
11916 /* sel = ..ABABCDCDEFEFGH */
11917 sel = expand_simple_binop (DImode, AND, sel,
11918 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11919 NULL_RTX, 1, OPTAB_DIRECT);
11920 /* sel = ....ABCD....EFGH */
11921 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11922 NULL_RTX, 1, OPTAB_DIRECT);
11923 /* t_1 = ........ABCD.... */
11924 sel = gen_lowpart (SImode, sel);
11925 t_1 = gen_lowpart (SImode, t_1);
11926 break;
11928 default:
11929 gcc_unreachable ();
11932 /* Always perform the final addition/merge within the bmask insn. */
11933 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
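/* Worked V2SImode example: for the element-swap selector { 1, 0 } the
   arithmetic above yields sel = 0x44440000 and t_1 = 0x01230123, so
   the bmask addition writes 0x45670123 into %gsr; the following
   bshuffle then takes bytes 4-7 followed by bytes 0-3 of its input,
   i.e. it swaps the two 32-bit elements.  */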
11936 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11938 static bool
11939 sparc_frame_pointer_required (void)
11941 /* If the stack pointer is dynamically modified in the function, it cannot
11942 serve as the frame pointer. */
11943 if (cfun->calls_alloca)
11944 return true;
11946 /* If the function receives nonlocal gotos, it needs to save the frame
11947 pointer in the nonlocal_goto_save_area object. */
11948 if (cfun->has_nonlocal_label)
11949 return true;
11951 /* In flat mode, that's it. */
11952 if (TARGET_FLAT)
11953 return false;
11955 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11956 return !(crtl->is_leaf && only_leaf_regs_used ());
11959 /* The way this is structured, we can't eliminate SFP in favor of SP
11960 if the frame pointer is required: we want to use the SFP->HFP elimination
11961 in that case. But the test in update_eliminables doesn't know we are
11962 assuming below that we only do the former elimination. */
11964 static bool
11965 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11967 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11970 /* Return the hard frame pointer directly to bypass the stack bias. */
11972 static rtx
11973 sparc_builtin_setjmp_frame_value (void)
11975 return hard_frame_pointer_rtx;
11978 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11979 they won't be allocated. */
11981 static void
11982 sparc_conditional_register_usage (void)
11984 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
11986 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11987 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11989 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
11990    then honor it.  */
11991 if (TARGET_ARCH32 && fixed_regs[5])
11992 fixed_regs[5] = 1;
11993 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
11994 fixed_regs[5] = 0;
11995 if (! TARGET_V9)
11997 int regno;
11998 for (regno = SPARC_FIRST_V9_FP_REG;
11999 regno <= SPARC_LAST_V9_FP_REG;
12000 regno++)
12001 fixed_regs[regno] = 1;
12002 /* %fcc0 is used by v8 and v9. */
12003 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12004 regno <= SPARC_LAST_V9_FCC_REG;
12005 regno++)
12006 fixed_regs[regno] = 1;
12008 if (! TARGET_FPU)
12010 int regno;
12011 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12012 fixed_regs[regno] = 1;
12014 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12015    then honor it.  Likewise with g3 and g4.  */
12016 if (fixed_regs[2] == 2)
12017 fixed_regs[2] = ! TARGET_APP_REGS;
12018 if (fixed_regs[3] == 2)
12019 fixed_regs[3] = ! TARGET_APP_REGS;
12020 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12021 fixed_regs[4] = ! TARGET_APP_REGS;
12022 else if (TARGET_CM_EMBMEDANY)
12023 fixed_regs[4] = 1;
12024 else if (fixed_regs[4] == 2)
12025 fixed_regs[4] = 0;
12026 if (TARGET_FLAT)
12028 int regno;
12029 /* Disable leaf functions. */
12030 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12031 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12032 leaf_reg_remap [regno] = regno;
12034 if (TARGET_VIS)
12035 global_regs[SPARC_GSR_REG] = 1;
12038 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12040 - We can't load constants into FP registers.
12041 - We can't load FP constants into integer registers when soft-float,
12042 because there is no soft-float pattern with a r/F constraint.
12043 - We can't load FP constants into integer registers for TFmode unless
12044 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12045 - Try to reload integer constants (symbolic or otherwise) back into
12046 registers directly, rather than having them dumped to memory. */
12048 static reg_class_t
12049 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12051 enum machine_mode mode = GET_MODE (x);
12052 if (CONSTANT_P (x))
12054 if (FP_REG_CLASS_P (rclass)
12055 || rclass == GENERAL_OR_FP_REGS
12056 || rclass == GENERAL_OR_EXTRA_FP_REGS
12057 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12058 || (mode == TFmode && ! const_zero_operand (x, mode)))
12059 return NO_REGS;
12061 if (GET_MODE_CLASS (mode) == MODE_INT)
12062 return GENERAL_REGS;
12064 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12066 if (! FP_REG_CLASS_P (rclass)
12067 || !(const_zero_operand (x, mode)
12068 || const_all_ones_operand (x, mode)))
12069 return NO_REGS;
12073 if (TARGET_VIS3
12074 && ! TARGET_ARCH64
12075 && (rclass == EXTRA_FP_REGS
12076 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12078 int regno = true_regnum (x);
12080 if (SPARC_INT_REG_P (regno))
12081 return (rclass == EXTRA_FP_REGS
12082 ? FP_REGS : GENERAL_OR_FP_REGS);
12085 return rclass;
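/* For instance, reloading a DFmode CONST_DOUBLE into FP_REGS returns
   NO_REGS above, forcing the constant to be loaded from memory, since
   there is no way to load a constant directly into an FP register.  */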
12088 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12089 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12091 const char *
12092 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12094 char mulstr[32];
12096 gcc_assert (! TARGET_ARCH64);
12098 if (sparc_check_64 (operands[1], insn) <= 0)
12099 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12100 if (which_alternative == 1)
12101 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12102 if (GET_CODE (operands[2]) == CONST_INT)
12104 if (which_alternative == 1)
12106 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12107 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12108 output_asm_insn (mulstr, operands);
12109 return "srlx\t%L0, 32, %H0";
12111 else
12113 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12114 output_asm_insn ("or\t%L1, %3, %3", operands);
12115 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12116 output_asm_insn (mulstr, operands);
12117 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12118 return "mov\t%3, %L0";
12121 else if (rtx_equal_p (operands[1], operands[2]))
12123 if (which_alternative == 1)
12125 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12126 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12127 output_asm_insn (mulstr, operands);
12128 return "srlx\t%L0, 32, %H0";
12130 else
12132 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12133 output_asm_insn ("or\t%L1, %3, %3", operands);
12134 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12135 output_asm_insn (mulstr, operands);
12136 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12137 return "mov\t%3, %L0";
12140 if (sparc_check_64 (operands[2], insn) <= 0)
12141 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12142 if (which_alternative == 1)
12144 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12145 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12146 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12147 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12148 output_asm_insn (mulstr, operands);
12149 return "srlx\t%L0, 32, %H0";
12151 else
12153 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12154 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12155 output_asm_insn ("or\t%L1, %3, %3", operands);
12156 output_asm_insn ("or\t%L2, %4, %4", operands);
12157 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12158 output_asm_insn (mulstr, operands);
12159 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12160 return "mov\t%3, %L0";
12164 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12165 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn.  MODE
12166 and INNER_MODE are the modes describing TARGET. */
12168 static void
12169 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
12170 enum machine_mode inner_mode)
12172 rtx t1, final_insn, sel;
12173 int bmask;
12175 t1 = gen_reg_rtx (mode);
12177 elt = convert_modes (SImode, inner_mode, elt, true);
12178 emit_move_insn (gen_lowpart (SImode, t1), elt);
12180 switch (mode)
12182 case V2SImode:
12183 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12184 bmask = 0x45674567;
12185 break;
12186 case V4HImode:
12187 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12188 bmask = 0x67676767;
12189 break;
12190 case V8QImode:
12191 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12192 bmask = 0x77777777;
12193 break;
12194 default:
12195 gcc_unreachable ();
12198 sel = force_reg (SImode, GEN_INT (bmask));
12199 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
12200 emit_insn (final_insn);
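/* The BMASK constants above encode, for each result byte, which byte
   of the T1:T1 concatenation to take: ELT was moved into the low part
   of T1, so 0x77777777 replicates byte 7 (V8QI), 0x67676767 the
   halfword in bytes 6-7 (V4HI), and 0x45674567 the word in bytes 4-7
   (V2SI).  */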
12203 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12204 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn.  */
12206 static void
12207 vector_init_fpmerge (rtx target, rtx elt)
12209 rtx t1, t2, t2_low, t3, t3_low;
12211 t1 = gen_reg_rtx (V4QImode);
12212 elt = convert_modes (SImode, QImode, elt, true);
12213 emit_move_insn (gen_lowpart (SImode, t1), elt);
12215 t2 = gen_reg_rtx (V8QImode);
12216 t2_low = gen_lowpart (V4QImode, t2);
12217 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12219 t3 = gen_reg_rtx (V8QImode);
12220 t3_low = gen_lowpart (V4QImode, t3);
12221 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12223 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
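/* FPMERGE interleaves the bytes of its two V4QI operands, so merging
   a value with itself duplicates every byte.  Starting from ELT in the
   last byte of T1, the three self-merges above replicate it into 2,
   then 4, then all 8 lanes of TARGET.  */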
12226 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12227 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn.  */
12229 static void
12230 vector_init_faligndata (rtx target, rtx elt)
12232 rtx t1 = gen_reg_rtx (V4HImode);
12233 int i;
12235 elt = convert_modes (SImode, HImode, elt, true);
12236 emit_move_insn (gen_lowpart (SImode, t1), elt);
12238 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12239 force_reg (SImode, GEN_INT (6)),
12240 const0_rtx));
12242 for (i = 0; i < 4; i++)
12243 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
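/* The ALIGNADDR above only matters for its side effect of setting the
   GSR alignment field to 6: each FALIGNDATA then extracts bytes 6-13
   of the T1:TARGET concatenation, i.e. ELT's halfword followed by the
   first six bytes of TARGET.  Four such steps rotate ELT into every
   halfword lane.  */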
12246 /* Emit code to initialize TARGET to values for individual fields VALS. */
12248 void
12249 sparc_expand_vector_init (rtx target, rtx vals)
12251 const enum machine_mode mode = GET_MODE (target);
12252 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
12253 const int n_elts = GET_MODE_NUNITS (mode);
12254 int i, n_var = 0;
12255 bool all_same;
12256 rtx mem;
12258 all_same = true;
12259 for (i = 0; i < n_elts; i++)
12261 rtx x = XVECEXP (vals, 0, i);
12262 if (!CONSTANT_P (x))
12263 n_var++;
12265 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12266 all_same = false;
12269 if (n_var == 0)
12271 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12272 return;
12275 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12277 if (GET_MODE_SIZE (inner_mode) == 4)
12279 emit_move_insn (gen_lowpart (SImode, target),
12280 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12281 return;
12283 else if (GET_MODE_SIZE (inner_mode) == 8)
12285 emit_move_insn (gen_lowpart (DImode, target),
12286 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12287 return;
12290 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12291 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12293 emit_move_insn (gen_highpart (word_mode, target),
12294 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12295 emit_move_insn (gen_lowpart (word_mode, target),
12296 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12297 return;
12300 if (all_same && GET_MODE_SIZE (mode) == 8)
12302 if (TARGET_VIS2)
12304 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12305 return;
12307 if (mode == V8QImode)
12309 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12310 return;
12312 if (mode == V4HImode)
12314 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12315 return;
12319 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12320 for (i = 0; i < n_elts; i++)
12321 emit_move_insn (adjust_address_nv (mem, inner_mode,
12322 i * GET_MODE_SIZE (inner_mode)),
12323 XVECEXP (vals, 0, i));
12324 emit_move_insn (target, mem);
12327 /* Implement TARGET_SECONDARY_RELOAD. */
12329 static reg_class_t
12330 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12331 enum machine_mode mode, secondary_reload_info *sri)
12333 enum reg_class rclass = (enum reg_class) rclass_i;
12335 sri->icode = CODE_FOR_nothing;
12336 sri->extra_cost = 0;
12338 /* We need a temporary when loading/storing a HImode/QImode value
12339 between memory and the FPU registers. This can happen when combine puts
12340 a paradoxical subreg in a float/fix conversion insn. */
12341 if (FP_REG_CLASS_P (rclass)
12342 && (mode == HImode || mode == QImode)
12343 && (GET_CODE (x) == MEM
12344 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12345 && true_regnum (x) == -1)))
12346 return GENERAL_REGS;
12348 /* On 32-bit we need a temporary when loading/storing a DFmode value
12349 between unaligned memory and the upper FPU registers. */
12350 if (TARGET_ARCH32
12351 && rclass == EXTRA_FP_REGS
12352 && mode == DFmode
12353 && GET_CODE (x) == MEM
12354 && ! mem_min_alignment (x, 8))
12355 return FP_REGS;
12357 if (((TARGET_CM_MEDANY
12358 && symbolic_operand (x, mode))
12359 || (TARGET_CM_EMBMEDANY
12360 && text_segment_operand (x, mode)))
12361 && ! flag_pic)
12363 if (in_p)
12364 sri->icode = direct_optab_handler (reload_in_optab, mode);
12365 else
12366 sri->icode = direct_optab_handler (reload_out_optab, mode);
12367 return NO_REGS;
12370 if (TARGET_VIS3 && TARGET_ARCH32)
12372 int regno = true_regnum (x);
12374 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12375 to move 8-byte values in 4-byte pieces. This only works via
12376 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12377 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12378 an FP_REGS intermediate move. */
12379 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12380 || ((general_or_i64_p (rclass)
12381 || rclass == GENERAL_OR_FP_REGS)
12382 && SPARC_FP_REG_P (regno)))
12384 sri->extra_cost = 2;
12385 return FP_REGS;
12389 return NO_REGS;
12392 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12393 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12395 bool
12396 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
12398 enum rtx_code rc = GET_CODE (operands[1]);
12399 enum machine_mode cmp_mode;
12400 rtx cc_reg, dst, cmp;
12402 cmp = operands[1];
12403 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12404 return false;
12406 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12407 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12409 cmp_mode = GET_MODE (XEXP (cmp, 0));
12410 rc = GET_CODE (cmp);
12412 dst = operands[0];
12413 if (! rtx_equal_p (operands[2], dst)
12414 && ! rtx_equal_p (operands[3], dst))
12416 if (reg_overlap_mentioned_p (dst, cmp))
12417 dst = gen_reg_rtx (mode);
12419 emit_move_insn (dst, operands[3]);
12421 else if (operands[2] == dst)
12423 operands[2] = operands[3];
12425 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12426 rc = reverse_condition_maybe_unordered (rc);
12427 else
12428 rc = reverse_condition (rc);
12431 if (XEXP (cmp, 1) == const0_rtx
12432 && GET_CODE (XEXP (cmp, 0)) == REG
12433 && cmp_mode == DImode
12434 && v9_regcmp_p (rc))
12435 cc_reg = XEXP (cmp, 0);
12436 else
12437 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12439 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12441 emit_insn (gen_rtx_SET (VOIDmode, dst,
12442 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12444 if (dst != operands[0])
12445 emit_move_insn (operands[0], dst);
12447 return true;
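/* Thus e.g. "x = a != b ? y : x" becomes one comparison that sets a
   CC (or FCC) register plus a single IF_THEN_ELSE set, which the V9
   conditional-move patterns then match.  */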
12450 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12451 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12452 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12453 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12454 code to be used for the condition mask. */
12456 void
12457 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12459 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12460 enum rtx_code code = GET_CODE (operands[3]);
12462 mask = gen_reg_rtx (Pmode);
12463 cop0 = operands[4];
12464 cop1 = operands[5];
12465 if (code == LT || code == GE)
12467 rtx t;
12469 code = swap_condition (code);
12470 t = cop0; cop0 = cop1; cop1 = t;
12473 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12475 fcmp = gen_rtx_UNSPEC (Pmode,
12476 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12477 fcode);
12479 cmask = gen_rtx_UNSPEC (DImode,
12480 gen_rtvec (2, mask, gsr),
12481 ccode);
12483 bshuf = gen_rtx_UNSPEC (mode,
12484 gen_rtvec (3, operands[1], operands[2], gsr),
12485 UNSPEC_BSHUFFLE);
12487 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12488 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12490 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12493 /* On SPARC, any mode which naturally allocates into the float
12494 registers should return 4 here.  */
12496 unsigned int
12497 sparc_regmode_natural_size (enum machine_mode mode)
12499 int size = UNITS_PER_WORD;
12501 if (TARGET_ARCH64)
12503 enum mode_class mclass = GET_MODE_CLASS (mode);
12505 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12506 size = 4;
12509 return size;
12512 /* Return TRUE if it is a good idea to tie two pseudo registers
12513 when one has mode MODE1 and one has mode MODE2.
12514 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12515 for any hard reg, then this must be FALSE for correct output.
12517 For V9 we have to deal with the fact that only the lower 32 floating
12518 point registers are 32-bit addressable. */
12520 bool
12521 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12523 enum mode_class mclass1, mclass2;
12524 unsigned short size1, size2;
12526 if (mode1 == mode2)
12527 return true;
12529 mclass1 = GET_MODE_CLASS (mode1);
12530 mclass2 = GET_MODE_CLASS (mode2);
12531 if (mclass1 != mclass2)
12532 return false;
12534 if (! TARGET_V9)
12535 return true;
12537 /* Classes are the same and we are V9 so we have to deal with upper
12538 vs. lower floating point registers. If one of the modes is a
12539 4-byte mode, and the other is not, we have to mark them as not
12540 tieable because only the lower 32 floating point registers are
12541 addressable 32 bits at a time.
12543 We can't just test explicitly for SFmode, otherwise we won't
12544 cover the vector mode cases properly. */
12546 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12547 return true;
12549 size1 = GET_MODE_SIZE (mode1);
12550 size2 = GET_MODE_SIZE (mode2);
12551 if ((size1 > 4 && size2 == 4)
12552 || (size2 > 4 && size1 == 4))
12553 return false;
12555 return true;
12558 /* Implement TARGET_CSTORE_MODE. */
12560 static enum machine_mode
12561 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12563 return (TARGET_ARCH64 ? DImode : SImode);
12566 /* Return the compound expression made of T1 and T2. */
12568 static inline tree
12569 compound_expr (tree t1, tree t2)
12571 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12574 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12576 static void
12577 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12579 if (!TARGET_FPU)
12580 return;
12582 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12583 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
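/* I.e. the FSR accrued-exception field (aexc) occupies bits 5-9 and
   the trap-enable mask (TEM) bits 23-27, as encoded by the two masks
   above.  */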
12585 /* We generate the equivalent of feholdexcept (&fenv_var):
12587 unsigned int fenv_var;
12588 __builtin_store_fsr (&fenv_var);
12590 unsigned int tmp1_var;
12591 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12593 __builtin_load_fsr (&tmp1_var); */
12595 tree fenv_var = create_tmp_var (unsigned_type_node, NULL);
12596 mark_addressable (fenv_var);
12597 tree fenv_addr = build_fold_addr_expr (fenv_var);
12598 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12599 tree hold_stfsr = build_call_expr (stfsr, 1, fenv_addr);
12601 tree tmp1_var = create_tmp_var (unsigned_type_node, NULL);
12602 mark_addressable (tmp1_var);
12603 tree masked_fenv_var
12604 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12605 build_int_cst (unsigned_type_node,
12606 ~(accrued_exception_mask | trap_enable_mask)));
12607 tree hold_mask
12608 = build2 (MODIFY_EXPR, void_type_node, tmp1_var, masked_fenv_var);
12610 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12611 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12612 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12614 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12616 /* We reload the value of tmp1_var to clear the exceptions:
12618 __builtin_load_fsr (&tmp1_var); */
12620 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12622 /* We generate the equivalent of feupdateenv (&fenv_var):
12624 unsigned int tmp2_var;
12625 __builtin_store_fsr (&tmp2_var);
12627 __builtin_load_fsr (&fenv_var);
12629 if (SPARC_LOW_FE_EXCEPT_VALUES)
12630 tmp2_var >>= 5;
12631 __atomic_feraiseexcept ((int) tmp2_var); */
12633 tree tmp2_var = create_tmp_var (unsigned_type_node, NULL);
12634 mark_addressable (tmp2_var);
12635 tree tmp3_addr = build_fold_addr_expr (tmp2_var);
12636 tree update_stfsr = build_call_expr (stfsr, 1, tmp3_addr);
12638 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12640 tree atomic_feraiseexcept
12641 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12642 tree update_call
12643 = build_call_expr (atomic_feraiseexcept, 1,
12644 fold_convert (integer_type_node, tmp2_var));
12646 if (SPARC_LOW_FE_EXCEPT_VALUES)
12648 tree shifted_tmp2_var
12649 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12650 build_int_cst (unsigned_type_node, 5));
12651 tree update_shift
12652 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12653 update_call = compound_expr (update_shift, update_call);
12656 *update
12657 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12660 #include "gt-sparc.h"