/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2014 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "hashtab.h"
#include "hash-set.h"
#include "vec.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "hash-table.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "df.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "wide-int.h"
#include "builtins.h"
#include "rtl-iter.h"
/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* Integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
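
/* Worked example of the formula above (added for illustration, not part
   of the original source): with the ultrasparc values
   int_mul = COSTS_N_INSNS (4) and int_mul_bit_factor = 2, a multiply
   whose rs1 operand has its highest set bit at position 11 is costed as
   COSTS_N_INSNS (4) + (11 - 3) / 2, i.e. four extra cost units on top
   of the base multiply cost, while int_mul_bit_factor == 0 means the
   cost never varies with the operand.  */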
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
   whether anything branches between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
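
/* Illustrative sketch of the two leaf sibcall sequences the comment
   above refers to (an added note with assumed register choices, not
   from the original source):

	or   %o7, %g0, %gN	! copy the return address away
	call target		! relaxable by as/ld into a branch
	 or  %gN, %g0, %o7	! restore %o7 in the delay slot

   versus a sethi %hi(target)/jmp pair, which the tools cannot safely
   relax because something might branch between the sethi and the jmp.  */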
/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
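
/* Reading aid (added note, not in the original): the table is indexed
   by hard register number.  The %in registers %i0-%i5 (24-29) remap to
   the %out registers %o0-%o5 (8-13) and %i7 (31) remaps to %o7 (15);
   %fp (30), the locals (16-23) and the outs themselves carry -1, while
   the globals, %sp (14) and the FP registers map to themselves.  */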
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};
struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};
#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;
static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, decl_handler,
     type_handler, do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, NULL, false }
};
#endif
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;
/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}
/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
			 ld [address], %fx+1
			 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			 ld [address], %fx+1
			 FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		     ld [address], %fx+1
		     fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}
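
/* Example of the UT699 integer-load transformation above (a sketch
   under assumed register choices, not taken from the original source):
   given

	ld   [%o0], %o1
	ldd  [%o2], %o4

   the second (double-word) load neither depends on %o1 nor mentions it
   in its address, so the pass emits a nop between the two loads.  */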
namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The only errata we handle are those of the AT697F and UT699.  */
      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}
/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}
/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use a 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32-bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9 and makes no sense in 64-bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32-bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass,		/* pass */
      "dbr",			/* reference_pass_name */
      1,			/* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER	/* po_op */
    };
  register_pass (&insert_pass_work_around_errata);
}
/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}
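
/* Note (added for clarity, not in the original): these are exactly the
   signed comparisons against zero that the v9 branch-on-register
   instructions brz, brnz, brgez, brlz, brlez and brgz can encode;
   unsigned codes such as GEU or LTU have no register-contents form.  */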
/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
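
/* Example (added for illustration, not in the original): the SFmode
   constant 1.0f has bit pattern 0x3f800000.  It is too wide for a
   13-bit signed immediate but has its low 10 bits clear, so fp_sethi_p
   accepts it (a single "sethi %hi(0x3f800000), %reg" suffices), while
   a pattern such as 0x3f800001 fails both tests and instead satisfies
   fp_high_losum_p, requiring a sethi/or pair.  */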
/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; now mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partition into hot
     and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}
1747 /* Expand a move instruction. Return true if all work is done. */
1749 bool
1750 sparc_expand_move (machine_mode mode, rtx *operands)
1752 /* Handle sets of MEM first. */
1753 if (GET_CODE (operands[0]) == MEM)
1755 /* 0 is a register (or a pair of registers) on SPARC. */
1756 if (register_or_zero_operand (operands[1], mode))
1757 return false;
1759 if (!reload_in_progress)
1761 operands[0] = validize_mem (operands[0]);
1762 operands[1] = force_reg (mode, operands[1]);
1766 /* Fixup TLS cases. */
1767 if (TARGET_HAVE_TLS
1768 && CONSTANT_P (operands[1])
1769 && sparc_tls_referenced_p (operands [1]))
1771 operands[1] = sparc_legitimize_tls_address (operands[1]);
1772 return false;
1775 /* Fixup PIC cases. */
1776 if (flag_pic && CONSTANT_P (operands[1]))
1778 if (pic_address_needs_scratch (operands[1]))
1779 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1781 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1782 if (GET_CODE (operands[1]) == LABEL_REF
1783 && can_use_mov_pic_label_ref (operands[1]))
1785 if (mode == SImode)
1787 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1788 return true;
1791 if (mode == DImode)
1793 gcc_assert (TARGET_ARCH64);
1794 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1795 return true;
1799 if (symbolic_operand (operands[1], mode))
1801 operands[1]
1802 = sparc_legitimize_pic_address (operands[1],
1803 reload_in_progress
1804 ? operands[0] : NULL_RTX);
1805 return false;
1809 /* If we are trying to toss an integer constant into FP registers,
1810 or loading an FP or vector constant, force it into memory. */
1811 if (CONSTANT_P (operands[1])
1812 && REG_P (operands[0])
1813 && (SPARC_FP_REG_P (REGNO (operands[0]))
1814 || SCALAR_FLOAT_MODE_P (mode)
1815 || VECTOR_MODE_P (mode)))
1817 /* emit_group_store will send such bogosity to us when it is
1818 not storing directly into memory. So fix this up to avoid
1819 crashes in output_constant_pool. */
1820 if (operands [1] == const0_rtx)
1821 operands[1] = CONST0_RTX (mode);
1823 /* We can clear FP registers or set them to all-ones if TARGET_VIS,
1824 and we can always do either for the other regs. */
1825 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1826 && (const_zero_operand (operands[1], mode)
1827 || const_all_ones_operand (operands[1], mode)))
1828 return false;
1830 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1831 /* We are able to build any SF constant in integer registers
1832 with at most 2 instructions. */
1833 && (mode == SFmode
1834 /* And any DF constant in integer registers. */
1835 || (mode == DFmode
1836 && ! can_create_pseudo_p ())))
1837 return false;
1839 operands[1] = force_const_mem (mode, operands[1]);
1840 if (!reload_in_progress)
1841 operands[1] = validize_mem (operands[1]);
1842 return false;
1845 /* Accept non-constants and valid constants unmodified. */
1846 if (!CONSTANT_P (operands[1])
1847 || GET_CODE (operands[1]) == HIGH
1848 || input_operand (operands[1], mode))
1849 return false;
1851 switch (mode)
1853 case QImode:
1854 /* All QImode constants require only one insn, so proceed. */
1855 break;
1857 case HImode:
1858 case SImode:
1859 sparc_emit_set_const32 (operands[0], operands[1]);
1860 return true;
1862 case DImode:
1863 /* input_operand should have filtered out 32-bit mode. */
1864 sparc_emit_set_const64 (operands[0], operands[1]);
1865 return true;
1867 case TImode:
1869 rtx high, low;
1870 /* TImode isn't available in 32-bit mode. */
1871 split_double (operands[1], &high, &low);
1872 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1873 high));
1874 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1875 low));
1877 return true;
1879 default:
1880 gcc_unreachable ();
1883 return false;
1886 /* Load OP1, a 32-bit constant, into OP0, a register.
1887 We know it can't be done in one insn when we get
1888 here; the move expander guarantees this. */
1890 static void
1891 sparc_emit_set_const32 (rtx op0, rtx op1)
1893 machine_mode mode = GET_MODE (op0);
1894 rtx temp = op0;
1896 if (can_create_pseudo_p ())
1897 temp = gen_reg_rtx (mode);
1899 if (GET_CODE (op1) == CONST_INT)
1901 gcc_assert (!small_int_operand (op1, mode)
1902 && !const_high_operand (op1, mode));
1904 /* Emit them as real moves instead of a HIGH/LO_SUM;
1905 this way CSE can see everything and reuse intermediate
1906 values if it wants. */
1907 emit_insn (gen_rtx_SET (VOIDmode, temp,
1908 GEN_INT (INTVAL (op1)
1909 & ~(HOST_WIDE_INT)0x3ff)));
1911 emit_insn (gen_rtx_SET (VOIDmode,
1912 op0,
1913 gen_rtx_IOR (mode, temp,
1914 GEN_INT (INTVAL (op1) & 0x3ff))));
1916 else
1918 /* A symbol; emit it in the traditional way. */
1919 emit_insn (gen_rtx_SET (VOIDmode, temp,
1920 gen_rtx_HIGH (mode, op1)));
1921 emit_insn (gen_rtx_SET (VOIDmode,
1922 op0, gen_rtx_LO_SUM (mode, temp, op1)));
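/* For instance, op1 == 0x12345678 is emitted as

     temp = 0x12345400;      (the sethi part, bits 31:10)
     op0  = temp | 0x278;    (the or part, the low 10 bits)

   since 0x12345678 & ~0x3ff == 0x12345400 and 0x12345678 & 0x3ff == 0x278. */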
1926 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1927 If TEMP is nonzero, we are forbidden to use any other scratch
1928 registers. Otherwise, we are allowed to generate them as needed.
1930 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1931 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1933 void
1934 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1936 rtx temp1, temp2, temp3, temp4, temp5;
1937 rtx ti_temp = 0;
1939 if (temp && GET_MODE (temp) == TImode)
1941 ti_temp = temp;
1942 temp = gen_rtx_REG (DImode, REGNO (temp));
1945 /* SPARC-V9 code-model support. */
1946 switch (sparc_cmodel)
1948 case CM_MEDLOW:
1949 /* The range spanned by all instructions in the object is less
1950 than 2^31 bytes (2GB) and the distance from any instruction
1951 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1952 than 2^31 bytes (2GB).
1954 The executable must be in the low 4TB of the virtual address
1955 space.
1957 sethi %hi(symbol), %temp1
1958 or %temp1, %lo(symbol), %reg */
1959 if (temp)
1960 temp1 = temp; /* op0 is allowed. */
1961 else
1962 temp1 = gen_reg_rtx (DImode);
1964 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1965 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1966 break;
1968 case CM_MEDMID:
1969 /* The range spanned by all instructions in the object is less
1970 than 2^31 bytes (2GB) and the distance from any instruction
1971 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1972 than 2^31 bytes (2GB).
1974 The executable must be in the low 16TB of the virtual address
1975 space.
1977 sethi %h44(symbol), %temp1
1978 or %temp1, %m44(symbol), %temp2
1979 sllx %temp2, 12, %temp3
1980 or %temp3, %l44(symbol), %reg */
1981 if (temp)
1983 temp1 = op0;
1984 temp2 = op0;
1985 temp3 = temp; /* op0 is allowed. */
1987 else
1989 temp1 = gen_reg_rtx (DImode);
1990 temp2 = gen_reg_rtx (DImode);
1991 temp3 = gen_reg_rtx (DImode);
1994 emit_insn (gen_seth44 (temp1, op1));
1995 emit_insn (gen_setm44 (temp2, temp1, op1));
1996 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1997 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1998 emit_insn (gen_setl44 (op0, temp3, op1));
1999 break;
2001 case CM_MEDANY:
2002 /* The range spanned by all instructions in the object is less
2003 than 2^31 bytes (2GB) and the distance from any instruction
2004 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2005 than 2^31 bytes (2GB).
2007 The executable can be placed anywhere in the virtual address
2008 space.
2010 sethi %hh(symbol), %temp1
2011 sethi %lm(symbol), %temp2
2012 or %temp1, %hm(symbol), %temp3
2013 sllx %temp3, 32, %temp4
2014 or %temp4, %temp2, %temp5
2015 or %temp5, %lo(symbol), %reg */
2016 if (temp)
2018 /* It is possible that one of the registers we got for operands[2]
2019 might coincide with that of operands[0] (which is why we made
2020 it TImode). Pick the other one to use as our scratch. */
2021 if (rtx_equal_p (temp, op0))
2023 gcc_assert (ti_temp);
2024 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2026 temp1 = op0;
2027 temp2 = temp; /* op0 is _not_ allowed, see above. */
2028 temp3 = op0;
2029 temp4 = op0;
2030 temp5 = op0;
2032 else
2034 temp1 = gen_reg_rtx (DImode);
2035 temp2 = gen_reg_rtx (DImode);
2036 temp3 = gen_reg_rtx (DImode);
2037 temp4 = gen_reg_rtx (DImode);
2038 temp5 = gen_reg_rtx (DImode);
2041 emit_insn (gen_sethh (temp1, op1));
2042 emit_insn (gen_setlm (temp2, op1));
2043 emit_insn (gen_sethm (temp3, temp1, op1));
2044 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2045 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2046 emit_insn (gen_rtx_SET (VOIDmode, temp5,
2047 gen_rtx_PLUS (DImode, temp4, temp2)));
2048 emit_insn (gen_setlo (op0, temp5, op1));
2049 break;
2051 case CM_EMBMEDANY:
2052 /* Old old old backwards compatibility cruft here.
2053 Essentially it is MEDLOW with a fixed 64-bit
2054 virtual base added to all data segment addresses.
2055 Text-segment stuff is computed like MEDANY; we can't
2056 reuse the code above because the relocation knobs
2057 look different.
2059 Data segment: sethi %hi(symbol), %temp1
2060 add %temp1, EMBMEDANY_BASE_REG, %temp2
2061 or %temp2, %lo(symbol), %reg */
2062 if (data_segment_operand (op1, GET_MODE (op1)))
2064 if (temp)
2066 temp1 = temp; /* op0 is allowed. */
2067 temp2 = op0;
2069 else
2071 temp1 = gen_reg_rtx (DImode);
2072 temp2 = gen_reg_rtx (DImode);
2075 emit_insn (gen_embmedany_sethi (temp1, op1));
2076 emit_insn (gen_embmedany_brsum (temp2, temp1));
2077 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2080 /* Text segment: sethi %uhi(symbol), %temp1
2081 sethi %hi(symbol), %temp2
2082 or %temp1, %ulo(symbol), %temp3
2083 sllx %temp3, 32, %temp4
2084 or %temp4, %temp2, %temp5
2085 or %temp5, %lo(symbol), %reg */
2086 else
2088 if (temp)
2090 /* It is possible that one of the registers we got for operands[2]
2091 might coincide with that of operands[0] (which is why we made
2092 it TImode). Pick the other one to use as our scratch. */
2093 if (rtx_equal_p (temp, op0))
2095 gcc_assert (ti_temp);
2096 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2098 temp1 = op0;
2099 temp2 = temp; /* op0 is _not_ allowed, see above. */
2100 temp3 = op0;
2101 temp4 = op0;
2102 temp5 = op0;
2104 else
2106 temp1 = gen_reg_rtx (DImode);
2107 temp2 = gen_reg_rtx (DImode);
2108 temp3 = gen_reg_rtx (DImode);
2109 temp4 = gen_reg_rtx (DImode);
2110 temp5 = gen_reg_rtx (DImode);
2113 emit_insn (gen_embmedany_textuhi (temp1, op1));
2114 emit_insn (gen_embmedany_texthi (temp2, op1));
2115 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2116 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2117 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2118 emit_insn (gen_rtx_SET (VOIDmode, temp5,
2119 gen_rtx_PLUS (DImode, temp4, temp2)));
2120 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2122 break;
2124 default:
2125 gcc_unreachable ();
2129 #if HOST_BITS_PER_WIDE_INT == 32
2130 static void
2131 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
2133 gcc_unreachable ();
2135 #else
2136 /* These avoid problems when cross compiling. If we do not
2137 go through all this hair then the optimizer will see
2138 invalid REG_EQUAL notes or in some cases none at all. */
2139 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2140 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2141 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2142 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2144 /* The optimizer is not to assume anything about exactly
2145 which bits are set for a HIGH; they are unspecified.
2146 Unfortunately this leads to many missed optimizations
2147 during CSE. We mask out the non-HIGH bits so the result
2148 matches a plain movdi, to alleviate this problem. */
2149 static rtx
2150 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2152 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2155 static rtx
2156 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2158 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
2161 static rtx
2162 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2164 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2167 static rtx
2168 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2170 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2173 /* Worker routines for 64-bit constant formation on arch64.
2174 One of the key things to do in these emissions is
2175 to create as many temp REGs as possible. This makes it
2176 possible for half-built constants to be reused when
2177 similar values are required later on.
2178 Without doing this, the optimizer cannot see such
2179 opportunities. */
2181 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2182 unsigned HOST_WIDE_INT, int);
2184 static void
2185 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2186 unsigned HOST_WIDE_INT low_bits, int is_neg)
2188 unsigned HOST_WIDE_INT high_bits;
2190 if (is_neg)
2191 high_bits = (~low_bits) & 0xffffffff;
2192 else
2193 high_bits = low_bits;
2195 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2196 if (!is_neg)
2198 emit_insn (gen_rtx_SET (VOIDmode, op0,
2199 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2201 else
2203 /* If we are XOR'ing with -1, then we should emit a one's complement
2204 instead. This way the combiner will notice logical operations
2205 such as ANDN later on and substitute. */
2206 if ((low_bits & 0x3ff) == 0x3ff)
2208 emit_insn (gen_rtx_SET (VOIDmode, op0,
2209 gen_rtx_NOT (DImode, temp)));
2211 else
2213 emit_insn (gen_rtx_SET (VOIDmode, op0,
2214 gen_safe_XOR64 (temp,
2215 (-(HOST_WIDE_INT)0x400
2216 | (low_bits & 0x3ff)))));
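/* As an illustration of the negated path: for 0xffffffff80000001 the
   caller passes low_bits == 0x80000001 and is_neg == 1, so we sethi
   ~low_bits == 0x7ffffffe (temp becomes 0x7ffffc00) and then XOR with
   the sign-extended immediate -0x400 | 0x1, which yields
   0xffffffff80000001 in op0. */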
2221 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2222 unsigned HOST_WIDE_INT, int);
2224 static void
2225 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2226 unsigned HOST_WIDE_INT high_bits,
2227 unsigned HOST_WIDE_INT low_immediate,
2228 int shift_count)
2230 rtx temp2 = op0;
2232 if ((high_bits & 0xfffffc00) != 0)
2234 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2235 if ((high_bits & ~0xfffffc00) != 0)
2236 emit_insn (gen_rtx_SET (VOIDmode, op0,
2237 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2238 else
2239 temp2 = temp;
2241 else
2243 emit_insn (gen_safe_SET64 (temp, high_bits));
2244 temp2 = temp;
2247 /* Now shift it up into place. */
2248 emit_insn (gen_rtx_SET (VOIDmode, op0,
2249 gen_rtx_ASHIFT (DImode, temp2,
2250 GEN_INT (shift_count))));
2252 /* If there is a low immediate part piece, finish up by
2253 putting that in as well. */
2254 if (low_immediate != 0)
2255 emit_insn (gen_rtx_SET (VOIDmode, op0,
2256 gen_safe_OR64 (op0, low_immediate)));
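/* For instance, high_bits == 0x12345678, low_immediate == 0x123 and
   shift_count == 32 expand to roughly

     sethi %hi(0x12345678), %temp
     or    %temp, 0x278, %op0
     sllx  %op0, 32, %op0
     or    %op0, 0x123, %op0

   leaving 0x1234567800000123 in op0. */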
2259 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2260 unsigned HOST_WIDE_INT);
2262 /* Full 64-bit constant decomposition. Even though this is the
2263 'worst' case, we still optimize a few things away. */
2264 static void
2265 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2266 unsigned HOST_WIDE_INT high_bits,
2267 unsigned HOST_WIDE_INT low_bits)
2269 rtx sub_temp = op0;
2271 if (can_create_pseudo_p ())
2272 sub_temp = gen_reg_rtx (DImode);
2274 if ((high_bits & 0xfffffc00) != 0)
2276 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2277 if ((high_bits & ~0xfffffc00) != 0)
2278 emit_insn (gen_rtx_SET (VOIDmode,
2279 sub_temp,
2280 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2281 else
2282 sub_temp = temp;
2284 else
2286 emit_insn (gen_safe_SET64 (temp, high_bits));
2287 sub_temp = temp;
2290 if (can_create_pseudo_p ())
2292 rtx temp2 = gen_reg_rtx (DImode);
2293 rtx temp3 = gen_reg_rtx (DImode);
2294 rtx temp4 = gen_reg_rtx (DImode);
2296 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2297 gen_rtx_ASHIFT (DImode, sub_temp,
2298 GEN_INT (32))));
2300 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2301 if ((low_bits & ~0xfffffc00) != 0)
2303 emit_insn (gen_rtx_SET (VOIDmode, temp3,
2304 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2305 emit_insn (gen_rtx_SET (VOIDmode, op0,
2306 gen_rtx_PLUS (DImode, temp4, temp3)));
2308 else
2310 emit_insn (gen_rtx_SET (VOIDmode, op0,
2311 gen_rtx_PLUS (DImode, temp4, temp2)));
2314 else
2316 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2317 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2318 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2319 int to_shift = 12;
2321 /* We are in the middle of reload, so this is really
2322 painful. However we do still make an attempt to
2323 avoid emitting truly stupid code. */
2324 if (low1 != const0_rtx)
2326 emit_insn (gen_rtx_SET (VOIDmode, op0,
2327 gen_rtx_ASHIFT (DImode, sub_temp,
2328 GEN_INT (to_shift))));
2329 emit_insn (gen_rtx_SET (VOIDmode, op0,
2330 gen_rtx_IOR (DImode, op0, low1)));
2331 sub_temp = op0;
2332 to_shift = 12;
2334 else
2336 to_shift += 12;
2338 if (low2 != const0_rtx)
2340 emit_insn (gen_rtx_SET (VOIDmode, op0,
2341 gen_rtx_ASHIFT (DImode, sub_temp,
2342 GEN_INT (to_shift))));
2343 emit_insn (gen_rtx_SET (VOIDmode, op0,
2344 gen_rtx_IOR (DImode, op0, low2)));
2345 sub_temp = op0;
2346 to_shift = 8;
2348 else
2350 to_shift += 8;
2352 emit_insn (gen_rtx_SET (VOIDmode, op0,
2353 gen_rtx_ASHIFT (DImode, sub_temp,
2354 GEN_INT (to_shift))));
2355 if (low3 != const0_rtx)
2356 emit_insn (gen_rtx_SET (VOIDmode, op0,
2357 gen_rtx_IOR (DImode, op0, low3)));
2358 /* phew... */
2362 /* Analyze a 64-bit constant for certain properties. */
2363 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2364 unsigned HOST_WIDE_INT,
2365 int *, int *, int *);
2367 static void
2368 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2369 unsigned HOST_WIDE_INT low_bits,
2370 int *hbsp, int *lbsp, int *abbasp)
2372 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2373 int i;
2375 lowest_bit_set = highest_bit_set = -1;
2376 i = 0;
2379 if ((lowest_bit_set == -1)
2380 && ((low_bits >> i) & 1))
2381 lowest_bit_set = i;
2382 if ((highest_bit_set == -1)
2383 && ((high_bits >> (32 - i - 1)) & 1))
2384 highest_bit_set = (64 - i - 1);
2386 while (++i < 32
2387 && ((highest_bit_set == -1)
2388 || (lowest_bit_set == -1)));
2389 if (i == 32)
2391 i = 0;
2394 if ((lowest_bit_set == -1)
2395 && ((high_bits >> i) & 1))
2396 lowest_bit_set = i + 32;
2397 if ((highest_bit_set == -1)
2398 && ((low_bits >> (32 - i - 1)) & 1))
2399 highest_bit_set = 32 - i - 1;
2401 while (++i < 32
2402 && ((highest_bit_set == -1)
2403 || (lowest_bit_set == -1)));
2405 /* If there are no bits set this should have gone out
2406 as one instruction! */
2407 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2408 all_bits_between_are_set = 1;
2409 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2411 if (i < 32)
2413 if ((low_bits & (1 << i)) != 0)
2414 continue;
2416 else
2418 if ((high_bits & (1 << (i - 32))) != 0)
2419 continue;
2421 all_bits_between_are_set = 0;
2422 break;
2424 *hbsp = highest_bit_set;
2425 *lbsp = lowest_bit_set;
2426 *abbasp = all_bits_between_are_set;
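/* For instance, the constant 0x00000003ffffc000 (bits 14..33 set) yields
   *lbsp == 14, *hbsp == 33 and *abbasp == 1; such a contiguous run is
   exactly what the sethi/shift sequences below look for. */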
2429 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2431 static int
2432 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2433 unsigned HOST_WIDE_INT low_bits)
2435 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2437 if (high_bits == 0
2438 || high_bits == 0xffffffff)
2439 return 1;
2441 analyze_64bit_constant (high_bits, low_bits,
2442 &highest_bit_set, &lowest_bit_set,
2443 &all_bits_between_are_set);
2445 if ((highest_bit_set == 63
2446 || lowest_bit_set == 0)
2447 && all_bits_between_are_set != 0)
2448 return 1;
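/* A contiguous run spanning at most 21 bit positions fits comfortably
   in the 22-bit immediate field of a sethi once shifted into place,
   so a sethi plus one shift suffice. */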
2450 if ((highest_bit_set - lowest_bit_set) < 21)
2451 return 1;
2453 return 0;
2456 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2457 unsigned HOST_WIDE_INT,
2458 int, int);
2460 static unsigned HOST_WIDE_INT
2461 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2462 unsigned HOST_WIDE_INT low_bits,
2463 int lowest_bit_set, int shift)
2465 HOST_WIDE_INT hi, lo;
2467 if (lowest_bit_set < 32)
2469 lo = (low_bits >> lowest_bit_set) << shift;
2470 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2472 else
2474 lo = 0;
2475 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2477 gcc_assert (! (hi & lo));
2478 return (hi | lo);
2481 /* Here we are sure to be arch64 and this is an integer constant
2482 being loaded into a register. Emit the most efficient
2483 insn sequence possible. Detection of all the 1-insn cases
2484 has been done already. */
2485 static void
2486 sparc_emit_set_const64 (rtx op0, rtx op1)
2488 unsigned HOST_WIDE_INT high_bits, low_bits;
2489 int lowest_bit_set, highest_bit_set;
2490 int all_bits_between_are_set;
2491 rtx temp = 0;
2493 /* Sanity check that we know what we are working with. */
2494 gcc_assert (TARGET_ARCH64
2495 && (GET_CODE (op0) == SUBREG
2496 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2498 if (! can_create_pseudo_p ())
2499 temp = op0;
2501 if (GET_CODE (op1) != CONST_INT)
2503 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2504 return;
2507 if (! temp)
2508 temp = gen_reg_rtx (DImode);
2510 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2511 low_bits = (INTVAL (op1) & 0xffffffff);
2513 /* low_bits bits 0 --> 31
2514 high_bits bits 32 --> 63 */
2516 analyze_64bit_constant (high_bits, low_bits,
2517 &highest_bit_set, &lowest_bit_set,
2518 &all_bits_between_are_set);
2520 /* First try for a 2-insn sequence. */
2522 /* These situations are preferred because the optimizer can
2523 * do more things with them:
2524 * 1) mov -1, %reg
2525 * sllx %reg, shift, %reg
2526 * 2) mov -1, %reg
2527 * srlx %reg, shift, %reg
2528 * 3) mov some_small_const, %reg
2529 * sllx %reg, shift, %reg
2531 if (((highest_bit_set == 63
2532 || lowest_bit_set == 0)
2533 && all_bits_between_are_set != 0)
2534 || ((highest_bit_set - lowest_bit_set) < 12))
2536 HOST_WIDE_INT the_const = -1;
2537 int shift = lowest_bit_set;
2539 if ((highest_bit_set != 63
2540 && lowest_bit_set != 0)
2541 || all_bits_between_are_set == 0)
2543 the_const =
2544 create_simple_focus_bits (high_bits, low_bits,
2545 lowest_bit_set, 0);
2547 else if (lowest_bit_set == 0)
2548 shift = -(63 - highest_bit_set);
2550 gcc_assert (SPARC_SIMM13_P (the_const));
2551 gcc_assert (shift != 0);
2553 emit_insn (gen_safe_SET64 (temp, the_const));
2554 if (shift > 0)
2555 emit_insn (gen_rtx_SET (VOIDmode,
2556 op0,
2557 gen_rtx_ASHIFT (DImode,
2558 temp,
2559 GEN_INT (shift))));
2560 else if (shift < 0)
2561 emit_insn (gen_rtx_SET (VOIDmode,
2562 op0,
2563 gen_rtx_LSHIFTRT (DImode,
2564 temp,
2565 GEN_INT (-shift))));
2566 return;
2569 /* Now a range of 22 or fewer bits set somewhere.
2570 * 1) sethi %hi(focus_bits), %reg
2571 * sllx %reg, shift, %reg
2572 * 2) sethi %hi(focus_bits), %reg
2573 * srlx %reg, shift, %reg
2575 if ((highest_bit_set - lowest_bit_set) < 21)
2577 unsigned HOST_WIDE_INT focus_bits =
2578 create_simple_focus_bits (high_bits, low_bits,
2579 lowest_bit_set, 10);
2581 gcc_assert (SPARC_SETHI_P (focus_bits));
2582 gcc_assert (lowest_bit_set != 10);
2584 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2586 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2587 if (lowest_bit_set < 10)
2588 emit_insn (gen_rtx_SET (VOIDmode,
2589 op0,
2590 gen_rtx_LSHIFTRT (DImode, temp,
2591 GEN_INT (10 - lowest_bit_set))));
2592 else if (lowest_bit_set > 10)
2593 emit_insn (gen_rtx_SET (VOIDmode,
2594 op0,
2595 gen_rtx_ASHIFT (DImode, temp,
2596 GEN_INT (lowest_bit_set - 10))));
2597 return;
2600 /* 1) sethi %hi(low_bits), %reg
2601 * or %reg, %lo(low_bits), %reg
2602 * 2) sethi %hi(~low_bits), %reg
2603 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2605 if (high_bits == 0
2606 || high_bits == 0xffffffff)
2608 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2609 (high_bits == 0xffffffff));
2610 return;
2613 /* Now, try 3-insn sequences. */
2615 /* 1) sethi %hi(high_bits), %reg
2616 * or %reg, %lo(high_bits), %reg
2617 * sllx %reg, 32, %reg
2619 if (low_bits == 0)
2621 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2622 return;
2625 /* We may be able to do something quick
2626 when the constant is negated, so try that. */
2627 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2628 (~low_bits) & 0xfffffc00))
2630 /* NOTE: The trailing bits get XOR'd so we need the
2631 non-negated bits, not the negated ones. */
2632 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2634 if ((((~high_bits) & 0xffffffff) == 0
2635 && ((~low_bits) & 0x80000000) == 0)
2636 || (((~high_bits) & 0xffffffff) == 0xffffffff
2637 && ((~low_bits) & 0x80000000) != 0))
2639 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2641 if ((SPARC_SETHI_P (fast_int)
2642 && (~high_bits & 0xffffffff) == 0)
2643 || SPARC_SIMM13_P (fast_int))
2644 emit_insn (gen_safe_SET64 (temp, fast_int));
2645 else
2646 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2648 else
2650 rtx negated_const;
2651 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2652 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2653 sparc_emit_set_const64 (temp, negated_const);
2656 /* If we are XOR'ing with -1, then we should emit a one's complement
2657 instead. This way the combiner will notice logical operations
2658 such as ANDN later on and substitute. */
2659 if (trailing_bits == 0x3ff)
2661 emit_insn (gen_rtx_SET (VOIDmode, op0,
2662 gen_rtx_NOT (DImode, temp)));
2664 else
2666 emit_insn (gen_rtx_SET (VOIDmode,
2667 op0,
2668 gen_safe_XOR64 (temp,
2669 (-0x400 | trailing_bits))));
2671 return;
2674 /* 1) sethi %hi(xxx), %reg
2675 * or %reg, %lo(xxx), %reg
2676 * sllx %reg, yyy, %reg
2678 * ??? This is just a generalized version of the low_bits==0
2679 * thing above, FIXME...
2681 if ((highest_bit_set - lowest_bit_set) < 32)
2683 unsigned HOST_WIDE_INT focus_bits =
2684 create_simple_focus_bits (high_bits, low_bits,
2685 lowest_bit_set, 0);
2687 /* We can't get here in this state. */
2688 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2690 /* So what we know is that the set bits straddle the
2691 middle of the 64-bit word. */
2692 sparc_emit_set_const64_quick2 (op0, temp,
2693 focus_bits, 0,
2694 lowest_bit_set);
2695 return;
2698 /* 1) sethi %hi(high_bits), %reg
2699 * or %reg, %lo(high_bits), %reg
2700 * sllx %reg, 32, %reg
2701 * or %reg, low_bits, %reg
2703 if (SPARC_SIMM13_P (low_bits)
2704 && ((int) low_bits > 0))
2706 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2707 return;
2710 /* The easiest way when all else fails, is full decomposition. */
2711 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2713 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2715 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2716 return the mode to be used for the comparison. For floating-point,
2717 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2718 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2719 processing is needed. */
2721 machine_mode
2722 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2724 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2726 switch (op)
2728 case EQ:
2729 case NE:
2730 case UNORDERED:
2731 case ORDERED:
2732 case UNLT:
2733 case UNLE:
2734 case UNGT:
2735 case UNGE:
2736 case UNEQ:
2737 case LTGT:
2738 return CCFPmode;
2740 case LT:
2741 case LE:
2742 case GT:
2743 case GE:
2744 return CCFPEmode;
2746 default:
2747 gcc_unreachable ();
2750 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2751 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2753 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2754 return CCX_NOOVmode;
2755 else
2756 return CC_NOOVmode;
2758 else
2760 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2761 return CCXmode;
2762 else
2763 return CCmode;
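/* For instance, a comparison of (plus:SI a b) with zero selects
   CC_NOOVmode: the condition codes will come from an addcc, whose
   overflow bit reflects the addition itself, so conditions relying on
   that bit must not be used. */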
2767 /* Emit the compare insn and return the CC reg for a CODE comparison
2768 with operands X and Y. */
2770 static rtx
2771 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2773 machine_mode mode;
2774 rtx cc_reg;
2776 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2777 return x;
2779 mode = SELECT_CC_MODE (code, x, y);
2781 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2782 fcc regs (cse can't tell they're really call clobbered regs and will
2783 remove a duplicate comparison even if there is an intervening function
2784 call - it will then try to reload the cc reg via an int reg which is why
2785 we need the movcc patterns). It is possible to provide the movcc
2786 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2787 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2788 to tell cse that CCFPE mode registers (even pseudos) are call
2789 clobbered. */
2791 /* ??? This is an experiment. Rather than making changes to cse which may
2792 or may not be easy/clean, we do our own cse. This is possible because
2793 we will generate hard registers. Cse knows they're call clobbered (it
2794 doesn't know the same thing about pseudos). If we guess wrong, no big
2795 deal, but if we win, great! */
2797 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2798 #if 1 /* experiment */
2800 int reg;
2801 /* We cycle through the registers to ensure they're all exercised. */
2802 static int next_fcc_reg = 0;
2803 /* Previous x,y for each fcc reg. */
2804 static rtx prev_args[4][2];
2806 /* Scan prev_args for x,y. */
2807 for (reg = 0; reg < 4; reg++)
2808 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2809 break;
2810 if (reg == 4)
2812 reg = next_fcc_reg;
2813 prev_args[reg][0] = x;
2814 prev_args[reg][1] = y;
2815 next_fcc_reg = (next_fcc_reg + 1) & 3;
2817 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2819 #else
2820 cc_reg = gen_reg_rtx (mode);
2821 #endif /* ! experiment */
2822 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2823 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2824 else
2825 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2827 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2828 will only result in an unrecognizable insn, so no point in asserting. */
2829 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2831 return cc_reg;
2835 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2838 gen_compare_reg (rtx cmp)
2840 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2843 /* This function is used for v9 only.
2844 DEST is the target of the Scc insn.
2845 CODE is the code for an Scc's comparison.
2846 X and Y are the values we compare.
2848 This function is needed to turn
2850 (set (reg:SI 110)
2851 (gt (reg:CCX 100 %icc)
2852 (const_int 0)))
2853 into
2854 (set (reg:SI 110)
2855 (gt:DI (reg:CCX 100 %icc)
2856 (const_int 0)))
2858 I.e., the instruction recognizer needs to see the mode of the comparison to
2859 find the right instruction. We could use "gt:DI" right in the
2860 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2862 static int
2863 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2865 if (! TARGET_ARCH64
2866 && (GET_MODE (x) == DImode
2867 || GET_MODE (dest) == DImode))
2868 return 0;
2870 /* Try to use the movrCC insns. */
2871 if (TARGET_ARCH64
2872 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2873 && y == const0_rtx
2874 && v9_regcmp_p (compare_code))
2876 rtx op0 = x;
2877 rtx temp;
2879 /* Special case for op0 != 0. This can be done with one instruction if
2880 dest == x. */
2882 if (compare_code == NE
2883 && GET_MODE (dest) == DImode
2884 && rtx_equal_p (op0, dest))
2886 emit_insn (gen_rtx_SET (VOIDmode, dest,
2887 gen_rtx_IF_THEN_ELSE (DImode,
2888 gen_rtx_fmt_ee (compare_code, DImode,
2889 op0, const0_rtx),
2890 const1_rtx,
2891 dest)));
2892 return 1;
2895 if (reg_overlap_mentioned_p (dest, op0))
2897 /* Handle the case where dest == x.
2898 We "early clobber" the result. */
2899 op0 = gen_reg_rtx (GET_MODE (x));
2900 emit_move_insn (op0, x);
2903 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2904 if (GET_MODE (op0) != DImode)
2906 temp = gen_reg_rtx (DImode);
2907 convert_move (temp, op0, 0);
2909 else
2910 temp = op0;
2911 emit_insn (gen_rtx_SET (VOIDmode, dest,
2912 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2913 gen_rtx_fmt_ee (compare_code, DImode,
2914 temp, const0_rtx),
2915 const1_rtx,
2916 dest)));
2917 return 1;
2919 else
2921 x = gen_compare_reg_1 (compare_code, x, y);
2922 y = const0_rtx;
2924 gcc_assert (GET_MODE (x) != CC_NOOVmode
2925 && GET_MODE (x) != CCX_NOOVmode);
2927 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2928 emit_insn (gen_rtx_SET (VOIDmode, dest,
2929 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2930 gen_rtx_fmt_ee (compare_code,
2931 GET_MODE (x), x, y),
2932 const1_rtx, dest)));
2933 return 1;
2938 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2939 without jumps using the addx/subx instructions. */
2941 bool
2942 emit_scc_insn (rtx operands[])
2944 rtx tem;
2945 rtx x;
2946 rtx y;
2947 enum rtx_code code;
2949 /* The quad-word fp compare library routines all return nonzero to indicate
2950 true, which is different from the equivalent libgcc routines, so we must
2951 handle them specially here. */
2952 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2954 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2955 GET_CODE (operands[1]));
2956 operands[2] = XEXP (operands[1], 0);
2957 operands[3] = XEXP (operands[1], 1);
2960 code = GET_CODE (operands[1]);
2961 x = operands[2];
2962 y = operands[3];
2964 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2965 more applications). The exception to this is "reg != 0" which can
2966 be done in one instruction on v9 (so we do it). */
2967 if (code == EQ)
2969 if (GET_MODE (x) == SImode)
2971 rtx pat;
2972 if (TARGET_ARCH64)
2973 pat = gen_seqsidi_special (operands[0], x, y);
2974 else
2975 pat = gen_seqsisi_special (operands[0], x, y);
2976 emit_insn (pat);
2977 return true;
2979 else if (GET_MODE (x) == DImode)
2981 rtx pat = gen_seqdi_special (operands[0], x, y);
2982 emit_insn (pat);
2983 return true;
2987 if (code == NE)
2989 if (GET_MODE (x) == SImode)
2991 rtx pat;
2992 if (TARGET_ARCH64)
2993 pat = gen_snesidi_special (operands[0], x, y);
2994 else
2995 pat = gen_snesisi_special (operands[0], x, y);
2996 emit_insn (pat);
2997 return true;
2999 else if (GET_MODE (x) == DImode)
3001 rtx pat;
3002 if (TARGET_VIS3)
3003 pat = gen_snedi_special_vis3 (operands[0], x, y);
3004 else
3005 pat = gen_snedi_special (operands[0], x, y);
3006 emit_insn (pat);
3007 return true;
3011 if (TARGET_V9
3012 && TARGET_ARCH64
3013 && GET_MODE (x) == DImode
3014 && !(TARGET_VIS3
3015 && (code == GTU || code == LTU))
3016 && gen_v9_scc (operands[0], code, x, y))
3017 return true;
3019 /* We can do LTU and GEU using the addx/subx instructions too. And
3020 for GTU/LEU, if both operands are registers, swap them and fall
3021 back to the easy case. */
3022 if (code == GTU || code == LEU)
3024 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3025 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3027 tem = x;
3028 x = y;
3029 y = tem;
3030 code = swap_condition (code);
3034 if (code == LTU
3035 || (!TARGET_VIS3 && code == GEU))
3037 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3038 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3039 gen_compare_reg_1 (code, x, y),
3040 const0_rtx)));
3041 return true;
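/* The LTU case above leans on the classic carry idiom: after
     subcc %x, %y, %g0
   the carry bit is set exactly when x < y unsigned, so
     addx %g0, 0, %dest
   leaves the comparison result in dest. */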
3044 /* All the possibilities to use addx/subx-based sequences have been
3045 exhausted; try for a 3-instruction sequence using v9 conditional
3046 moves. */
3047 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3048 return true;
3050 /* Nope, do branches. */
3051 return false;
3054 /* Emit a conditional jump insn for the v9 architecture using comparison code
3055 CODE and jump target LABEL.
3056 This function exists to take advantage of the v9 brxx insns. */
3058 static void
3059 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3061 emit_jump_insn (gen_rtx_SET (VOIDmode,
3062 pc_rtx,
3063 gen_rtx_IF_THEN_ELSE (VOIDmode,
3064 gen_rtx_fmt_ee (code, GET_MODE (op0),
3065 op0, const0_rtx),
3066 gen_rtx_LABEL_REF (VOIDmode, label),
3067 pc_rtx)));
3070 /* Emit a conditional jump insn for the UA2011 architecture using
3071 comparison code CODE and jump target LABEL. This function exists
3072 to take advantage of the UA2011 Compare and Branch insns. */
3074 static void
3075 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3077 rtx if_then_else;
3079 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3080 gen_rtx_fmt_ee (code, GET_MODE (op0),
3081 op0, op1),
3082 gen_rtx_LABEL_REF (VOIDmode, label),
3083 pc_rtx);
3085 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
3088 void
3089 emit_conditional_branch_insn (rtx operands[])
3091 /* The quad-word fp compare library routines all return nonzero to indicate
3092 true, which is different from the equivalent libgcc routines, so we must
3093 handle them specially here. */
3094 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3096 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3097 GET_CODE (operands[0]));
3098 operands[1] = XEXP (operands[0], 0);
3099 operands[2] = XEXP (operands[0], 1);
3102 /* If we can tell early on that the comparison is against a constant
3103 that won't fit in the 5-bit signed immediate field of a cbcond,
3104 use one of the other v9 conditional branch sequences. */
3105 if (TARGET_CBCOND
3106 && GET_CODE (operands[1]) == REG
3107 && (GET_MODE (operands[1]) == SImode
3108 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3109 && (GET_CODE (operands[2]) != CONST_INT
3110 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3112 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3113 return;
3116 if (TARGET_ARCH64 && operands[2] == const0_rtx
3117 && GET_CODE (operands[1]) == REG
3118 && GET_MODE (operands[1]) == DImode)
3120 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3121 return;
3124 operands[1] = gen_compare_reg (operands[0]);
3125 operands[2] = const0_rtx;
3126 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3127 operands[1], operands[2]);
3128 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3129 operands[3]));
3133 /* Generate a DFmode part of a hard TFmode register.
3134 REG is the TFmode hard register; LOW is 1 for the
3135 low 64 bits of the register and 0 otherwise.
3138 gen_df_reg (rtx reg, int low)
3140 int regno = REGNO (reg);
3142 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3143 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3144 return gen_rtx_REG (DFmode, regno);
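/* E.g. for a TFmode value in %f0 on this big-endian target, the
   high-order DFmode half is %f0 itself and the low-order half
   (LOW == 1) is %f2. */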
3147 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3148 Unlike normal calls, TFmode operands are passed by reference. It is
3149 assumed that no more than 3 operands are required. */
3151 static void
3152 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3154 rtx ret_slot = NULL, arg[3], func_sym;
3155 int i;
3157 /* We only expect to be called for conversions, unary, and binary ops. */
3158 gcc_assert (nargs == 2 || nargs == 3);
3160 for (i = 0; i < nargs; ++i)
3162 rtx this_arg = operands[i];
3163 rtx this_slot;
3165 /* TFmode arguments and return values are passed by reference. */
3166 if (GET_MODE (this_arg) == TFmode)
3168 int force_stack_temp;
3170 force_stack_temp = 0;
3171 if (TARGET_BUGGY_QP_LIB && i == 0)
3172 force_stack_temp = 1;
3174 if (GET_CODE (this_arg) == MEM
3175 && ! force_stack_temp)
3177 tree expr = MEM_EXPR (this_arg);
3178 if (expr)
3179 mark_addressable (expr);
3180 this_arg = XEXP (this_arg, 0);
3182 else if (CONSTANT_P (this_arg)
3183 && ! force_stack_temp)
3185 this_slot = force_const_mem (TFmode, this_arg);
3186 this_arg = XEXP (this_slot, 0);
3188 else
3190 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3192 /* Operand 0 is the return value. We'll copy it out later. */
3193 if (i > 0)
3194 emit_move_insn (this_slot, this_arg);
3195 else
3196 ret_slot = this_slot;
3198 this_arg = XEXP (this_slot, 0);
3202 arg[i] = this_arg;
3205 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3207 if (GET_MODE (operands[0]) == TFmode)
3209 if (nargs == 2)
3210 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3211 arg[0], GET_MODE (arg[0]),
3212 arg[1], GET_MODE (arg[1]));
3213 else
3214 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3215 arg[0], GET_MODE (arg[0]),
3216 arg[1], GET_MODE (arg[1]),
3217 arg[2], GET_MODE (arg[2]));
3219 if (ret_slot)
3220 emit_move_insn (operands[0], ret_slot);
3222 else
3224 rtx ret;
3226 gcc_assert (nargs == 2);
3228 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3229 GET_MODE (operands[0]), 1,
3230 arg[1], GET_MODE (arg[1]));
3232 if (ret != operands[0])
3233 emit_move_insn (operands[0], ret);
3237 /* Expand soft-float TFmode calls to SPARC ABI routines. */
3239 static void
3240 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3242 const char *func;
3244 switch (code)
3246 case PLUS:
3247 func = "_Qp_add";
3248 break;
3249 case MINUS:
3250 func = "_Qp_sub";
3251 break;
3252 case MULT:
3253 func = "_Qp_mul";
3254 break;
3255 case DIV:
3256 func = "_Qp_div";
3257 break;
3258 default:
3259 gcc_unreachable ();
3262 emit_soft_tfmode_libcall (func, 3, operands);
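/* So a TFmode addition ultimately becomes a call of the form
     _Qp_add (&result, &x, &y)
   with all three quad values passed by reference, as arranged by
   emit_soft_tfmode_libcall above. */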
3265 static void
3266 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3268 const char *func;
3270 gcc_assert (code == SQRT);
3271 func = "_Qp_sqrt";
3273 emit_soft_tfmode_libcall (func, 2, operands);
3276 static void
3277 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3279 const char *func;
3281 switch (code)
3283 case FLOAT_EXTEND:
3284 switch (GET_MODE (operands[1]))
3286 case SFmode:
3287 func = "_Qp_stoq";
3288 break;
3289 case DFmode:
3290 func = "_Qp_dtoq";
3291 break;
3292 default:
3293 gcc_unreachable ();
3295 break;
3297 case FLOAT_TRUNCATE:
3298 switch (GET_MODE (operands[0]))
3300 case SFmode:
3301 func = "_Qp_qtos";
3302 break;
3303 case DFmode:
3304 func = "_Qp_qtod";
3305 break;
3306 default:
3307 gcc_unreachable ();
3309 break;
3311 case FLOAT:
3312 switch (GET_MODE (operands[1]))
3314 case SImode:
3315 func = "_Qp_itoq";
3316 if (TARGET_ARCH64)
3317 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3318 break;
3319 case DImode:
3320 func = "_Qp_xtoq";
3321 break;
3322 default:
3323 gcc_unreachable ();
3325 break;
3327 case UNSIGNED_FLOAT:
3328 switch (GET_MODE (operands[1]))
3330 case SImode:
3331 func = "_Qp_uitoq";
3332 if (TARGET_ARCH64)
3333 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3334 break;
3335 case DImode:
3336 func = "_Qp_uxtoq";
3337 break;
3338 default:
3339 gcc_unreachable ();
3341 break;
3343 case FIX:
3344 switch (GET_MODE (operands[0]))
3346 case SImode:
3347 func = "_Qp_qtoi";
3348 break;
3349 case DImode:
3350 func = "_Qp_qtox";
3351 break;
3352 default:
3353 gcc_unreachable ();
3355 break;
3357 case UNSIGNED_FIX:
3358 switch (GET_MODE (operands[0]))
3360 case SImode:
3361 func = "_Qp_qtoui";
3362 break;
3363 case DImode:
3364 func = "_Qp_qtoux";
3365 break;
3366 default:
3367 gcc_unreachable ();
3369 break;
3371 default:
3372 gcc_unreachable ();
3375 emit_soft_tfmode_libcall (func, 2, operands);
3378 /* Expand a hard-float TFmode operation. All arguments must be in
3379 registers. */
3381 static void
3382 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3384 rtx op, dest;
3386 if (GET_RTX_CLASS (code) == RTX_UNARY)
3388 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3389 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3391 else
3393 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3394 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3395 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3396 operands[1], operands[2]);
3399 if (register_operand (operands[0], VOIDmode))
3400 dest = operands[0];
3401 else
3402 dest = gen_reg_rtx (GET_MODE (operands[0]));
3404 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3406 if (dest != operands[0])
3407 emit_move_insn (operands[0], dest);
3410 void
3411 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3413 if (TARGET_HARD_QUAD)
3414 emit_hard_tfmode_operation (code, operands);
3415 else
3416 emit_soft_tfmode_binop (code, operands);
3419 void
3420 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3422 if (TARGET_HARD_QUAD)
3423 emit_hard_tfmode_operation (code, operands);
3424 else
3425 emit_soft_tfmode_unop (code, operands);
3428 void
3429 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3431 if (TARGET_HARD_QUAD)
3432 emit_hard_tfmode_operation (code, operands);
3433 else
3434 emit_soft_tfmode_cvt (code, operands);
3437 /* Return nonzero if a branch/jump/call instruction will be emitting
3438 a nop into its delay slot. */
3441 empty_delay_slot (rtx_insn *insn)
3443 rtx seq;
3445 /* If no previous instruction (should not happen), return true. */
3446 if (PREV_INSN (insn) == NULL)
3447 return 1;
3449 seq = NEXT_INSN (PREV_INSN (insn));
3450 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3451 return 0;
3453 return 1;
3456 /* Return nonzero if we should emit a nop after a cbcond instruction.
3457 The cbcond instruction does not have a delay slot; however, there is
3458 a severe performance penalty if a control transfer appears right
3459 after a cbcond. Therefore we emit a nop when we detect this
3460 situation. */
3463 emit_cbcond_nop (rtx insn)
3465 rtx next = next_active_insn (insn);
3467 if (!next)
3468 return 1;
3470 if (NONJUMP_INSN_P (next)
3471 && GET_CODE (PATTERN (next)) == SEQUENCE)
3472 next = XVECEXP (PATTERN (next), 0, 0);
3473 else if (CALL_P (next)
3474 && GET_CODE (PATTERN (next)) == PARALLEL)
3476 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3478 if (GET_CODE (delay) == RETURN)
3480 /* It's a sibling call. Do not emit the nop if we're going
3481 to emit something other than the jump itself as the first
3482 instruction of the sibcall sequence. */
3483 if (sparc_leaf_function_p || TARGET_FLAT)
3484 return 0;
3488 if (NONJUMP_INSN_P (next))
3489 return 0;
3491 return 1;
3494 /* Return nonzero if TRIAL can go into the call delay slot. */
3497 eligible_for_call_delay (rtx_insn *trial)
3499 rtx pat;
3501 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3502 return 0;
3504 /* Binutils allows
3505 call __tls_get_addr, %tgd_call (foo)
3506 add %l7, %o0, %o0, %tgd_add (foo)
3507 while Sun as/ld does not. */
3508 if (TARGET_GNU_TLS || !TARGET_TLS)
3509 return 1;
3511 pat = PATTERN (trial);
3513 /* We must reject tgd_add{32|64}, i.e.
3514 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3515 and tldm_add{32|64}, i.e.
3516 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3517 for Sun as/ld. */
3518 if (GET_CODE (pat) == SET
3519 && GET_CODE (SET_SRC (pat)) == PLUS)
3521 rtx unspec = XEXP (SET_SRC (pat), 1);
3523 if (GET_CODE (unspec) == UNSPEC
3524 && (XINT (unspec, 1) == UNSPEC_TLSGD
3525 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3526 return 0;
3529 return 1;
3532 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3533 instruction. RETURN_P is true if the v9 variant 'return' is to be
3534 considered in the test too.
3536 TRIAL must be a SET whose destination is a REG appropriate for the
3537 'restore' instruction or, if RETURN_P is true, for the 'return'
3538 instruction. */
3540 static int
3541 eligible_for_restore_insn (rtx trial, bool return_p)
3543 rtx pat = PATTERN (trial);
3544 rtx src = SET_SRC (pat);
3545 bool src_is_freg = false;
3546 rtx src_reg;
3548 /* Since we now can do moves between float and integer registers when
3549 VIS3 is enabled, we have to catch this case. We can, however,
3550 allow such moves when doing a 'return'. */
3551 src_reg = src;
3552 if (GET_CODE (src_reg) == SUBREG)
3553 src_reg = SUBREG_REG (src_reg);
3554 if (GET_CODE (src_reg) == REG
3555 && SPARC_FP_REG_P (REGNO (src_reg)))
3556 src_is_freg = true;
3558 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3559 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3560 && arith_operand (src, GET_MODE (src))
3561 && ! src_is_freg)
3563 if (TARGET_ARCH64)
3564 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3565 else
3566 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3569 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3570 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3571 && arith_double_operand (src, GET_MODE (src))
3572 && ! src_is_freg)
3573 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3575 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3576 else if (! TARGET_FPU && register_operand (src, SFmode))
3577 return 1;
3579 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3580 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3581 return 1;
3583 /* If we have the 'return' instruction, anything that does not use
3584 local or output registers and can go into a delay slot wins. */
3585 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3586 return 1;
3588 /* The 'restore src1,src2,dest' pattern for SImode. */
3589 else if (GET_CODE (src) == PLUS
3590 && register_operand (XEXP (src, 0), SImode)
3591 && arith_operand (XEXP (src, 1), SImode))
3592 return 1;
3594 /* The 'restore src1,src2,dest' pattern for DImode. */
3595 else if (GET_CODE (src) == PLUS
3596 && register_operand (XEXP (src, 0), DImode)
3597 && arith_double_operand (XEXP (src, 1), DImode))
3598 return 1;
3600 /* The 'restore src1,%lo(src2),dest' pattern. */
3601 else if (GET_CODE (src) == LO_SUM
3602 && ! TARGET_CM_MEDMID
3603 && ((register_operand (XEXP (src, 0), SImode)
3604 && immediate_operand (XEXP (src, 1), SImode))
3605 || (TARGET_ARCH64
3606 && register_operand (XEXP (src, 0), DImode)
3607 && immediate_operand (XEXP (src, 1), DImode))))
3608 return 1;
3610 /* The 'restore src,src,dest' pattern. */
3611 else if (GET_CODE (src) == ASHIFT
3612 && (register_operand (XEXP (src, 0), SImode)
3613 || register_operand (XEXP (src, 0), DImode))
3614 && XEXP (src, 1) == const1_rtx)
3615 return 1;
3617 return 0;
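/* Thus the common 'return g (x);' tail, where the inner call's result
   must be moved from this window's %o0 to the caller's %o0, can be
   folded into the epilogue as something like
     ret
     restore %o0, %g0, %o0
   doing the move and the window restore in a single instruction. */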
3620 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3623 eligible_for_return_delay (rtx_insn *trial)
3625 int regno;
3626 rtx pat;
3628 /* If the function uses __builtin_eh_return, the eh_return machinery
3629 occupies the delay slot. */
3630 if (crtl->calls_eh_return)
3631 return 0;
3633 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3634 return 0;
3636 /* In the case of a leaf or flat function, anything can go into the slot. */
3637 if (sparc_leaf_function_p || TARGET_FLAT)
3638 return 1;
3640 if (!NONJUMP_INSN_P (trial))
3641 return 0;
3643 pat = PATTERN (trial);
3644 if (GET_CODE (pat) == PARALLEL)
3646 int i;
3648 if (! TARGET_V9)
3649 return 0;
3650 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3652 rtx expr = XVECEXP (pat, 0, i);
3653 if (GET_CODE (expr) != SET)
3654 return 0;
3655 if (GET_CODE (SET_DEST (expr)) != REG)
3656 return 0;
3657 regno = REGNO (SET_DEST (expr));
3658 if (regno >= 8 && regno < 24)
3659 return 0;
3661 return !epilogue_renumber (&pat, 1);
3664 if (GET_CODE (pat) != SET)
3665 return 0;
3667 if (GET_CODE (SET_DEST (pat)) != REG)
3668 return 0;
3670 regno = REGNO (SET_DEST (pat));
3672 /* Otherwise, only operations which can be done in tandem with
3673 a `restore' or `return' insn can go into the delay slot. */
3674 if (regno >= 8 && regno < 24)
3675 return 0;
3677 /* If this instruction sets up a floating point register and we have a return
3678 instruction, it can probably go in. But restore will not work
3679 with FP_REGS. */
3680 if (! SPARC_INT_REG_P (regno))
3681 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3683 return eligible_for_restore_insn (trial, true);
3686 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3689 eligible_for_sibcall_delay (rtx_insn *trial)
3691 rtx pat;
3693 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3694 return 0;
3696 if (!NONJUMP_INSN_P (trial))
3697 return 0;
3699 pat = PATTERN (trial);
3701 if (sparc_leaf_function_p || TARGET_FLAT)
3703 /* If the tail call is done using the call instruction,
3704 we have to restore %o7 in the delay slot. */
3705 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3706 return 0;
3708 /* %g1 is used to build the function address.  */
3709 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3710 return 0;
3712 return 1;
3715 if (GET_CODE (pat) != SET)
3716 return 0;
3718 /* Otherwise, only operations which can be done in tandem with
3719 a `restore' insn can go into the delay slot. */
3720 if (GET_CODE (SET_DEST (pat)) != REG
3721 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3722 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3723 return 0;
3725 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3726 in most cases. */
3727 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3728 return 0;
3730 return eligible_for_restore_insn (trial, false);
3733 /* Determine if it's legal to put X into the constant pool. This
3734 is not possible if X contains the address of a symbol that is
3735 not constant (TLS) or not known at final link time (PIC). */
3737 static bool
3738 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3740 switch (GET_CODE (x))
3742 case CONST_INT:
3743 case CONST_DOUBLE:
3744 case CONST_VECTOR:
3745 /* Accept all non-symbolic constants. */
3746 return false;
3748 case LABEL_REF:
3749 /* Labels are OK iff we are non-PIC. */
3750 return flag_pic != 0;
3752 case SYMBOL_REF:
3753 /* 'Naked' TLS symbol references are never OK,
3754 non-TLS symbols are OK iff we are non-PIC. */
3755 if (SYMBOL_REF_TLS_MODEL (x))
3756 return true;
3757 else
3758 return flag_pic != 0;
3760 case CONST:
3761 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3762 case PLUS:
3763 case MINUS:
3764 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3765 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3766 case UNSPEC:
3767 return true;
3768 default:
3769 gcc_unreachable ();
3773 /* Global Offset Table support. */
3774 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3775 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3777 /* Return the SYMBOL_REF for the Global Offset Table. */
3779 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3781 static rtx
3782 sparc_got (void)
3784 if (!sparc_got_symbol)
3785 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3787 return sparc_got_symbol;
3790 /* Ensure that we are not using patterns that are not OK with PIC. */
3793 check_pic (int i)
3795 rtx op;
3797 switch (flag_pic)
3799 case 1:
3800 op = recog_data.operand[i];
3801 gcc_assert (GET_CODE (op) != SYMBOL_REF
3802 && (GET_CODE (op) != CONST
3803 || (GET_CODE (XEXP (op, 0)) == MINUS
3804 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3805 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3806 case 2:
3807 default:
3808 return 1;
3812 /* Return true if X is an address which needs a temporary register when
3813 reloaded while generating PIC code. */
3816 pic_address_needs_scratch (rtx x)
3818 /* An address which is a symbol plus a non-SMALL_INT constant needs a temp reg. */
3819 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3820 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3821 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3822 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3823 return 1;
3825 return 0;
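/* For instance, 'sym + 0x12345' needs a scratch register: the offset is
   too large for SMALL_INT, so it cannot simply be folded into the
   %lo() part of the PIC address computation. */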
3828 /* Determine if a given RTX is a valid constant. We already know this
3829 satisfies CONSTANT_P. */
3831 static bool
3832 sparc_legitimate_constant_p (machine_mode mode, rtx x)
3834 switch (GET_CODE (x))
3836 case CONST:
3837 case SYMBOL_REF:
3838 if (sparc_tls_referenced_p (x))
3839 return false;
3840 break;
3842 case CONST_DOUBLE:
3843 if (GET_MODE (x) == VOIDmode)
3844 return true;
3846 /* Floating point constants are generally not ok.
3847 The only exceptions are 0.0 and all-ones in VIS. */
3848 if (TARGET_VIS
3849 && SCALAR_FLOAT_MODE_P (mode)
3850 && (const_zero_operand (x, mode)
3851 || const_all_ones_operand (x, mode)))
3852 return true;
3854 return false;
3856 case CONST_VECTOR:
3857 /* Vector constants are generally not ok.
3858 The only exceptions are 0 and -1 in VIS. */
3859 if (TARGET_VIS
3860 && (const_zero_operand (x, mode)
3861 || const_all_ones_operand (x, mode)))
3862 return true;
3864 return false;
3866 default:
3867 break;
3870 return true;
3873 /* Determine if a given RTX is a valid constant address. */
3875 bool
3876 constant_address_p (rtx x)
3878 switch (GET_CODE (x))
3880 case LABEL_REF:
3881 case CONST_INT:
3882 case HIGH:
3883 return true;
3885 case CONST:
3886 if (flag_pic && pic_address_needs_scratch (x))
3887 return false;
3888 return sparc_legitimate_constant_p (Pmode, x);
3890 case SYMBOL_REF:
3891 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3893 default:
3894 return false;
3898 /* Nonzero if the constant value X is a legitimate general operand
3899 when generating PIC code. It is given that flag_pic is on and
3900 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3902 bool
3903 legitimate_pic_operand_p (rtx x)
3905 if (pic_address_needs_scratch (x))
3906 return false;
3907 if (sparc_tls_referenced_p (x))
3908 return false;
3909 return true;
3912 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3913 (CONST_INT_P (X) \
3914 && INTVAL (X) >= -0x1000 \
3915 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3917 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3918 (CONST_INT_P (X) \
3919 && INTVAL (X) >= -0x1000 \
3920 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
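/* Both macros enforce the 13-bit signed immediate range, trimmed at the
   top so that the last byte of a MODE-sized access is still in range.
   RTX_OK_FOR_OLO10_P stops at 0xc00 because the %lo() relocation may
   add up to 0x3ff more, and 0xc00 + 0x3ff == 0xfff is the simm13
   maximum. */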
3922 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3924 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3925 ordinarily. This changes a bit when generating PIC. */
3927 static bool
3928 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3930 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3932 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3933 rs1 = addr;
3934 else if (GET_CODE (addr) == PLUS)
3936 rs1 = XEXP (addr, 0);
3937 rs2 = XEXP (addr, 1);
3939 /* Canonicalize. REG comes first, if there are no regs,
3940 LO_SUM comes first. */
3941 if (!REG_P (rs1)
3942 && GET_CODE (rs1) != SUBREG
3943 && (REG_P (rs2)
3944 || GET_CODE (rs2) == SUBREG
3945 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3947 rs1 = XEXP (addr, 1);
3948 rs2 = XEXP (addr, 0);
3951 if ((flag_pic == 1
3952 && rs1 == pic_offset_table_rtx
3953 && !REG_P (rs2)
3954 && GET_CODE (rs2) != SUBREG
3955 && GET_CODE (rs2) != LO_SUM
3956 && GET_CODE (rs2) != MEM
3957 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3958 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3959 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3960 || ((REG_P (rs1)
3961 || GET_CODE (rs1) == SUBREG)
3962 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3964 imm1 = rs2;
3965 rs2 = NULL;
3967 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3968 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3970 /* We prohibit REG + REG for TFmode when there are no quad move insns
3971 and we consequently need to split. We do this because REG+REG
3972 is not an offsettable address. If we get the situation in reload
3973 where source and destination of a movtf pattern are both MEMs with
3974 REG+REG address, then only one of them gets converted to an
3975 offsettable address. */
3976 if (mode == TFmode
3977 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3978 return 0;
3980 /* Likewise for TImode, but in all cases. */
3981 if (mode == TImode)
3982 return 0;
3984 /* We prohibit REG + REG on ARCH32 if not optimizing for
3985 DFmode/DImode because then mem_min_alignment is likely to be zero
3986 after reload and the forced split would lack a matching splitter
3987 pattern. */
3988 if (TARGET_ARCH32 && !optimize
3989 && (mode == DFmode || mode == DImode))
3990 return 0;
3992 else if (USE_AS_OFFSETABLE_LO10
3993 && GET_CODE (rs1) == LO_SUM
3994 && TARGET_ARCH64
3995 && ! TARGET_CM_MEDMID
3996 && RTX_OK_FOR_OLO10_P (rs2, mode))
3998 rs2 = NULL;
3999 imm1 = XEXP (rs1, 1);
4000 rs1 = XEXP (rs1, 0);
4001 if (!CONSTANT_P (imm1)
4002 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4003 return 0;
4006 else if (GET_CODE (addr) == LO_SUM)
4008 rs1 = XEXP (addr, 0);
4009 imm1 = XEXP (addr, 1);
4011 if (!CONSTANT_P (imm1)
4012 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4013 return 0;
4015 /* We can't allow TFmode in 32-bit mode, because an offset greater
4016 than the alignment (8) may cause the LO_SUM to overflow. */
4017 if (mode == TFmode && TARGET_ARCH32)
4018 return 0;
4020 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4021 return 1;
4022 else
4023 return 0;
4025 if (GET_CODE (rs1) == SUBREG)
4026 rs1 = SUBREG_REG (rs1);
4027 if (!REG_P (rs1))
4028 return 0;
4030 if (rs2)
4032 if (GET_CODE (rs2) == SUBREG)
4033 rs2 = SUBREG_REG (rs2);
4034 if (!REG_P (rs2))
4035 return 0;
4038 if (strict)
4040 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4041 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4042 return 0;
4044 else
4046 if ((! SPARC_INT_REG_P (REGNO (rs1))
4047 && REGNO (rs1) != FRAME_POINTER_REGNUM
4048 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4049 || (rs2
4050 && (! SPARC_INT_REG_P (REGNO (rs2))
4051 && REGNO (rs2) != FRAME_POINTER_REGNUM
4052 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4053 return 0;
4055 return 1;
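/* Editorial aside (approximate RTL, reader's aid only): in non-PIC
   32-bit mode the hook above accepts, among others,

     (plus (reg %g1) (reg %g2))           REG+REG
     (plus (reg %g1) (const_int 44))      REG+simm13
     (lo_sum (reg %g1) (symbol_ref "x"))  low part of a HIGH/LO_SUM pair

   and rejects

     (plus (reg %g1) (const_int 8192))    offset outside simm13
     REG+REG for TFmode                   no quad move insns, see above.  */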
4058 /* Return the SYMBOL_REF for the tls_get_addr function. */
4060 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4062 static rtx
4063 sparc_tls_get_addr (void)
4065 if (!sparc_tls_symbol)
4066 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4068 return sparc_tls_symbol;
4071 /* Return the Global Offset Table to be used in TLS mode. */
4073 static rtx
4074 sparc_tls_got (void)
4076 /* In PIC mode, this is just the PIC offset table. */
4077 if (flag_pic)
4079 crtl->uses_pic_offset_table = 1;
4080 return pic_offset_table_rtx;
4083 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4084 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4085 if (TARGET_SUN_TLS && TARGET_ARCH32)
4087 load_got_register ();
4088 return global_offset_table_rtx;
4091 /* In all other cases, we load a new pseudo with the GOT symbol. */
4092 return copy_to_reg (sparc_got ());
4095 /* Return true if X contains a thread-local symbol. */
4097 static bool
4098 sparc_tls_referenced_p (rtx x)
4100 if (!TARGET_HAVE_TLS)
4101 return false;
4103 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4104 x = XEXP (XEXP (x, 0), 0);
4106 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4107 return true;
4109 /* That's all we handle in sparc_legitimize_tls_address for now. */
4110 return false;
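/* Editorial aside: the only two shapes recognized above are a bare
   (symbol_ref "x") whose TLS model is set and the offsetted form
   (const (plus (symbol_ref "x") (const_int N))), matching what
   sparc_legitimize_tls_address handles below.  */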
4113 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4114 this (thread-local) address. */
4116 static rtx
4117 sparc_legitimize_tls_address (rtx addr)
4119 rtx temp1, temp2, temp3, ret, o0, got;
4120 rtx_insn *insn;
4122 gcc_assert (can_create_pseudo_p ());
4124 if (GET_CODE (addr) == SYMBOL_REF)
4125 switch (SYMBOL_REF_TLS_MODEL (addr))
4127 case TLS_MODEL_GLOBAL_DYNAMIC:
4128 start_sequence ();
4129 temp1 = gen_reg_rtx (SImode);
4130 temp2 = gen_reg_rtx (SImode);
4131 ret = gen_reg_rtx (Pmode);
4132 o0 = gen_rtx_REG (Pmode, 8);
4133 got = sparc_tls_got ();
4134 emit_insn (gen_tgd_hi22 (temp1, addr));
4135 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4136 if (TARGET_ARCH32)
4138 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4139 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4140 addr, const1_rtx));
4142 else
4144 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4145 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4146 addr, const1_rtx));
4148 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4149 insn = get_insns ();
4150 end_sequence ();
4151 emit_libcall_block (insn, ret, o0, addr);
4152 break;
4154 case TLS_MODEL_LOCAL_DYNAMIC:
4155 start_sequence ();
4156 temp1 = gen_reg_rtx (SImode);
4157 temp2 = gen_reg_rtx (SImode);
4158 temp3 = gen_reg_rtx (Pmode);
4159 ret = gen_reg_rtx (Pmode);
4160 o0 = gen_rtx_REG (Pmode, 8);
4161 got = sparc_tls_got ();
4162 emit_insn (gen_tldm_hi22 (temp1));
4163 emit_insn (gen_tldm_lo10 (temp2, temp1));
4164 if (TARGET_ARCH32)
4166 emit_insn (gen_tldm_add32 (o0, got, temp2));
4167 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4168 const1_rtx));
4170 else
4172 emit_insn (gen_tldm_add64 (o0, got, temp2));
4173 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4174 const1_rtx));
4176 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4177 insn = get_insns ();
4178 end_sequence ();
4179 emit_libcall_block (insn, temp3, o0,
4180 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4181 UNSPEC_TLSLD_BASE));
4182 temp1 = gen_reg_rtx (SImode);
4183 temp2 = gen_reg_rtx (SImode);
4184 emit_insn (gen_tldo_hix22 (temp1, addr));
4185 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4186 if (TARGET_ARCH32)
4187 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4188 else
4189 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4190 break;
4192 case TLS_MODEL_INITIAL_EXEC:
4193 temp1 = gen_reg_rtx (SImode);
4194 temp2 = gen_reg_rtx (SImode);
4195 temp3 = gen_reg_rtx (Pmode);
4196 got = sparc_tls_got ();
4197 emit_insn (gen_tie_hi22 (temp1, addr));
4198 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4199 if (TARGET_ARCH32)
4200 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4201 else
4202 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4203 if (TARGET_SUN_TLS)
4205 ret = gen_reg_rtx (Pmode);
4206 if (TARGET_ARCH32)
4207 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4208 temp3, addr));
4209 else
4210 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4211 temp3, addr));
4213 else
4214 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4215 break;
4217 case TLS_MODEL_LOCAL_EXEC:
4218 temp1 = gen_reg_rtx (Pmode);
4219 temp2 = gen_reg_rtx (Pmode);
4220 if (TARGET_ARCH32)
4222 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4223 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4225 else
4227 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4228 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4230 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4231 break;
4233 default:
4234 gcc_unreachable ();
4237 else if (GET_CODE (addr) == CONST)
4239 rtx base, offset;
4241 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4243 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4244 offset = XEXP (XEXP (addr, 0), 1);
4246 base = force_operand (base, NULL_RTX);
4247 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4248 offset = force_reg (Pmode, offset);
4249 ret = gen_rtx_PLUS (Pmode, base, offset);
4252 else
4253 gcc_unreachable (); /* for now ... */
4255 return ret;
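/* Editorial sketch (relocation syntax approximate): for the local-exec
   model handled above, the emitted RTL corresponds to something like

     sethi  %tle_hix22(x), %o1
     xor    %o1, %tle_lox10(x), %o1

   after which the address is %g7 + %o1, %g7 being the thread pointer.
   The other models additionally load from the GOT and, for the two
   dynamic ones, call __tls_get_addr.  */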
4258 /* Legitimize PIC addresses. If the address is already position-independent,
4259 we return ORIG. Newly generated position-independent addresses go into a
4260 reg. This is REG if nonzero, otherwise we allocate register(s) as
4261 necessary. */
4263 static rtx
4264 sparc_legitimize_pic_address (rtx orig, rtx reg)
4266 bool gotdata_op = false;
4268 if (GET_CODE (orig) == SYMBOL_REF
4269 /* See the comment in sparc_expand_move. */
4270 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4272 rtx pic_ref, address;
4273 rtx_insn *insn;
4275 if (reg == 0)
4277 gcc_assert (can_create_pseudo_p ());
4278 reg = gen_reg_rtx (Pmode);
4281 if (flag_pic == 2)
4283 /* If not during reload, allocate another temp reg here for loading
4284 in the address, so that these instructions can be optimized
4285 properly. */
4286 rtx temp_reg = (! can_create_pseudo_p ()
4287 ? reg : gen_reg_rtx (Pmode));
4289 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4290 won't get confused into thinking that these two instructions
4291 are loading in the true address of the symbol. If in the
4292 future a PIC rtx exists, that should be used instead. */
4293 if (TARGET_ARCH64)
4295 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4296 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4298 else
4300 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4301 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4303 address = temp_reg;
4304 gotdata_op = true;
4306 else
4307 address = orig;
4309 crtl->uses_pic_offset_table = 1;
4310 if (gotdata_op)
4312 if (TARGET_ARCH64)
4313 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4314 pic_offset_table_rtx,
4315 address, orig));
4316 else
4317 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4318 pic_offset_table_rtx,
4319 address, orig));
4321 else
4323 pic_ref
4324 = gen_const_mem (Pmode,
4325 gen_rtx_PLUS (Pmode,
4326 pic_offset_table_rtx, address));
4327 insn = emit_move_insn (reg, pic_ref);
4330 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4331 by loop. */
4332 set_unique_reg_note (insn, REG_EQUAL, orig);
4333 return reg;
4335 else if (GET_CODE (orig) == CONST)
4337 rtx base, offset;
4339 if (GET_CODE (XEXP (orig, 0)) == PLUS
4340 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4341 return orig;
4343 if (reg == 0)
4345 gcc_assert (can_create_pseudo_p ());
4346 reg = gen_reg_rtx (Pmode);
4349 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4350 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4351 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4352 base == reg ? NULL_RTX : reg);
4354 if (GET_CODE (offset) == CONST_INT)
4356 if (SMALL_INT (offset))
4357 return plus_constant (Pmode, base, INTVAL (offset));
4358 else if (can_create_pseudo_p ())
4359 offset = force_reg (Pmode, offset);
4360 else
4361 /* If we reach here, then something is seriously wrong. */
4362 gcc_unreachable ();
4364 return gen_rtx_PLUS (Pmode, base, offset);
4366 else if (GET_CODE (orig) == LABEL_REF)
4367 /* ??? We ought to be checking that the register is live instead, in case
4368 it is eliminated. */
4369 crtl->uses_pic_offset_table = 1;
4371 return orig;
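/* Editorial sketch (relocation operators shown schematically): with
   flag_pic == 2 the code above emits a three-instruction GOT access
   for a global symbol x, along the lines of

     sethi  %hi(x), %g1        ! movsi_high_pic
     or     %g1, %lo(x), %g1   ! movsi_lo_sum_pic
     ld     [%l7 + %g1], %g1   ! movsi_pic_gotdata_op

   where %l7 is the PIC register; the gotdata form of the load is what
   lets the linker optimize the GOT access for symbols that turn out
   to resolve locally.  */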
4374 /* Try machine-dependent ways of modifying an illegitimate address X
4375 to be legitimate. If we find one, return the new, valid address.
4377 OLDX is the address as it was before break_out_memory_refs was called.
4378 In some cases it is useful to look at this to decide what needs to be done.
4380 MODE is the mode of the operand pointed to by X.
4382 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4384 static rtx
4385 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4386 machine_mode mode)
4388 rtx orig_x = x;
4390 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4391 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4392 force_operand (XEXP (x, 0), NULL_RTX));
4393 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4394 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4395 force_operand (XEXP (x, 1), NULL_RTX));
4396 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4397 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4398 XEXP (x, 1));
4399 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4400 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4401 force_operand (XEXP (x, 1), NULL_RTX));
4403 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4404 return x;
4406 if (sparc_tls_referenced_p (x))
4407 x = sparc_legitimize_tls_address (x);
4408 else if (flag_pic)
4409 x = sparc_legitimize_pic_address (x, NULL_RTX);
4410 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4411 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4412 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4413 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4414 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4415 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4416 else if (GET_CODE (x) == SYMBOL_REF
4417 || GET_CODE (x) == CONST
4418 || GET_CODE (x) == LABEL_REF)
4419 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4421 return x;
4424 /* Delegitimize an address that was legitimized by the above function. */
4426 static rtx
4427 sparc_delegitimize_address (rtx x)
4429 x = delegitimize_mem_from_attrs (x);
4431 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4432 switch (XINT (XEXP (x, 1), 1))
4434 case UNSPEC_MOVE_PIC:
4435 case UNSPEC_TLSLE:
4436 x = XVECEXP (XEXP (x, 1), 0, 0);
4437 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4438 break;
4439 default:
4440 break;
4443 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4444 if (GET_CODE (x) == MINUS
4445 && REG_P (XEXP (x, 0))
4446 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4447 && GET_CODE (XEXP (x, 1)) == LO_SUM
4448 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4449 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4451 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4452 gcc_assert (GET_CODE (x) == LABEL_REF);
4455 return x;
4458 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4459 replace the input X, or the original X if no replacement is called for.
4460 The output parameter *WIN is 1 if the calling macro should goto WIN,
4461 0 if it should not.
4463 For SPARC, we wish to handle addresses by splitting them into
4464 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4465 This cuts the number of extra insns by one.
4467 Do nothing when generating PIC code and the address is a symbolic
4468 operand or requires a scratch register. */
4470 rtx
4471 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4472 int opnum, int type,
4473 int ind_levels ATTRIBUTE_UNUSED, int *win)
4475 /* Decompose SImode constants into HIGH+LO_SUM. */
4476 if (CONSTANT_P (x)
4477 && (mode != TFmode || TARGET_ARCH64)
4478 && GET_MODE (x) == SImode
4479 && GET_CODE (x) != LO_SUM
4480 && GET_CODE (x) != HIGH
4481 && sparc_cmodel <= CM_MEDLOW
4482 && !(flag_pic
4483 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4485 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4486 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4487 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4488 opnum, (enum reload_type)type);
4489 *win = 1;
4490 return x;
4493 /* We have to recognize what we have already generated above. */
4494 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4496 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4497 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4498 opnum, (enum reload_type)type);
4499 *win = 1;
4500 return x;
4503 *win = 0;
4504 return x;
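/* Editorial sketch: the HIGH/LO_SUM split performed above yields the
   classic two-instruction symbolic access, with the LO_SUM kept in
   the memory reference:

     sethi  %hi(sym), %g1
     ld     [%g1 + %lo(sym)], %o0

   saving one insn compared with materializing the full address into a
   register first.  */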
4507 /* Return true if ADDR (a legitimate address expression)
4508 has an effect that depends on the machine mode it is used for.
4510 In PIC mode,
4512 (mem:HI [%l7+a])
4514 is not equivalent to
4516 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4518 because [%l7+a+1] is interpreted as the address of (a+1). */
4521 static bool
4522 sparc_mode_dependent_address_p (const_rtx addr,
4523 addr_space_t as ATTRIBUTE_UNUSED)
4525 if (flag_pic && GET_CODE (addr) == PLUS)
4527 rtx op0 = XEXP (addr, 0);
4528 rtx op1 = XEXP (addr, 1);
4529 if (op0 == pic_offset_table_rtx
4530 && symbolic_operand (op1, VOIDmode))
4531 return true;
4534 return false;
4537 #ifdef HAVE_GAS_HIDDEN
4538 # define USE_HIDDEN_LINKONCE 1
4539 #else
4540 # define USE_HIDDEN_LINKONCE 0
4541 #endif
4543 static void
4544 get_pc_thunk_name (char name[32], unsigned int regno)
4546 const char *reg_name = reg_names[regno];
4548 /* Skip the leading '%' as that cannot be used in a
4549 symbol name. */
4550 reg_name += 1;
4552 if (USE_HIDDEN_LINKONCE)
4553 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4554 else
4555 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
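/* Editorial aside: for %l7, the usual PIC register, the linkonce case
   above produces the name "__sparc_get_pc_thunk.l7"; otherwise an
   ordinary compiler-internal label derived from "LADDPC" and the
   register number is used.  */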
4558 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4560 static rtx
4561 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4563 int orig_flag_pic = flag_pic;
4564 rtx insn;
4566 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4567 flag_pic = 0;
4568 if (TARGET_ARCH64)
4569 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4570 else
4571 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4572 flag_pic = orig_flag_pic;
4574 return insn;
4577 /* Emit code to load the GOT register. */
4579 void
4580 load_got_register (void)
4582 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4583 if (!global_offset_table_rtx)
4584 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4586 if (TARGET_VXWORKS_RTP)
4587 emit_insn (gen_vxworks_load_got ());
4588 else
4590 /* The GOT symbol is subject to a PC-relative relocation so we need a
4591 helper function to add the PC value and thus get the final value. */
4592 if (!got_helper_rtx)
4594 char name[32];
4595 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4596 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4599 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4600 got_helper_rtx,
4601 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4604 /* Need to emit this whether or not we obey regdecls,
4605 since setjmp/longjmp can cause life info to screw up.
4606 ??? In the case where we don't obey regdecls, this is not sufficient
4607 since we may not fall out the bottom. */
4608 emit_use (global_offset_table_rtx);
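/* Editorial sketch (thunk name and offsets as typically seen on ELF
   targets): the load_pcrel_sym expansion above materializes the GOT
   pointer roughly as

     sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
     call   __sparc_get_pc_thunk.l7
      add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   the thunk being "jmp %o7+8; add %o7, %l7, %l7", so the PC of the
   call gets folded into the final GOT address.  */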
4611 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4612 address of the call target. */
4614 void
4615 sparc_emit_call_insn (rtx pat, rtx addr)
4617 rtx_insn *insn;
4619 insn = emit_call_insn (pat);
4621 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4622 if (TARGET_VXWORKS_RTP
4623 && flag_pic
4624 && GET_CODE (addr) == SYMBOL_REF
4625 && (SYMBOL_REF_DECL (addr)
4626 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4627 : !SYMBOL_REF_LOCAL_P (addr)))
4629 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4630 crtl->uses_pic_offset_table = 1;
4634 /* Return 1 if RTX is a MEM which is known to be aligned to at
4635 least a DESIRED byte boundary. */
4637 int
4638 mem_min_alignment (rtx mem, int desired)
4640 rtx addr, base, offset;
4642 /* If it's not a MEM we can't accept it. */
4643 if (GET_CODE (mem) != MEM)
4644 return 0;
4646 /* Obviously... */
4647 if (!TARGET_UNALIGNED_DOUBLES
4648 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4649 return 1;
4651 /* ??? The rest of the function predates MEM_ALIGN so
4652 there is probably a bit of redundancy. */
4653 addr = XEXP (mem, 0);
4654 base = offset = NULL_RTX;
4655 if (GET_CODE (addr) == PLUS)
4657 if (GET_CODE (XEXP (addr, 0)) == REG)
4659 base = XEXP (addr, 0);
4661 /* What we are saying here is that if the base
4662 REG is aligned properly, the compiler will make
4663 sure any REG based index upon it will be so
4664 as well. */
4665 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4666 offset = XEXP (addr, 1);
4667 else
4668 offset = const0_rtx;
4671 else if (GET_CODE (addr) == REG)
4673 base = addr;
4674 offset = const0_rtx;
4677 if (base != NULL_RTX)
4679 int regno = REGNO (base);
4681 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4683 /* Check if the compiler has recorded some information
4684 about the alignment of the base REG. If reload has
4685 completed, we already matched with proper alignments.
4686 If not running global_alloc, reload might give us
4687 unaligned pointer to local stack though. */
4688 if (((cfun != 0
4689 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4690 || (optimize && reload_completed))
4691 && (INTVAL (offset) & (desired - 1)) == 0)
4692 return 1;
4694 else
4696 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4697 return 1;
4700 else if (! TARGET_UNALIGNED_DOUBLES
4701 || CONSTANT_P (addr)
4702 || GET_CODE (addr) == LO_SUM)
4704 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4705 is true, in which case we can only assume that an access is aligned if
4706 it is to a constant address, or the address involves a LO_SUM. */
4707 return 1;
4710 /* An obviously unaligned address. */
4711 return 0;
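/* Editorial aside: a worked example for the function above in 32-bit
   mode, where SPARC_STACK_BIAS is 0.  For
   (mem:DF (plus (reg %fp) (const_int -16))), mem_min_alignment (mem, 8)
   returns 1 through the frame-pointer branch since (-16 - 0) & 7 == 0;
   with offset -12 it would return 0.  */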
4715 /* Vectors to keep interesting information about registers where it can easily
4716 be got. We used to use the actual mode value as the bit number, but there
4717 are more than 32 modes now. Instead we use two tables: one indexed by
4718 hard register number, and one indexed by mode. */
4720 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4721 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4722 mapped into one sparc_mode_class mode. */
4724 enum sparc_mode_class {
4725 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4726 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4727 CC_MODE, CCFP_MODE
4730 /* Modes for single-word and smaller quantities. */
4731 #define S_MODES \
4732 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4734 /* Modes for double-word and smaller quantities. */
4735 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4737 /* Modes for quad-word and smaller quantities. */
4738 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4740 /* Modes for 8-word and smaller quantities. */
4741 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4743 /* Modes for single-float quantities. */
4744 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4746 /* Modes for double-float and smaller quantities. */
4747 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4749 /* Modes for quad-float and smaller quantities. */
4750 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4752 /* Modes for quad-float pairs and smaller quantities. */
4753 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4755 /* Modes for double-float only quantities. */
4756 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4758 /* Modes for quad-float and double-float only quantities. */
4759 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4761 /* Modes for quad-float pairs and double-float only quantities. */
4762 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4764 /* Modes for condition codes. */
4765 #define CC_MODES (1 << (int) CC_MODE)
4766 #define CCFP_MODES (1 << (int) CCFP_MODE)
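/* Editorial aside: expanding one of the masks above as an example,
   D_MODES sets the H_MODE, S_MODE, SF_MODE, D_MODE and DF_MODE bits,
   i.e. it covers every quantity of at most double-word size, integer
   or floating-point alike.  */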
4768 /* Value is 1 if register/mode pair is acceptable on sparc.
4769 The funny mixture of D and T modes is because integer operations
4770 do not specially operate on tetra quantities, so non-quad-aligned
4771 registers can hold quadword quantities (except %o4 and %i4 because
4772 they cross fixed registers). */
4774 /* This points to either the 32 bit or the 64 bit version. */
4775 const int *hard_regno_mode_classes;
4777 static const int hard_32bit_mode_classes[] = {
4778 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4779 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4780 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4781 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4783 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4784 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4785 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4786 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4788 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4789 and none can hold SFmode/SImode values. */
4790 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4791 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4792 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4793 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4795 /* %fcc[0123] */
4796 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4798 /* %icc, %sfp, %gsr */
4799 CC_MODES, 0, D_MODES
4802 static const int hard_64bit_mode_classes[] = {
4803 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4804 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4805 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4806 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4808 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4809 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4810 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4811 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4813 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4814 and none can hold SFmode/SImode values. */
4815 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4816 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4817 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4818 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4820 /* %fcc[0123] */
4821 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4823 /* %icc, %sfp, %gsr */
4824 CC_MODES, 0, D_MODES
4827 int sparc_mode_class [NUM_MACHINE_MODES];
4829 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4831 static void
4832 sparc_init_modes (void)
4834 int i;
4836 for (i = 0; i < NUM_MACHINE_MODES; i++)
4838 machine_mode m = (machine_mode) i;
4839 unsigned int size = GET_MODE_SIZE (m);
4841 switch (GET_MODE_CLASS (m))
4843 case MODE_INT:
4844 case MODE_PARTIAL_INT:
4845 case MODE_COMPLEX_INT:
4846 if (size < 4)
4847 sparc_mode_class[i] = 1 << (int) H_MODE;
4848 else if (size == 4)
4849 sparc_mode_class[i] = 1 << (int) S_MODE;
4850 else if (size == 8)
4851 sparc_mode_class[i] = 1 << (int) D_MODE;
4852 else if (size == 16)
4853 sparc_mode_class[i] = 1 << (int) T_MODE;
4854 else if (size == 32)
4855 sparc_mode_class[i] = 1 << (int) O_MODE;
4856 else
4857 sparc_mode_class[i] = 0;
4858 break;
4859 case MODE_VECTOR_INT:
4860 if (size == 4)
4861 sparc_mode_class[i] = 1 << (int) SF_MODE;
4862 else if (size == 8)
4863 sparc_mode_class[i] = 1 << (int) DF_MODE;
4864 else
4865 sparc_mode_class[i] = 0;
4866 break;
4867 case MODE_FLOAT:
4868 case MODE_COMPLEX_FLOAT:
4869 if (size == 4)
4870 sparc_mode_class[i] = 1 << (int) SF_MODE;
4871 else if (size == 8)
4872 sparc_mode_class[i] = 1 << (int) DF_MODE;
4873 else if (size == 16)
4874 sparc_mode_class[i] = 1 << (int) TF_MODE;
4875 else if (size == 32)
4876 sparc_mode_class[i] = 1 << (int) OF_MODE;
4877 else
4878 sparc_mode_class[i] = 0;
4879 break;
4880 case MODE_CC:
4881 if (m == CCFPmode || m == CCFPEmode)
4882 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4883 else
4884 sparc_mode_class[i] = 1 << (int) CC_MODE;
4885 break;
4886 default:
4887 sparc_mode_class[i] = 0;
4888 break;
4892 if (TARGET_ARCH64)
4893 hard_regno_mode_classes = hard_64bit_mode_classes;
4894 else
4895 hard_regno_mode_classes = hard_32bit_mode_classes;
4897 /* Initialize the array used by REGNO_REG_CLASS. */
4898 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4900 if (i < 16 && TARGET_V8PLUS)
4901 sparc_regno_reg_class[i] = I64_REGS;
4902 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4903 sparc_regno_reg_class[i] = GENERAL_REGS;
4904 else if (i < 64)
4905 sparc_regno_reg_class[i] = FP_REGS;
4906 else if (i < 96)
4907 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4908 else if (i < 100)
4909 sparc_regno_reg_class[i] = FPCC_REGS;
4910 else
4911 sparc_regno_reg_class[i] = NO_REGS;
4915 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4917 static inline bool
4918 save_global_or_fp_reg_p (unsigned int regno,
4919 int leaf_function ATTRIBUTE_UNUSED)
4921 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4924 /* Return whether the return address register (%i7) is needed. */
4926 static inline bool
4927 return_addr_reg_needed_p (int leaf_function)
4929 /* If it is live, for example because of __builtin_return_address (0). */
4930 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4931 return true;
4933 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4934 if (!leaf_function
4935 /* Loading the GOT register clobbers %o7. */
4936 || crtl->uses_pic_offset_table
4937 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4938 return true;
4940 return false;
4943 /* Return whether REGNO, a local or in register, must be saved/restored. */
4945 static bool
4946 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4948 /* General case: call-saved registers live at some point. */
4949 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4950 return true;
4952 /* Frame pointer register (%fp) if needed. */
4953 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4954 return true;
4956 /* Return address register (%i7) if needed. */
4957 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4958 return true;
4960 /* GOT register (%l7) if needed. */
4961 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4962 return true;
4964 /* If the function accesses prior frames, the frame pointer and the return
4965 address of the previous frame must be saved on the stack. */
4966 if (crtl->accesses_prior_frames
4967 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4968 return true;
4970 return false;
4973 /* Compute the frame size required by the function. This function is called
4974 during the reload pass and also by sparc_expand_prologue. */
4976 HOST_WIDE_INT
4977 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4979 HOST_WIDE_INT frame_size, apparent_frame_size;
4980 int args_size, n_global_fp_regs = 0;
4981 bool save_local_in_regs_p = false;
4982 unsigned int i;
4984 /* If the function allocates dynamic stack space, the dynamic offset is
4985 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4986 if (leaf_function && !cfun->calls_alloca)
4987 args_size = 0;
4988 else
4989 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4991 /* Calculate space needed for global registers. */
4992 if (TARGET_ARCH64)
4993 for (i = 0; i < 8; i++)
4994 if (save_global_or_fp_reg_p (i, 0))
4995 n_global_fp_regs += 2;
4996 else
4997 for (i = 0; i < 8; i += 2)
4998 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4999 n_global_fp_regs += 2;
5001 /* In the flat window model, find out which local and in registers need to
5002 be saved. We don't reserve space in the current frame for them as they
5003 will be spilled into the register window save area of the caller's frame.
5004 However, as soon as we use this register window save area, we must create
5005 that of the current frame to make it the live one. */
5006 if (TARGET_FLAT)
5007 for (i = 16; i < 32; i++)
5008 if (save_local_or_in_reg_p (i, leaf_function))
5010 save_local_in_regs_p = true;
5011 break;
5014 /* Calculate space needed for FP registers. */
5015 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5016 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5017 n_global_fp_regs += 2;
5019 if (size == 0
5020 && n_global_fp_regs == 0
5021 && args_size == 0
5022 && !save_local_in_regs_p)
5023 frame_size = apparent_frame_size = 0;
5024 else
5026 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5027 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
5028 apparent_frame_size += n_global_fp_regs * 4;
5030 /* We need to add the size of the outgoing argument area. */
5031 frame_size = apparent_frame_size + ((args_size + 7) & -8);
5033 /* And that of the register window save area. */
5034 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5036 /* Finally, bump to the appropriate alignment. */
5037 frame_size = SPARC_STACK_ALIGN (frame_size);
5040 /* Set up values for use in prologue and epilogue. */
5041 sparc_frame_size = frame_size;
5042 sparc_apparent_frame_size = apparent_frame_size;
5043 sparc_n_global_fp_regs = n_global_fp_regs;
5044 sparc_save_local_in_regs_p = save_local_in_regs_p;
5046 return frame_size;
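/* Editorial sketch (assumed macro values are illustrative only): a
   hypothetical 32-bit, non-leaf run of the computation above with 20
   bytes of locals, no saved global/FP registers, no extra outgoing
   args, and assuming STARTING_FRAME_OFFSET == 0,
   REG_PARM_STACK_SPACE == 24 and FIRST_PARM_OFFSET == 68:

     apparent_frame_size = (20 + 7) & -8           =  24
     frame_size = 24 + ((24 + 7) & -8)             =  48
     frame_size += 68                              = 116
     frame_size = SPARC_STACK_ALIGN (116)          = 120

   assuming 8-byte stack alignment in 32-bit mode.  */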
5049 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5051 HOST_WIDE_INT
5052 sparc_initial_elimination_offset (int to)
5054 int offset;
5056 if (to == STACK_POINTER_REGNUM)
5057 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5058 else
5059 offset = 0;
5061 offset += SPARC_STACK_BIAS;
5062 return offset;
5065 /* Output any necessary .register pseudo-ops. */
5067 void
5068 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5070 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5071 int i;
5073 if (TARGET_ARCH32)
5074 return;
5076 /* Check if %g[2367] were used without
5077 .register being printed for them already. */
5078 for (i = 2; i < 8; i++)
5080 if (df_regs_ever_live_p (i)
5081 && ! sparc_hard_reg_printed [i])
5083 sparc_hard_reg_printed [i] = 1;
5084 /* %g7 is used as TLS base register, use #ignore
5085 for it instead of #scratch. */
5086 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5087 i == 7 ? "ignore" : "scratch");
5089 if (i == 3) i = 5;
5091 #endif
5094 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5096 #if PROBE_INTERVAL > 4096
5097 #error Cannot use indexed addressing mode for stack probing
5098 #endif
5100 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5101 inclusive. These are offsets from the current stack pointer.
5103 Note that we don't use the REG+REG addressing mode for the probes because
5104 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5105 so the advantages of having a single code path win here. */
5107 static void
5108 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5110 rtx g1 = gen_rtx_REG (Pmode, 1);
5112 /* See if we have a constant small number of probes to generate. If so,
5113 that's the easy case. */
5114 if (size <= PROBE_INTERVAL)
5116 emit_move_insn (g1, GEN_INT (first));
5117 emit_insn (gen_rtx_SET (VOIDmode, g1,
5118 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5119 emit_stack_probe (plus_constant (Pmode, g1, -size));
5122 /* The run-time loop is made up of 10 insns in the generic case while the
5123 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5124 else if (size <= 5 * PROBE_INTERVAL)
5126 HOST_WIDE_INT i;
5128 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5129 emit_insn (gen_rtx_SET (VOIDmode, g1,
5130 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5131 emit_stack_probe (g1);
5133 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5134 it exceeds SIZE. If only two probes are needed, this will not
5135 generate any code. Then probe at FIRST + SIZE. */
5136 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5138 emit_insn (gen_rtx_SET (VOIDmode, g1,
5139 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5140 emit_stack_probe (g1);
5143 emit_stack_probe (plus_constant (Pmode, g1,
5144 (i - PROBE_INTERVAL) - size));
5147 /* Otherwise, do the same as above, but in a loop. Note that we must be
5148 extra careful with variables wrapping around because we might be at
5149 the very top (or the very bottom) of the address space and we have
5150 to be able to handle this case properly; in particular, we use an
5151 equality test for the loop condition. */
5152 else
5154 HOST_WIDE_INT rounded_size;
5155 rtx g4 = gen_rtx_REG (Pmode, 4);
5157 emit_move_insn (g1, GEN_INT (first));
5160 /* Step 1: round SIZE to the previous multiple of the interval. */
5162 rounded_size = size & -PROBE_INTERVAL;
5163 emit_move_insn (g4, GEN_INT (rounded_size));
5166 /* Step 2: compute initial and final value of the loop counter. */
5168 /* TEST_ADDR = SP + FIRST. */
5169 emit_insn (gen_rtx_SET (VOIDmode, g1,
5170 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5172 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5173 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
5176 /* Step 3: the loop
5178 while (TEST_ADDR != LAST_ADDR)
5180 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5181 probe at TEST_ADDR
5184 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5185 until it is equal to ROUNDED_SIZE. */
5187 if (TARGET_ARCH64)
5188 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5189 else
5190 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5193 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5194 that SIZE is equal to ROUNDED_SIZE. */
5196 if (size != rounded_size)
5197 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5200 /* Make sure nothing is scheduled before we are done. */
5201 emit_insn (gen_blockage ());
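/* Editorial aside: a worked example for the unrolled case above, with
   PROBE_INTERVAL == 4096, FIRST == F and SIZE == 3*4096 + 100.  Probes
   are emitted at SP - (F + 4096), SP - (F + 8192), SP - (F + 12288)
   and finally at SP - (F + 12388), i.e. at FIRST + SIZE as promised
   by the head comment.  */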
5204 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5205 absolute addresses. */
5207 const char *
5208 output_probe_stack_range (rtx reg1, rtx reg2)
5210 static int labelno = 0;
5211 char loop_lab[32], end_lab[32];
5212 rtx xops[2];
5214 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
5215 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
5217 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5219 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
5220 xops[0] = reg1;
5221 xops[1] = reg2;
5222 output_asm_insn ("cmp\t%0, %1", xops);
5223 if (TARGET_ARCH64)
5224 fputs ("\tbe,pn\t%xcc,", asm_out_file);
5225 else
5226 fputs ("\tbe\t", asm_out_file);
5227 assemble_name_raw (asm_out_file, end_lab);
5228 fputc ('\n', asm_out_file);
5230 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5231 xops[1] = GEN_INT (-PROBE_INTERVAL);
5232 output_asm_insn (" add\t%0, %1, %0", xops);
5234 /* Probe at TEST_ADDR and branch. */
5235 if (TARGET_ARCH64)
5236 fputs ("\tba,pt\t%xcc,", asm_out_file);
5237 else
5238 fputs ("\tba\t", asm_out_file);
5239 assemble_name_raw (asm_out_file, loop_lab);
5240 fputc ('\n', asm_out_file);
5241 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5242 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5244 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
5246 return "";
5249 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5250 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5251 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5252 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5253 the action to be performed if it returns false. Return the new offset. */
5255 typedef bool (*sorr_pred_t) (unsigned int, int);
5256 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5258 static int
5259 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5260 int offset, int leaf_function, sorr_pred_t save_p,
5261 sorr_act_t action_true, sorr_act_t action_false)
5263 unsigned int i;
5264 rtx mem;
5265 rtx_insn *insn;
5267 if (TARGET_ARCH64 && high <= 32)
5269 int fp_offset = -1;
5271 for (i = low; i < high; i++)
5273 if (save_p (i, leaf_function))
5275 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5276 base, offset));
5277 if (action_true == SORR_SAVE)
5279 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5280 RTX_FRAME_RELATED_P (insn) = 1;
5282 else /* action_true == SORR_RESTORE */
5284 /* The frame pointer must be restored last since its old
5285 value may be used as base address for the frame. This
5286 is problematic in 64-bit mode only because of the lack
5287 of double-word load instruction. */
5288 if (i == HARD_FRAME_POINTER_REGNUM)
5289 fp_offset = offset;
5290 else
5291 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5293 offset += 8;
5295 else if (action_false == SORR_ADVANCE)
5296 offset += 8;
5299 if (fp_offset >= 0)
5301 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5302 emit_move_insn (hard_frame_pointer_rtx, mem);
5305 else
5307 for (i = low; i < high; i += 2)
5309 bool reg0 = save_p (i, leaf_function);
5310 bool reg1 = save_p (i + 1, leaf_function);
5311 machine_mode mode;
5312 int regno;
5314 if (reg0 && reg1)
5316 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5317 regno = i;
5319 else if (reg0)
5321 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5322 regno = i;
5324 else if (reg1)
5326 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5327 regno = i + 1;
5328 offset += 4;
5330 else
5332 if (action_false == SORR_ADVANCE)
5333 offset += 8;
5334 continue;
5337 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5338 if (action_true == SORR_SAVE)
5340 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5341 RTX_FRAME_RELATED_P (insn) = 1;
5342 if (mode == DImode)
5344 rtx set1, set2;
5345 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5346 offset));
5347 set1 = gen_rtx_SET (VOIDmode, mem,
5348 gen_rtx_REG (SImode, regno));
5349 RTX_FRAME_RELATED_P (set1) = 1;
5350 mem
5351 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5352 offset + 4));
5353 set2 = gen_rtx_SET (VOIDmode, mem,
5354 gen_rtx_REG (SImode, regno + 1));
5355 RTX_FRAME_RELATED_P (set2) = 1;
5356 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5357 gen_rtx_PARALLEL (VOIDmode,
5358 gen_rtvec (2, set1, set2)));
5361 else /* action_true == SORR_RESTORE */
5362 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5364 /* Always preserve double-word alignment. */
5365 offset = (offset + 8) & -8;
5369 return offset;
5372 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5374 static rtx
5375 emit_adjust_base_to_offset (rtx base, int offset)
5377 /* ??? This might be optimized a little as %g1 might already have a
5378 value close enough that a single add insn will do. */
5379 /* ??? Although, all of this is probably only a temporary fix because
5380 if %g1 can hold a function result, then sparc_expand_epilogue will
5381 lose (the result will be clobbered). */
5382 rtx new_base = gen_rtx_REG (Pmode, 1);
5383 emit_move_insn (new_base, GEN_INT (offset));
5384 emit_insn (gen_rtx_SET (VOIDmode,
5385 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5386 return new_base;
5389 /* Emit code to save/restore call-saved global and FP registers. */
5391 static void
5392 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5394 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5396 base = emit_adjust_base_to_offset (base, offset);
5397 offset = 0;
5400 offset
5401 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5402 save_global_or_fp_reg_p, action, SORR_NONE);
5403 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5404 save_global_or_fp_reg_p, action, SORR_NONE);
5407 /* Emit code to save/restore call-saved local and in registers. */
5409 static void
5410 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5412 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5414 base = emit_adjust_base_to_offset (base, offset);
5415 offset = 0;
5418 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5419 save_local_or_in_reg_p, action, SORR_ADVANCE);
5422 /* Emit a window_save insn. */
5424 static rtx_insn *
5425 emit_window_save (rtx increment)
5427 rtx_insn *insn = emit_insn (gen_window_save (increment));
5428 RTX_FRAME_RELATED_P (insn) = 1;
5430 /* The incoming return address (%o7) is saved in %i7. */
5431 add_reg_note (insn, REG_CFA_REGISTER,
5432 gen_rtx_SET (VOIDmode,
5433 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5434 gen_rtx_REG (Pmode,
5435 INCOMING_RETURN_ADDR_REGNUM)));
5437 /* The window save event. */
5438 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5440 /* The CFA is %fp, the hard frame pointer. */
5441 add_reg_note (insn, REG_CFA_DEF_CFA,
5442 plus_constant (Pmode, hard_frame_pointer_rtx,
5443 INCOMING_FRAME_SP_OFFSET));
5445 return insn;
5448 /* Generate an increment for the stack pointer. */
5450 static rtx
5451 gen_stack_pointer_inc (rtx increment)
5453 return gen_rtx_SET (VOIDmode,
5454 stack_pointer_rtx,
5455 gen_rtx_PLUS (Pmode,
5456 stack_pointer_rtx,
5457 increment));
5460 /* Expand the function prologue. The prologue is responsible for reserving
5461 storage for the frame, saving the call-saved registers and loading the
5462 GOT register if needed. */
5464 void
5465 sparc_expand_prologue (void)
5467 HOST_WIDE_INT size;
5468 rtx_insn *insn;
5470 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5471 on the final value of the flag means deferring the prologue/epilogue
5472 expansion until just before the second scheduling pass, which is too
5473 late to emit multiple epilogues or return insns.
5475 Of course we are making the assumption that the value of the flag
5476 will not change between now and its final value. Of the three parts
5477 of the formula, only the last one can reasonably vary. Let's take a
5478 closer look, after assuming that the first two ones are set to true
5479 (otherwise the last value is effectively silenced).
5481 If only_leaf_regs_used returns false, the global predicate will also
5482 be false so the actual frame size calculated below will be positive.
5483 As a consequence, the save_register_window insn will be emitted in
5484 the instruction stream; now this insn explicitly references %fp
5485 which is not a leaf register so only_leaf_regs_used will always
5486 return false subsequently.
5488 If only_leaf_regs_used returns true, we hope that the subsequent
5489 optimization passes won't cause non-leaf registers to pop up. For
5490 example, the regrename pass has special provisions to not rename to
5491 non-leaf registers in a leaf function. */
5492 sparc_leaf_function_p
5493 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5495 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5497 if (flag_stack_usage_info)
5498 current_function_static_stack_size = size;
5500 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5502 if (crtl->is_leaf && !cfun->calls_alloca)
5504 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5505 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5506 size - STACK_CHECK_PROTECT);
5508 else if (size > 0)
5509 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5512 if (size == 0)
5513 ; /* do nothing. */
5514 else if (sparc_leaf_function_p)
5516 rtx size_int_rtx = GEN_INT (-size);
5518 if (size <= 4096)
5519 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5520 else if (size <= 8192)
5522 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5523 RTX_FRAME_RELATED_P (insn) = 1;
5525 /* %sp is still the CFA register. */
5526 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5528 else
5530 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5531 emit_move_insn (size_rtx, size_int_rtx);
5532 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5533 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5534 gen_stack_pointer_inc (size_int_rtx));
5537 RTX_FRAME_RELATED_P (insn) = 1;
5539 else
5541 rtx size_int_rtx = GEN_INT (-size);
5543 if (size <= 4096)
5544 emit_window_save (size_int_rtx);
5545 else if (size <= 8192)
5547 emit_window_save (GEN_INT (-4096));
5549 /* %sp is not the CFA register anymore. */
5550 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5552 /* Make sure no %fp-based store is issued until after the frame is
5553 established. The offset between the frame pointer and the stack
5554 pointer is calculated relative to the value of the stack pointer
5555 at the end of the function prologue, and moving instructions that
5556 access the stack via the frame pointer between the instructions
5557 that decrement the stack pointer could result in accessing the
5558 register window save area, which is volatile. */
5559 emit_insn (gen_frame_blockage ());
5561 else
5563 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5564 emit_move_insn (size_rtx, size_int_rtx);
5565 emit_window_save (size_rtx);
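/* Editorial aside: the 4096/8192 thresholds above reflect the simm13
   immediate range of "add" ([-4096, 4095]): one "add %sp, -size, %sp"
   suffices up to 4096, two adds of at most -4096 each cover up to
   8192, and larger sizes must first be loaded into %g1.  */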
5569 if (sparc_leaf_function_p)
5571 sparc_frame_base_reg = stack_pointer_rtx;
5572 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5574 else
5576 sparc_frame_base_reg = hard_frame_pointer_rtx;
5577 sparc_frame_base_offset = SPARC_STACK_BIAS;
5580 if (sparc_n_global_fp_regs > 0)
5581 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5582 sparc_frame_base_offset
5583 - sparc_apparent_frame_size,
5584 SORR_SAVE);
5586 /* Load the GOT register if needed. */
5587 if (crtl->uses_pic_offset_table)
5588 load_got_register ();
5590 /* Advertise that the data calculated just above are now valid. */
5591 sparc_prologue_data_valid_p = true;
5594 /* Expand the function prologue. The prologue is responsible for reserving
5595 storage for the frame, saving the call-saved registers and loading the
5596 GOT register if needed. */
5598 void
5599 sparc_flat_expand_prologue (void)
5601 HOST_WIDE_INT size;
5602 rtx_insn *insn;
5604 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5606 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5608 if (flag_stack_usage_info)
5609 current_function_static_stack_size = size;
5611 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5613 if (crtl->is_leaf && !cfun->calls_alloca)
5615 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5616 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5617 size - STACK_CHECK_PROTECT);
5619 else if (size > 0)
5620 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5623 if (sparc_save_local_in_regs_p)
5624 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5625 SORR_SAVE);
5627 if (size == 0)
5628 ; /* do nothing. */
5629 else
5631 rtx size_int_rtx, size_rtx;
5633 size_rtx = size_int_rtx = GEN_INT (-size);
5635 /* We establish the frame (i.e. decrement the stack pointer) first, even
5636 if we use a frame pointer, because we cannot clobber any call-saved
5637 registers, including the frame pointer, if we haven't created a new
5638 register save area, for the sake of compatibility with the ABI. */
5639 if (size <= 4096)
5640 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5641 else if (size <= 8192 && !frame_pointer_needed)
5643 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5644 RTX_FRAME_RELATED_P (insn) = 1;
5645 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5647 else
5649 size_rtx = gen_rtx_REG (Pmode, 1);
5650 emit_move_insn (size_rtx, size_int_rtx);
5651 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5652 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5653 gen_stack_pointer_inc (size_int_rtx));
5655 RTX_FRAME_RELATED_P (insn) = 1;
5657 /* Ensure nothing is scheduled until after the frame is established. */
5658 emit_insn (gen_blockage ());
5660 if (frame_pointer_needed)
5662 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5663 gen_rtx_MINUS (Pmode,
5664 stack_pointer_rtx,
5665 size_rtx)));
5666 RTX_FRAME_RELATED_P (insn) = 1;
5668 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5669 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5670 plus_constant (Pmode, stack_pointer_rtx,
5671 size)));
5674 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5676 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5677 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5679 insn = emit_move_insn (i7, o7);
5680 RTX_FRAME_RELATED_P (insn) = 1;
5682 add_reg_note (insn, REG_CFA_REGISTER,
5683 gen_rtx_SET (VOIDmode, i7, o7));
5685 /* Prevent this instruction from ever being considered dead,
5686 even if this function has no epilogue. */
5687 emit_use (i7);
5691 if (frame_pointer_needed)
5693 sparc_frame_base_reg = hard_frame_pointer_rtx;
5694 sparc_frame_base_offset = SPARC_STACK_BIAS;
5696 else
5698 sparc_frame_base_reg = stack_pointer_rtx;
5699 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5702 if (sparc_n_global_fp_regs > 0)
5703 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5704 sparc_frame_base_offset
5705 - sparc_apparent_frame_size,
5706 SORR_SAVE);
5708 /* Load the GOT register if needed. */
5709 if (crtl->uses_pic_offset_table)
5710 load_got_register ();
5712 /* Advertise that the data calculated just above are now valid. */
5713 sparc_prologue_data_valid_p = true;
5716 /* This function generates the assembly code for function entry, which boils
5717 down to emitting the necessary .register directives. */
5719 static void
5720 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5722 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5723 if (!TARGET_FLAT)
5724 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5726 sparc_output_scratch_registers (file);
5729 /* Expand the function epilogue, either normal or part of a sibcall.
5730 We emit all the instructions except the return or the call. */
5732 void
5733 sparc_expand_epilogue (bool for_eh)
5735 HOST_WIDE_INT size = sparc_frame_size;
5737 if (sparc_n_global_fp_regs > 0)
5738 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5739 sparc_frame_base_offset
5740 - sparc_apparent_frame_size,
5741 SORR_RESTORE);
5743 if (size == 0 || for_eh)
5744 ; /* do nothing. */
5745 else if (sparc_leaf_function_p)
5747 if (size <= 4096)
5748 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5749 else if (size <= 8192)
5751 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5752 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5754 else
5756 rtx reg = gen_rtx_REG (Pmode, 1);
5757 emit_move_insn (reg, GEN_INT (size));
5758 emit_insn (gen_stack_pointer_inc (reg));
5763 /* Expand the function epilogue, either normal or part of a sibcall.
5764 We emit all the instructions except the return or the call. */
5766 void
5767 sparc_flat_expand_epilogue (bool for_eh)
5769 HOST_WIDE_INT size = sparc_frame_size;
5771 if (sparc_n_global_fp_regs > 0)
5772 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5773 sparc_frame_base_offset
5774 - sparc_apparent_frame_size,
5775 SORR_RESTORE);
5777 /* If we have a frame pointer, we'll need both to restore it before the
5778 frame is destroyed and use its current value in destroying the frame.
5779 Since we don't have an atomic way to do that in the flat window model,
5780 we save the current value into a temporary register (%g1). */
5781 if (frame_pointer_needed && !for_eh)
5782 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5784 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5785 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5786 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5788 if (sparc_save_local_in_regs_p)
5789 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5790 sparc_frame_base_offset,
5791 SORR_RESTORE);
5793 if (size == 0 || for_eh)
5794 ; /* do nothing. */
5795 else if (frame_pointer_needed)
5797 /* Make sure the frame is destroyed after everything else is done. */
5798 emit_insn (gen_blockage ());
5800 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5802 else
5804 /* Likewise. */
5805 emit_insn (gen_blockage ());
5807 if (size <= 4096)
5808 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5809 else if (size <= 8192)
5811 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5812 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5814 else
5816 rtx reg = gen_rtx_REG (Pmode, 1);
5817 emit_move_insn (reg, GEN_INT (size));
5818 emit_insn (gen_stack_pointer_inc (reg));
5823 /* Return true if it is appropriate to emit `return' instructions in the
5824 body of a function. */
5826 bool
5827 sparc_can_use_return_insn_p (void)
5829 return sparc_prologue_data_valid_p
5830 && sparc_n_global_fp_regs == 0
5831 && (TARGET_FLAT
5832 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5833 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5836 /* This function generates the assembly code for function exit. */
5838 static void
5839 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5841 /* If the last two instructions of a function are "call foo; dslot;"
5842 the return address might point to the first instruction in the next
5843 function and we have to output a dummy nop for the sake of sane
5844 backtraces in such cases. This is pointless for sibling calls since
5845 the return address is explicitly adjusted. */
5847 rtx insn, last_real_insn;
5849 insn = get_last_insn ();
5851 last_real_insn = prev_real_insn (insn);
5852 if (last_real_insn
5853 && NONJUMP_INSN_P (last_real_insn)
5854 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5855 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5857 if (last_real_insn
5858 && CALL_P (last_real_insn)
5859 && !SIBLING_CALL_P (last_real_insn))
5860 fputs("\tnop\n", file);
5862 sparc_output_deferred_case_vectors ();
5865 /* Output a 'restore' instruction. */
5867 static void
5868 output_restore (rtx pat)
5870 rtx operands[3];
5872 if (! pat)
5874 fputs ("\t restore\n", asm_out_file);
5875 return;
5878 gcc_assert (GET_CODE (pat) == SET);
5880 operands[0] = SET_DEST (pat);
5881 pat = SET_SRC (pat);
5883 switch (GET_CODE (pat))
5885 case PLUS:
5886 operands[1] = XEXP (pat, 0);
5887 operands[2] = XEXP (pat, 1);
5888 output_asm_insn (" restore %r1, %2, %Y0", operands);
5889 break;
5890 case LO_SUM:
5891 operands[1] = XEXP (pat, 0);
5892 operands[2] = XEXP (pat, 1);
5893 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5894 break;
5895 case ASHIFT:
5896 operands[1] = XEXP (pat, 0);
5897 gcc_assert (XEXP (pat, 1) == const1_rtx);
5898 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5899 break;
5900 default:
5901 operands[1] = pat;
5902 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5903 break;
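/* Illustrative usage, not part of GCC: feeding output_restore the
   delay-slot pattern (set %i0 (plus %o0 %o1)) folds the addition into
   the restore.  Hard register numbers 24, 8 and 9 are %i0, %o0 and %o1;
   the wrapper function is hypothetical.  */
#if 0
static void
example_output_restore_usage (void)
{
  rtx pat = gen_rtx_SET (VOIDmode,
			 gen_rtx_REG (SImode, 24),		/* %i0 */
			 gen_rtx_PLUS (SImode,
				       gen_rtx_REG (SImode, 8),	/* %o0 */
				       gen_rtx_REG (SImode, 9))); /* %o1 */
  output_restore (pat);	/* prints " restore %o0, %o1, %i0" */
}
#endif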
5907 /* Output a return. */
5909 const char *
5910 output_return (rtx_insn *insn)
5912 if (crtl->calls_eh_return)
5914 /* If the function uses __builtin_eh_return, the eh_return
5915 machinery occupies the delay slot. */
5916 gcc_assert (!final_sequence);
5918 if (flag_delayed_branch)
5920 if (!TARGET_FLAT && TARGET_V9)
5921 fputs ("\treturn\t%i7+8\n", asm_out_file);
5922 else
5924 if (!TARGET_FLAT)
5925 fputs ("\trestore\n", asm_out_file);
5927 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5930 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5932 else
5934 if (!TARGET_FLAT)
5935 fputs ("\trestore\n", asm_out_file);
5937 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5938 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5941 else if (sparc_leaf_function_p || TARGET_FLAT)
5943 /* This is a leaf or flat function so we don't have to bother restoring
5944 the register window, which frees us from dealing with the convoluted
5945 semantics of restore/return. We simply output the jump to the
5946 return address and the insn in the delay slot (if any). */
5948 return "jmp\t%%o7+%)%#";
5950 else
5952 /* This is a regular function so we have to restore the register window.
5953 We may have a pending insn for the delay slot, which will be either
5954 combined with the 'restore' instruction or put in the delay slot of
5955 the 'return' instruction. */
5957 if (final_sequence)
5959 rtx delay, pat;
5961 delay = NEXT_INSN (insn);
5962 gcc_assert (delay);
5964 pat = PATTERN (delay);
5966 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5968 epilogue_renumber (&pat, 0);
5969 return "return\t%%i7+%)%#";
5971 else
5973 output_asm_insn ("jmp\t%%i7+%)", NULL);
5974 output_restore (pat);
5975 PATTERN (delay) = gen_blockage ();
5976 INSN_CODE (delay) = -1;
5979 else
5981 /* The delay slot is empty. */
5982 if (TARGET_V9)
5983 return "return\t%%i7+%)\n\t nop";
5984 else if (flag_delayed_branch)
5985 return "jmp\t%%i7+%)\n\t restore";
5986 else
5987 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5991 return "";
5994 /* Output a sibling call. */
5996 const char *
5997 output_sibcall (rtx_insn *insn, rtx call_operand)
5999 rtx operands[1];
6001 gcc_assert (flag_delayed_branch);
6003 operands[0] = call_operand;
6005 if (sparc_leaf_function_p || TARGET_FLAT)
6007 /* This is a leaf or flat function so we don't have to bother restoring
6008 the register window. We simply output the jump to the function and
6009 the insn in the delay slot (if any). */
6011 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6013 if (final_sequence)
6014 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6015 operands);
6016 else
6017 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6018 it into a branch if possible. */
6019 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6020 operands);
6022 else
6024 /* This is a regular function so we have to restore the register window.
6025 We may have a pending insn for the delay slot, which will be combined
6026 with the 'restore' instruction. */
6028 output_asm_insn ("call\t%a0, 0", operands);
6030 if (final_sequence)
6032 rtx_insn *delay = NEXT_INSN (insn);
6033 gcc_assert (delay);
6035 output_restore (PATTERN (delay));
6037 PATTERN (delay) = gen_blockage ();
6038 INSN_CODE (delay) = -1;
6040 else
6041 output_restore (NULL_RTX);
6044 return "";
6047 /* Functions for handling argument passing.
6049 For 32-bit, the first 6 args are normally in registers and the rest are
6050 pushed. Any arg that starts within the first 6 words is at least
6051 partially passed in a register unless its data type forbids.
6053 For 64-bit, the argument registers are laid out as an array of 16 elements
6054 and arguments are added sequentially. The first 6 int args and up to the
6055 first 16 fp args (depending on size) are passed in regs.
6057 Slot Stack Integral Float Float in structure Double Long Double
6058 ---- ----- -------- ----- ------------------ ------ -----------
6059 15 [SP+248] %f31 %f30,%f31 %d30
6060 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6061 13 [SP+232] %f27 %f26,%f27 %d26
6062 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6063 11 [SP+216] %f23 %f22,%f23 %d22
6064 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6065 9 [SP+200] %f19 %f18,%f19 %d18
6066 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6067 7 [SP+184] %f15 %f14,%f15 %d14
6068 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6069 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6070 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6071 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6072 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6073 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6074 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6076 Here SP = %sp with -mno-stack-bias, and %sp+stack_bias otherwise.
6078 Integral arguments are always passed as 64-bit quantities appropriately
6079 extended.
6081 Passing of floating point values is handled as follows.
6082 If a prototype is in scope:
6083 If the value is in a named argument (i.e. the function is not stdarg,
6084 or the value is not part of the `...'), then the value is passed in
6085 the appropriate fp reg.
6086 If the value is part of the `...' and is passed in one of the first 6
6087 slots then the value is passed in the appropriate int reg.
6088 If the value is part of the `...' and is not passed in one of the first 6
6089 slots then the value is passed in memory.
6090 If a prototype is not in scope:
6091 If the value is one of the first 6 arguments the value is passed in the
6092 appropriate integer reg and the appropriate fp reg.
6093 If the value is not one of the first 6 arguments the value is passed in
6094 the appropriate fp reg and in memory.
6097 Summary of the calling conventions implemented by GCC on the SPARC:
6099 32-bit ABI:
6100 size argument return value
6102 small integer <4 int. reg. int. reg.
6103 word 4 int. reg. int. reg.
6104 double word 8 int. reg. int. reg.
6106 _Complex small integer <8 int. reg. int. reg.
6107 _Complex word 8 int. reg. int. reg.
6108 _Complex double word 16 memory int. reg.
6110 vector integer <=8 int. reg. FP reg.
6111 vector integer >8 memory memory
6113 float 4 int. reg. FP reg.
6114 double 8 int. reg. FP reg.
6115 long double 16 memory memory
6117 _Complex float 8 memory FP reg.
6118 _Complex double 16 memory FP reg.
6119 _Complex long double 32 memory FP reg.
6121 vector float any memory memory
6123 aggregate any memory memory
6127 64-bit ABI:
6128 size argument return value
6130 small integer <8 int. reg. int. reg.
6131 word 8 int. reg. int. reg.
6132 double word 16 int. reg. int. reg.
6134 _Complex small integer <16 int. reg. int. reg.
6135 _Complex word 16 int. reg. int. reg.
6136 _Complex double word 32 memory int. reg.
6138 vector integer <=16 FP reg. FP reg.
6139 vector integer 16<s<=32 memory FP reg.
6140 vector integer >32 memory memory
6142 float 4 FP reg. FP reg.
6143 double 8 FP reg. FP reg.
6144 long double 16 FP reg. FP reg.
6146 _Complex float 8 FP reg. FP reg.
6147 _Complex double 16 FP reg. FP reg.
6148 _Complex long double 32 memory FP reg.
6150 vector float <=16 FP reg. FP reg.
6151 vector float 16<s<=32 memory FP reg.
6152 vector float >32 memory memory
6154 aggregate <=16 reg. reg.
6155 aggregate 16<s<=32 memory reg.
6156 aggregate >32 memory memory
6160 Note #1: complex floating-point types follow the extended SPARC ABIs as
6161 implemented by the Sun compiler.
6163 Note #2: integral vector types follow the scalar floating-point types
6164 conventions to match what is implemented by the Sun VIS SDK.
6166 Note #3: floating-point vector types follow the aggregate types
6167 conventions. */
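/* Illustrative sketch, not part of GCC: the byte offset of 64-bit ABI
   argument slot N in the table above, relative to the (biased) SP.
   The function name is hypothetical.  */
#if 0
static HOST_WIDE_INT
example_arg_slot_offset (int slotno)
{
  /* 16 register-save words of 8 bytes each sit below the arg slots,
     so slot 0 is at SP+128 and slot 15 at SP+248.  */
  return 128 + 8 * slotno;
}
#endif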
6170 /* Maximum number of int regs for args. */
6171 #define SPARC_INT_ARG_MAX 6
6172 /* Maximum number of fp regs for args. */
6173 #define SPARC_FP_ARG_MAX 16
6175 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
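/* For example, with UNITS_PER_WORD == 8 (64-bit), ROUND_ADVANCE (1) and
   ROUND_ADVANCE (8) are both 1 slot, while ROUND_ADVANCE (9) is 2.  */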
6177 /* Handle the INIT_CUMULATIVE_ARGS macro.
6178 Initialize a variable CUM of type CUMULATIVE_ARGS
6179 for a call to a function whose data type is FNTYPE.
6180 For a library call, FNTYPE is 0. */
6182 void
6183 init_cumulative_args (struct sparc_args *cum, tree fntype,
6184 rtx libname ATTRIBUTE_UNUSED,
6185 tree fndecl ATTRIBUTE_UNUSED)
6187 cum->words = 0;
6188 cum->prototype_p = fntype && prototype_p (fntype);
6189 cum->libcall_p = fntype == 0;
6192 /* Handle promotion of pointer and integer arguments. */
6194 static machine_mode
6195 sparc_promote_function_mode (const_tree type,
6196 machine_mode mode,
6197 int *punsignedp,
6198 const_tree fntype ATTRIBUTE_UNUSED,
6199 int for_return ATTRIBUTE_UNUSED)
6201 if (type != NULL_TREE && POINTER_TYPE_P (type))
6203 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6204 return Pmode;
6207 /* Integral arguments are passed as full words, as per the ABI. */
6208 if (GET_MODE_CLASS (mode) == MODE_INT
6209 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6210 return word_mode;
6212 return mode;
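/* Illustrative usage, not part of GCC: a QImode 'char' argument comes
   back promoted to word_mode, in line with the ABI comment above.  The
   wrapper function is hypothetical.  */
#if 0
static void
example_promotion_usage (void)
{
  int unsignedp = 0;
  machine_mode promoted
    = sparc_promote_function_mode (char_type_node, QImode, &unsignedp,
				   NULL_TREE, 0);
  gcc_assert (promoted == word_mode);
}
#endif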
6215 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6217 static bool
6218 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6220 return TARGET_ARCH64 ? true : false;
6223 /* Scan the record type TYPE and return the following predicates:
6224 - INTREGS_P: the record contains at least one field or sub-field
6225 that is eligible for promotion in integer registers.
6226 - FP_REGS_P: the record contains at least one field or sub-field
6227 that is eligible for promotion in floating-point registers.
6228 - PACKED_P: the record contains at least one field that is packed.
6230 Sub-fields are not taken into account for the PACKED_P predicate. */
6232 static void
6233 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6234 int *packed_p)
6236 tree field;
6238 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6240 if (TREE_CODE (field) == FIELD_DECL)
6242 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6243 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6244 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6245 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6246 && TARGET_FPU)
6247 *fpregs_p = 1;
6248 else
6249 *intregs_p = 1;
6251 if (packed_p && DECL_PACKED (field))
6252 *packed_p = 1;
6257 /* Compute the slot number to pass an argument in.
6258 Return the slot number or -1 if passing on the stack.
6260 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6261 the preceding args and about the function being called.
6262 MODE is the argument's machine mode.
6263 TYPE is the data type of the argument (as a tree).
6264 This is null for libcalls where that information may
6265 not be available.
6266 NAMED is nonzero if this argument is a named parameter
6267 (otherwise it is an extra parameter matching an ellipsis).
6268 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6269 *PREGNO records the register number to use if scalar type.
6270 *PPADDING records the amount of padding needed in words. */
6272 static int
6273 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6274 const_tree type, bool named, bool incoming_p,
6275 int *pregno, int *ppadding)
6277 int regbase = (incoming_p
6278 ? SPARC_INCOMING_INT_ARG_FIRST
6279 : SPARC_OUTGOING_INT_ARG_FIRST);
6280 int slotno = cum->words;
6281 enum mode_class mclass;
6282 int regno;
6284 *ppadding = 0;
6286 if (type && TREE_ADDRESSABLE (type))
6287 return -1;
6289 if (TARGET_ARCH32
6290 && mode == BLKmode
6291 && type
6292 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6293 return -1;
6295 /* For SPARC64, objects requiring 16-byte alignment get it. */
6296 if (TARGET_ARCH64
6297 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6298 && (slotno & 1) != 0)
6299 slotno++, *ppadding = 1;
6301 mclass = GET_MODE_CLASS (mode);
6302 if (type && TREE_CODE (type) == VECTOR_TYPE)
6304 /* Vector types deserve special treatment because they are
6305 polymorphic wrt their mode, depending upon whether VIS
6306 instructions are enabled. */
6307 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6309 /* The SPARC port defines no floating-point vector modes. */
6310 gcc_assert (mode == BLKmode);
6312 else
6314 /* Integral vector types should either have a vector
6315 mode or an integral mode, because we are guaranteed
6316 by pass_by_reference that their size is not greater
6317 than 16 bytes and TImode is 16-byte wide. */
6318 gcc_assert (mode != BLKmode);
6320 /* Vector integers are handled like floats according to
6321 the Sun VIS SDK. */
6322 mclass = MODE_FLOAT;
6326 switch (mclass)
6328 case MODE_FLOAT:
6329 case MODE_COMPLEX_FLOAT:
6330 case MODE_VECTOR_INT:
6331 if (TARGET_ARCH64 && TARGET_FPU && named)
6333 if (slotno >= SPARC_FP_ARG_MAX)
6334 return -1;
6335 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6336 /* Arguments filling only one single FP register are
6337 right-justified in the outer double FP register. */
6338 if (GET_MODE_SIZE (mode) <= 4)
6339 regno++;
6340 break;
6341 }
6342 /* fallthrough */
6344 case MODE_INT:
6345 case MODE_COMPLEX_INT:
6346 if (slotno >= SPARC_INT_ARG_MAX)
6347 return -1;
6348 regno = regbase + slotno;
6349 break;
6351 case MODE_RANDOM:
6352 if (mode == VOIDmode)
6353 /* MODE is VOIDmode when generating the actual call. */
6354 return -1;
6356 gcc_assert (mode == BLKmode);
6358 if (TARGET_ARCH32
6359 || !type
6360 || (TREE_CODE (type) != VECTOR_TYPE
6361 && TREE_CODE (type) != RECORD_TYPE))
6363 if (slotno >= SPARC_INT_ARG_MAX)
6364 return -1;
6365 regno = regbase + slotno;
6367 else /* TARGET_ARCH64 && type */
6369 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6371 /* First see what kinds of registers we would need. */
6372 if (TREE_CODE (type) == VECTOR_TYPE)
6373 fpregs_p = 1;
6374 else
6375 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6377 /* The ABI obviously doesn't specify how packed structures
6378 are passed. These are defined to be passed in int regs
6379 if possible, otherwise memory. */
6380 if (packed_p || !named)
6381 fpregs_p = 0, intregs_p = 1;
6383 /* If all arg slots are filled, then must pass on stack. */
6384 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6385 return -1;
6387 /* If there are only int args and all int arg slots are filled,
6388 then must pass on stack. */
6389 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6390 return -1;
6392 /* Note that even if all int arg slots are filled, fp members may
6393 still be passed in regs if such regs are available.
6394 *PREGNO isn't set because there may be more than one, it's up
6395 to the caller to compute them. */
6396 return slotno;
6398 break;
6400 default :
6401 gcc_unreachable ();
6404 *pregno = regno;
6405 return slotno;
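/* Illustrative worked example, not part of GCC: on TARGET_ARCH64 with
   TARGET_FPU, a named 'double' landing in slot 2 gets
   regno = SPARC_FP_ARG_FIRST + 2*2 (%d4 in the table above), while a
   'long' in the same slot gets regbase + 2 (%o2 for outgoing args).  */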
6408 /* Handle recursive register counting for structure field layout. */
6410 struct function_arg_record_value_parms
6412 rtx ret; /* return expression being built. */
6413 int slotno; /* slot number of the argument. */
6414 int named; /* whether the argument is named. */
6415 int regbase; /* regno of the base register. */
6416 int stack; /* 1 if part of the argument is on the stack. */
6417 int intoffset; /* offset of the first pending integer field. */
6418 unsigned int nregs; /* number of words passed in registers. */
6421 static void function_arg_record_value_3
6422 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6423 static void function_arg_record_value_2
6424 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6425 static void function_arg_record_value_1
6426 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6427 static rtx function_arg_record_value (const_tree, machine_mode, int, int, int);
6428 static rtx function_arg_union_value (int, machine_mode, int, int);
6430 /* A subroutine of function_arg_record_value. Traverse the structure
6431 recursively and determine how many registers will be required. */
6433 static void
6434 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6435 struct function_arg_record_value_parms *parms,
6436 bool packed_p)
6438 tree field;
6440 /* We need to compute how many registers are needed so we can
6441 allocate the PARALLEL but before we can do that we need to know
6442 whether there are any packed fields. The ABI obviously doesn't
6443 specify how structures are passed in this case, so they are
6444 defined to be passed in int regs if possible, otherwise memory,
6445 regardless of whether there are fp values present. */
6447 if (! packed_p)
6448 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6450 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6452 packed_p = true;
6453 break;
6457 /* Compute how many registers we need. */
6458 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6460 if (TREE_CODE (field) == FIELD_DECL)
6462 HOST_WIDE_INT bitpos = startbitpos;
6464 if (DECL_SIZE (field) != 0)
6466 if (integer_zerop (DECL_SIZE (field)))
6467 continue;
6469 if (tree_fits_uhwi_p (bit_position (field)))
6470 bitpos += int_bit_position (field);
6473 /* ??? FIXME: else assume zero offset. */
6475 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6476 function_arg_record_value_1 (TREE_TYPE (field),
6477 bitpos,
6478 parms,
6479 packed_p);
6480 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6481 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6482 && TARGET_FPU
6483 && parms->named
6484 && ! packed_p)
6486 if (parms->intoffset != -1)
6488 unsigned int startbit, endbit;
6489 int intslots, this_slotno;
6491 startbit = parms->intoffset & -BITS_PER_WORD;
6492 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6494 intslots = (endbit - startbit) / BITS_PER_WORD;
6495 this_slotno = parms->slotno + parms->intoffset
6496 / BITS_PER_WORD;
6498 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6500 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6501 /* We need to pass this field on the stack. */
6502 parms->stack = 1;
6505 parms->nregs += intslots;
6506 parms->intoffset = -1;
6509 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
6510 If it wasn't true we wouldn't be here. */
6511 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6512 && DECL_MODE (field) == BLKmode)
6513 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6514 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6515 parms->nregs += 2;
6516 else
6517 parms->nregs += 1;
6519 else
6521 if (parms->intoffset == -1)
6522 parms->intoffset = bitpos;
6528 /* A subroutine of function_arg_record_value. Assign the bits of the
6529 structure between parms->intoffset and bitpos to integer registers. */
6531 static void
6532 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6533 struct function_arg_record_value_parms *parms)
6535 machine_mode mode;
6536 unsigned int regno;
6537 unsigned int startbit, endbit;
6538 int this_slotno, intslots, intoffset;
6539 rtx reg;
6541 if (parms->intoffset == -1)
6542 return;
6544 intoffset = parms->intoffset;
6545 parms->intoffset = -1;
6547 startbit = intoffset & -BITS_PER_WORD;
6548 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6549 intslots = (endbit - startbit) / BITS_PER_WORD;
6550 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6552 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6553 if (intslots <= 0)
6554 return;
6556 /* If this is the trailing part of a word, only load that much into
6557 the register. Otherwise load the whole register. Note that in
6558 the latter case we may pick up unwanted bits. It's not a problem
6559 at the moment, but we may wish to revisit this. */
6561 if (intoffset % BITS_PER_WORD != 0)
6562 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6563 MODE_INT);
6564 else
6565 mode = word_mode;
6567 intoffset /= BITS_PER_UNIT;
6568 do
6569 {
6570 regno = parms->regbase + this_slotno;
6571 reg = gen_rtx_REG (mode, regno);
6572 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6573 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6575 this_slotno += 1;
6576 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6577 mode = word_mode;
6578 parms->nregs += 1;
6579 intslots -= 1;
6580 }
6581 while (intslots > 0);
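/* Illustrative worked example, not part of GCC: with BITS_PER_WORD == 64,
   intoffset == 32 and bitpos == 128 give startbit = 0, endbit = 128,
   hence intslots = 2: the loop emits an SImode register for the trailing
   half of the first word (byte offset 4), then a full word_mode register
   at byte offset 8.  */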
6584 /* A subroutine of function_arg_record_value. Traverse the structure
6585 recursively and assign bits to floating point registers. Track which
6586 bits in between need integer registers; invoke function_arg_record_value_3
6587 to make that happen. */
6589 static void
6590 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6591 struct function_arg_record_value_parms *parms,
6592 bool packed_p)
6594 tree field;
6596 if (! packed_p)
6597 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6599 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6601 packed_p = true;
6602 break;
6606 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6608 if (TREE_CODE (field) == FIELD_DECL)
6610 HOST_WIDE_INT bitpos = startbitpos;
6612 if (DECL_SIZE (field) != 0)
6614 if (integer_zerop (DECL_SIZE (field)))
6615 continue;
6617 if (tree_fits_uhwi_p (bit_position (field)))
6618 bitpos += int_bit_position (field);
6621 /* ??? FIXME: else assume zero offset. */
6623 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6624 function_arg_record_value_2 (TREE_TYPE (field),
6625 bitpos,
6626 parms,
6627 packed_p);
6628 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6629 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6630 && TARGET_FPU
6631 && parms->named
6632 && ! packed_p)
6634 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6635 int regno, nregs, pos;
6636 machine_mode mode = DECL_MODE (field);
6637 rtx reg;
6639 function_arg_record_value_3 (bitpos, parms);
6641 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6642 && mode == BLKmode)
6644 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6645 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6647 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6649 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6650 nregs = 2;
6652 else
6653 nregs = 1;
6655 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6656 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6657 regno++;
6658 reg = gen_rtx_REG (mode, regno);
6659 pos = bitpos / BITS_PER_UNIT;
6660 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6661 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6662 parms->nregs += 1;
6663 while (--nregs > 0)
6665 regno += GET_MODE_SIZE (mode) / 4;
6666 reg = gen_rtx_REG (mode, regno);
6667 pos += GET_MODE_SIZE (mode);
6668 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6669 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6670 parms->nregs += 1;
6673 else
6675 if (parms->intoffset == -1)
6676 parms->intoffset = bitpos;
6682 /* Used by function_arg and sparc_function_value_1 to implement the complex
6683 conventions of the 64-bit ABI for passing and returning structures.
6684 Return an expression valid as a return value for the FUNCTION_ARG
6685 and TARGET_FUNCTION_VALUE.
6687 TYPE is the data type of the argument (as a tree).
6688 This is null for libcalls where that information may
6689 not be available.
6690 MODE is the argument's machine mode.
6691 SLOTNO is the index number of the argument's slot in the parameter array.
6692 NAMED is nonzero if this argument is a named parameter
6693 (otherwise it is an extra parameter matching an ellipsis).
6694 REGBASE is the regno of the base register for the parameter array. */
6696 static rtx
6697 function_arg_record_value (const_tree type, machine_mode mode,
6698 int slotno, int named, int regbase)
6700 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6701 struct function_arg_record_value_parms parms;
6702 unsigned int nregs;
6704 parms.ret = NULL_RTX;
6705 parms.slotno = slotno;
6706 parms.named = named;
6707 parms.regbase = regbase;
6708 parms.stack = 0;
6710 /* Compute how many registers we need. */
6711 parms.nregs = 0;
6712 parms.intoffset = 0;
6713 function_arg_record_value_1 (type, 0, &parms, false);
6715 /* Take into account pending integer fields. */
6716 if (parms.intoffset != -1)
6718 unsigned int startbit, endbit;
6719 int intslots, this_slotno;
6721 startbit = parms.intoffset & -BITS_PER_WORD;
6722 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6723 intslots = (endbit - startbit) / BITS_PER_WORD;
6724 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6726 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6728 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6729 /* We need to pass this field on the stack. */
6730 parms.stack = 1;
6733 parms.nregs += intslots;
6735 nregs = parms.nregs;
6737 /* Allocate the vector and handle some annoying special cases. */
6738 if (nregs == 0)
6740 /* ??? Empty structure has no value? Duh? */
6741 if (typesize <= 0)
6743 /* Though there's nothing really to store, return a word register
6744 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6745 leads to breakage due to the fact that there are zero bytes to
6746 load. */
6747 return gen_rtx_REG (mode, regbase);
6749 else
6751 /* ??? C++ has structures with no fields, and yet a size. Give up
6752 for now and pass everything back in integer registers. */
6753 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6755 if (nregs + slotno > SPARC_INT_ARG_MAX)
6756 nregs = SPARC_INT_ARG_MAX - slotno;
6758 gcc_assert (nregs != 0);
6760 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6762 /* If at least one field must be passed on the stack, generate
6763 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6764 also be passed on the stack. We can't do much better because the
6765 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6766 of structures for which the fields passed exclusively in registers
6767 are not at the beginning of the structure. */
6768 if (parms.stack)
6769 XVECEXP (parms.ret, 0, 0)
6770 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6772 /* Fill in the entries. */
6773 parms.nregs = 0;
6774 parms.intoffset = 0;
6775 function_arg_record_value_2 (type, 0, &parms, false);
6776 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6778 gcc_assert (parms.nregs == nregs);
6780 return parms.ret;
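/* Illustrative worked example, not part of GCC: a named
   struct { int i; float f; } in slot 0 yields a PARALLEL of two
   EXPR_LISTs: a word_mode integer register at byte offset 0 for the
   int bits (possibly picking up bits of F, as noted above) and %f1 at
   byte offset 4, the float being right-justified in its half-slot.  */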
6783 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6784 of the 64-bit ABI for passing and returning unions.
6785 Return an expression valid as a return value for the FUNCTION_ARG
6786 and TARGET_FUNCTION_VALUE.
6788 SIZE is the size in bytes of the union.
6789 MODE is the argument's machine mode.
6790 REGNO is the hard register the union will be passed in. */
6792 static rtx
6793 function_arg_union_value (int size, machine_mode mode, int slotno,
6794 int regno)
6796 int nwords = ROUND_ADVANCE (size), i;
6797 rtx regs;
6799 /* See comment in previous function for empty structures. */
6800 if (nwords == 0)
6801 return gen_rtx_REG (mode, regno);
6803 if (slotno == SPARC_INT_ARG_MAX - 1)
6804 nwords = 1;
6806 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6808 for (i = 0; i < nwords; i++)
6810 /* Unions are passed left-justified. */
6811 XVECEXP (regs, 0, i)
6812 = gen_rtx_EXPR_LIST (VOIDmode,
6813 gen_rtx_REG (word_mode, regno),
6814 GEN_INT (UNITS_PER_WORD * i));
6815 regno++;
6818 return regs;
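/* Illustrative worked example, not part of GCC: a 12-byte union starting
   in slot 3 spans ROUND_ADVANCE (12) = 2 words, giving a PARALLEL of two
   word_mode registers (e.g. %o3 and %o4 for outgoing arguments) at byte
   offsets 0 and 8.  */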
6821 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6822 for passing and returning BLKmode vectors.
6823 Return an expression valid as a return value for the FUNCTION_ARG
6824 and TARGET_FUNCTION_VALUE.
6826 SIZE is the size in bytes of the vector.
6827 REGNO is the FP hard register the vector will be passed in. */
6829 static rtx
6830 function_arg_vector_value (int size, int regno)
6832 const int nregs = MAX (1, size / 8);
6833 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6835 if (size < 8)
6836 XVECEXP (regs, 0, 0)
6837 = gen_rtx_EXPR_LIST (VOIDmode,
6838 gen_rtx_REG (SImode, regno),
6839 const0_rtx);
6840 else
6841 for (int i = 0; i < nregs; i++)
6842 XVECEXP (regs, 0, i)
6843 = gen_rtx_EXPR_LIST (VOIDmode,
6844 gen_rtx_REG (DImode, regno + 2*i),
6845 GEN_INT (i*8));
6847 return regs;
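/* Illustrative worked example, not part of GCC: a 16-byte vector at FP
   register R is split into MAX (1, 16/8) = 2 DImode registers, R and
   R + 2, at byte offsets 0 and 8; a 4-byte vector instead uses a single
   SImode register at offset 0.  */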
6850 /* Determine where to put an argument to a function.
6851 Value is zero to push the argument on the stack,
6852 or a hard register in which to store the argument.
6854 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6855 the preceding args and about the function being called.
6856 MODE is the argument's machine mode.
6857 TYPE is the data type of the argument (as a tree).
6858 This is null for libcalls where that information may
6859 not be available.
6860 NAMED is true if this argument is a named parameter
6861 (otherwise it is an extra parameter matching an ellipsis).
6862 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6863 TARGET_FUNCTION_INCOMING_ARG. */
6865 static rtx
6866 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6867 const_tree type, bool named, bool incoming_p)
6869 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6871 int regbase = (incoming_p
6872 ? SPARC_INCOMING_INT_ARG_FIRST
6873 : SPARC_OUTGOING_INT_ARG_FIRST);
6874 int slotno, regno, padding;
6875 enum mode_class mclass = GET_MODE_CLASS (mode);
6877 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6878 &regno, &padding);
6879 if (slotno == -1)
6880 return 0;
6882 /* Vector types deserve special treatment because they are polymorphic wrt
6883 their mode, depending upon whether VIS instructions are enabled. */
6884 if (type && TREE_CODE (type) == VECTOR_TYPE)
6886 HOST_WIDE_INT size = int_size_in_bytes (type);
6887 gcc_assert ((TARGET_ARCH32 && size <= 8)
6888 || (TARGET_ARCH64 && size <= 16));
6890 if (mode == BLKmode)
6891 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6893 mclass = MODE_FLOAT;
6896 if (TARGET_ARCH32)
6897 return gen_rtx_REG (mode, regno);
6899 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6900 and are promoted to registers if possible. */
6901 if (type && TREE_CODE (type) == RECORD_TYPE)
6903 HOST_WIDE_INT size = int_size_in_bytes (type);
6904 gcc_assert (size <= 16);
6906 return function_arg_record_value (type, mode, slotno, named, regbase);
6909 /* Unions up to 16 bytes in size are passed in integer registers. */
6910 else if (type && TREE_CODE (type) == UNION_TYPE)
6912 HOST_WIDE_INT size = int_size_in_bytes (type);
6913 gcc_assert (size <= 16);
6915 return function_arg_union_value (size, mode, slotno, regno);
6918 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6919 but also have the slot allocated for them.
6920 If no prototype is in scope fp values in register slots get passed
6921 in two places, either fp regs and int regs or fp regs and memory. */
6922 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6923 && SPARC_FP_REG_P (regno))
6925 rtx reg = gen_rtx_REG (mode, regno);
6926 if (cum->prototype_p || cum->libcall_p)
6928 /* "* 2" because fp reg numbers are recorded in 4 byte
6929 quantities. */
6930 #if 0
6931 /* ??? This will cause the value to be passed in the fp reg and
6932 in the stack. When a prototype exists we want to pass the
6933 value in the reg but reserve space on the stack. That's an
6934 optimization, and is deferred [for a bit]. */
6935 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6936 return gen_rtx_PARALLEL (mode,
6937 gen_rtvec (2,
6938 gen_rtx_EXPR_LIST (VOIDmode,
6939 NULL_RTX, const0_rtx),
6940 gen_rtx_EXPR_LIST (VOIDmode,
6941 reg, const0_rtx)));
6942 else
6943 #else
6944 /* ??? It seems that passing back a register even when past
6945 the area declared by REG_PARM_STACK_SPACE will allocate
6946 space appropriately, and will not copy the data onto the
6947 stack, exactly as we desire.
6949 This is due to locate_and_pad_parm being called in
6950 expand_call whenever reg_parm_stack_space > 0, which
6951 while beneficial to our example here, would seem to be
6952 in error from what had been intended. Ho hum... -- r~ */
6953 #endif
6954 return reg;
6956 else
6958 rtx v0, v1;
6960 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6962 int intreg;
6964 /* On incoming, we don't need to know that the value
6965 is passed in %f0 and %i0, and it confuses other parts
6966 causing needless spillage even on the simplest cases. */
6967 if (incoming_p)
6968 return reg;
6970 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6971 + (regno - SPARC_FP_ARG_FIRST) / 2);
6973 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6974 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6975 const0_rtx);
6976 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6978 else
6980 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6981 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6982 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6987 /* All other aggregate types are passed in an integer register in a mode
6988 corresponding to the size of the type. */
6989 else if (type && AGGREGATE_TYPE_P (type))
6991 HOST_WIDE_INT size = int_size_in_bytes (type);
6992 gcc_assert (size <= 16);
6994 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6997 return gen_rtx_REG (mode, regno);
7000 /* Handle the TARGET_FUNCTION_ARG target hook. */
7002 static rtx
7003 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7004 const_tree type, bool named)
7006 return sparc_function_arg_1 (cum, mode, type, named, false);
7009 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7011 static rtx
7012 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7013 const_tree type, bool named)
7015 return sparc_function_arg_1 (cum, mode, type, named, true);
7018 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7020 static unsigned int
7021 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7023 return ((TARGET_ARCH64
7024 && (GET_MODE_ALIGNMENT (mode) == 128
7025 || (type && TYPE_ALIGN (type) == 128)))
7026 ? 128
7027 : PARM_BOUNDARY);
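/* Illustrative worked example, not part of GCC: on TARGET_ARCH64,
   'long double' (TFmode, 128-bit alignment) gets a 128-bit boundary and
   therefore starts on an even slot; everything else falls back to
   PARM_BOUNDARY.  */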
7030 /* For an arg passed partly in registers and partly in memory,
7031 this is the number of bytes of registers used.
7032 For args passed entirely in registers or entirely in memory, zero.
7034 Any arg that starts in the first 6 regs but won't entirely fit in them
7035 needs partial registers on v8. On v9, structures with integer
7036 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7037 values that begin in the last fp reg [where "last fp reg" varies with the
7038 mode] will be split between that reg and memory. */
7040 static int
7041 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7042 tree type, bool named)
7044 int slotno, regno, padding;
7046 /* We pass false for incoming_p here; it doesn't matter. */
7047 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7048 false, &regno, &padding);
7050 if (slotno == -1)
7051 return 0;
7053 if (TARGET_ARCH32)
7055 if ((slotno + (mode == BLKmode
7056 ? ROUND_ADVANCE (int_size_in_bytes (type))
7057 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
7058 > SPARC_INT_ARG_MAX)
7059 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7061 else
7063 /* We are guaranteed by pass_by_reference that the size of the
7064 argument is not greater than 16 bytes, so we only need to return
7065 one word if the argument is partially passed in registers. */
7067 if (type && AGGREGATE_TYPE_P (type))
7069 int size = int_size_in_bytes (type);
7071 if (size > UNITS_PER_WORD
7072 && slotno == SPARC_INT_ARG_MAX - 1)
7073 return UNITS_PER_WORD;
7075 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7076 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7077 && ! (TARGET_FPU && named)))
7079 /* The complex types are passed as packed types. */
7080 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7081 && slotno == SPARC_INT_ARG_MAX - 1)
7082 return UNITS_PER_WORD;
7084 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7086 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7087 > SPARC_FP_ARG_MAX)
7088 return UNITS_PER_WORD;
7092 return 0;
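/* Illustrative worked example, not part of GCC: on TARGET_ARCH32 a
   'double' starting in slot 5 needs 2 words, overflowing the 6 register
   slots, so (SPARC_INT_ARG_MAX - 5) * UNITS_PER_WORD = 4 bytes go in
   %o5 and the remaining 4 bytes go on the stack.  */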
7095 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7096 Specify whether to pass the argument by reference. */
7098 static bool
7099 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7100 machine_mode mode, const_tree type,
7101 bool named ATTRIBUTE_UNUSED)
7103 if (TARGET_ARCH32)
7104 /* Original SPARC 32-bit ABI says that structures and unions,
7105 and quad-precision floats are passed by reference. For Pascal,
7106 also pass arrays by reference. All other base types are passed
7107 in registers.
7109 Extended ABI (as implemented by the Sun compiler) says that all
7110 complex floats are passed by reference. Pass complex integers
7111 in registers up to 8 bytes. More generally, enforce the 2-word
7112 cap for passing arguments in registers.
7114 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7115 integers are passed like floats of the same size, that is in
7116 registers up to 8 bytes. Pass all vector floats by reference
7117 like structure and unions. */
7118 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7119 || mode == SCmode
7120 /* Catch CDImode, TFmode, DCmode and TCmode. */
7121 || GET_MODE_SIZE (mode) > 8
7122 || (type
7123 && TREE_CODE (type) == VECTOR_TYPE
7124 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7125 else
7126 /* Original SPARC 64-bit ABI says that structures and unions
7127 smaller than 16 bytes are passed in registers, as well as
7128 all other base types.
7130 Extended ABI (as implemented by the Sun compiler) says that
7131 complex floats are passed in registers up to 16 bytes. Pass
7132 all complex integers in registers up to 16 bytes. More generally,
7133 enforce the 2-word cap for passing arguments in registers.
7135 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7136 integers are passed like floats of the same size, that is in
7137 registers (up to 16 bytes). Pass all vector floats like structure
7138 and unions. */
7139 return ((type
7140 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7141 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7142 /* Catch CTImode and TCmode. */
7143 || GET_MODE_SIZE (mode) > 16);
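/* Illustrative worked example, not part of GCC: 'long double' (TFmode,
   16 bytes) is passed by reference on 32-bit, since its size exceeds 8,
   but by value in FP registers on 64-bit, where the cutoff is 16.  */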
7146 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7147 Update the data in CUM to advance over an argument
7148 of mode MODE and data type TYPE.
7149 TYPE is null for libcalls where that information may not be available. */
7151 static void
7152 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7153 const_tree type, bool named)
7155 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7156 int regno, padding;
7158 /* We pass false for incoming_p here; it doesn't matter. */
7159 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7161 /* If argument requires leading padding, add it. */
7162 cum->words += padding;
7164 if (TARGET_ARCH32)
7166 cum->words += (mode != BLKmode
7167 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7168 : ROUND_ADVANCE (int_size_in_bytes (type)));
7170 else
7172 if (type && AGGREGATE_TYPE_P (type))
7174 int size = int_size_in_bytes (type);
7176 if (size <= 8)
7177 ++cum->words;
7178 else if (size <= 16)
7179 cum->words += 2;
7180 else /* passed by reference */
7181 ++cum->words;
7183 else
7185 cum->words += (mode != BLKmode
7186 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7187 : ROUND_ADVANCE (int_size_in_bytes (type)));
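/* Illustrative worked example, not part of GCC: on TARGET_ARCH32 a
   'double' advances cum->words by ROUND_ADVANCE (8) = 2 slots; on
   TARGET_ARCH64 a 24-byte struct is passed by reference, so it advances
   by a single word for the pointer.  */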
7192 /* Handle the FUNCTION_ARG_PADDING macro.
7193 For the 64-bit ABI, structs are always stored left-shifted in their
7194 argument slot. */
7196 enum direction
7197 function_arg_padding (machine_mode mode, const_tree type)
7199 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7200 return upward;
7202 /* Fall back to the default. */
7203 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7206 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7207 Specify whether to return the return value in memory. */
7209 static bool
7210 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7212 if (TARGET_ARCH32)
7213 /* Original SPARC 32-bit ABI says that structures and unions,
7214 and quad-precision floats are returned in memory. All other
7215 base types are returned in registers.
7217 Extended ABI (as implemented by the Sun compiler) says that
7218 all complex floats are returned in registers (8 FP registers
7219 at most for '_Complex long double'). Return all complex integers
7220 in registers (4 at most for '_Complex long long').
7222 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7223 integers are returned like floats of the same size, that is in
7224 registers up to 8 bytes and in memory otherwise. Return all
7225 vector floats in memory like structure and unions; note that
7226 they always have BLKmode like the latter. */
7227 return (TYPE_MODE (type) == BLKmode
7228 || TYPE_MODE (type) == TFmode
7229 || (TREE_CODE (type) == VECTOR_TYPE
7230 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7231 else
7232 /* Original SPARC 64-bit ABI says that structures and unions
7233 smaller than 32 bytes are returned in registers, as well as
7234 all other base types.
7236 Extended ABI (as implemented by the Sun compiler) says that all
7237 complex floats are returned in registers (8 FP registers at most
7238 for '_Complex long double'). Return all complex integers in
7239 registers (4 at most for '_Complex TItype').
7241 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7242 integers are returned like floats of the same size, that is in
7243 registers. Return all vector floats like structure and unions;
7244 note that they always have BLKmode like the latter. */
7245 return (TYPE_MODE (type) == BLKmode
7246 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7249 /* Handle the TARGET_STRUCT_VALUE target hook.
7250 Return where to find the structure return value address. */
7252 static rtx
7253 sparc_struct_value_rtx (tree fndecl, int incoming)
7255 if (TARGET_ARCH64)
7256 return 0;
7257 else
7259 rtx mem;
7261 if (incoming)
7262 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7263 STRUCT_VALUE_OFFSET));
7264 else
7265 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7266 STRUCT_VALUE_OFFSET));
7268 /* Only follow the SPARC ABI for fixed-size structure returns.
7269 Variable size structure returns are handled per the normal
7270 procedures in GCC. This is enabled by -mstd-struct-return. */
7271 if (incoming == 2
7272 && sparc_std_struct_return
7273 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7274 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7276 /* We must check and adjust the return address, as it is
7277 optional as to whether the return object is really
7278 provided. */
7279 rtx ret_reg = gen_rtx_REG (Pmode, 31);
7280 rtx scratch = gen_reg_rtx (SImode);
7281 rtx_code_label *endlab = gen_label_rtx ();
7283 /* Calculate the return object size. */
7284 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7285 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7286 /* Construct a temporary return value. */
7287 rtx temp_val
7288 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7290 /* Implement SPARC 32-bit psABI callee return struct checking:
7292 Fetch the instruction where we will return to and see if
7293 it's an unimp instruction (the most significant 10 bits
7294 will be zero). */
7295 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7296 plus_constant (Pmode,
7297 ret_reg, 8)));
7298 /* Assume the size is valid and pre-adjust */
7299 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7300 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7301 0, endlab);
7302 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7303 /* Write the address of the memory pointed to by temp_val into
7304 the memory pointed to by mem. */
7305 emit_move_insn (mem, XEXP (temp_val, 0));
7306 emit_label (endlab);
7309 return mem;
7313 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7314 For v9, function return values are subject to the same rules as arguments,
7315 except that up to 32 bytes may be returned in registers. */
7317 static rtx
7318 sparc_function_value_1 (const_tree type, machine_mode mode,
7319 bool outgoing)
7321 /* Beware that the two values are swapped here wrt function_arg. */
7322 int regbase = (outgoing
7323 ? SPARC_INCOMING_INT_ARG_FIRST
7324 : SPARC_OUTGOING_INT_ARG_FIRST);
7325 enum mode_class mclass = GET_MODE_CLASS (mode);
7326 int regno;
7328 /* Vector types deserve special treatment because they are polymorphic wrt
7329 their mode, depending upon whether VIS instructions are enabled. */
7330 if (type && TREE_CODE (type) == VECTOR_TYPE)
7332 HOST_WIDE_INT size = int_size_in_bytes (type);
7333 gcc_assert ((TARGET_ARCH32 && size <= 8)
7334 || (TARGET_ARCH64 && size <= 32));
7336 if (mode == BLKmode)
7337 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7339 mclass = MODE_FLOAT;
7342 if (TARGET_ARCH64 && type)
7344 /* Structures up to 32 bytes in size are returned in registers. */
7345 if (TREE_CODE (type) == RECORD_TYPE)
7347 HOST_WIDE_INT size = int_size_in_bytes (type);
7348 gcc_assert (size <= 32);
7350 return function_arg_record_value (type, mode, 0, 1, regbase);
7353 /* Unions up to 32 bytes in size are returned in integer registers. */
7354 else if (TREE_CODE (type) == UNION_TYPE)
7356 HOST_WIDE_INT size = int_size_in_bytes (type);
7357 gcc_assert (size <= 32);
7359 return function_arg_union_value (size, mode, 0, regbase);
7362 /* Objects that require it are returned in FP registers. */
7363 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7366 /* All other aggregate types are returned in an integer register in a
7367 mode corresponding to the size of the type. */
7368 else if (AGGREGATE_TYPE_P (type))
7372 HOST_WIDE_INT size = int_size_in_bytes (type);
7373 gcc_assert (size <= 32);
7375 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7377 /* ??? We probably should have made the same ABI change in
7378 3.4.0 as the one we made for unions. The latter was
7379 required by the SCD though, while the former is not
7380 specified, so we favored compatibility and efficiency.
7382 Now we're stuck for aggregates larger than 16 bytes,
7383 because OImode vanished in the meantime. Let's not
7384 try to be unduly clever, and simply follow the ABI
7385 for unions in that case. */
7386 if (mode == BLKmode)
7387 return function_arg_union_value (size, mode, 0, regbase);
7388 else
7389 mclass = MODE_INT;
7392 /* We should only have pointer and integer types at this point. This
7393 must match sparc_promote_function_mode. */
7394 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7395 mode = word_mode;
7398 /* We should only have pointer and integer types at this point. This must
7399 match sparc_promote_function_mode. */
7400 else if (TARGET_ARCH32
7401 && mclass == MODE_INT
7402 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7403 mode = word_mode;
7405 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7406 regno = SPARC_FP_ARG_FIRST;
7407 else
7408 regno = regbase;
7410 return gen_rtx_REG (mode, regno);
7413 /* Handle TARGET_FUNCTION_VALUE.
7414 On the SPARC, the value is found in the first "output" register, but the
7415 called function leaves it in the first "input" register. */
7417 static rtx
7418 sparc_function_value (const_tree valtype,
7419 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7420 bool outgoing)
7422 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7425 /* Handle TARGET_LIBCALL_VALUE. */
7427 static rtx
7428 sparc_libcall_value (machine_mode mode,
7429 const_rtx fun ATTRIBUTE_UNUSED)
7431 return sparc_function_value_1 (NULL_TREE, mode, false);
7434 /* Handle FUNCTION_VALUE_REGNO_P.
7435 On the SPARC, the first "output" reg is used for integer values, and the
7436 first floating point register is used for floating point values. */
7438 static bool
7439 sparc_function_value_regno_p (const unsigned int regno)
7441 return (regno == 8 || regno == 32);
7444 /* Do what is necessary for `va_start'. We look at the current function
7445 to determine if stdarg or varargs is used and return the address of
7446 the first unnamed parameter. */
7448 static rtx
7449 sparc_builtin_saveregs (void)
7451 int first_reg = crtl->args.info.words;
7452 rtx address;
7453 int regno;
7455 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7456 emit_move_insn (gen_rtx_MEM (word_mode,
7457 gen_rtx_PLUS (Pmode,
7458 frame_pointer_rtx,
7459 GEN_INT (FIRST_PARM_OFFSET (0)
7460 + (UNITS_PER_WORD
7461 * regno)))),
7462 gen_rtx_REG (word_mode,
7463 SPARC_INCOMING_INT_ARG_FIRST + regno));
7465 address = gen_rtx_PLUS (Pmode,
7466 frame_pointer_rtx,
7467 GEN_INT (FIRST_PARM_OFFSET (0)
7468 + UNITS_PER_WORD * first_reg));
7470 return address;
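/* Illustrative worked example, not part of GCC: in a varargs function
   with two named words, first_reg is 2, so the loop above dumps the
   remaining incoming argument registers (%i2..%i5 in the windowed
   model) into their stack slots and returns the address of the first
   unnamed word, FIRST_PARM_OFFSET (0) + 2 * UNITS_PER_WORD past the
   frame pointer.  */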
7473 /* Implement `va_start' for stdarg. */
7475 static void
7476 sparc_va_start (tree valist, rtx nextarg)
7478 nextarg = expand_builtin_saveregs ();
7479 std_expand_builtin_va_start (valist, nextarg);
7482 /* Implement `va_arg' for stdarg. */
7484 static tree
7485 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7486 gimple_seq *post_p)
7488 HOST_WIDE_INT size, rsize, align;
7489 tree addr, incr;
7490 bool indirect;
7491 tree ptrtype = build_pointer_type (type);
7493 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7495 indirect = true;
7496 size = rsize = UNITS_PER_WORD;
7497 align = 0;
7499 else
7501 indirect = false;
7502 size = int_size_in_bytes (type);
7503 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7504 align = 0;
7506 if (TARGET_ARCH64)
7508 /* For SPARC64, objects requiring 16-byte alignment get it. */
7509 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7510 align = 2 * UNITS_PER_WORD;
7512 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7513 are left-justified in their slots. */
7514 if (AGGREGATE_TYPE_P (type))
7516 if (size == 0)
7517 size = rsize = UNITS_PER_WORD;
7518 else
7519 size = rsize;
7524 incr = valist;
7525 if (align)
7527 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7528 incr = fold_convert (sizetype, incr);
7529 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7530 size_int (-align));
7531 incr = fold_convert (ptr_type_node, incr);
7534 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7535 addr = incr;
7537 if (BYTES_BIG_ENDIAN && size < rsize)
7538 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7540 if (indirect)
7542 addr = fold_convert (build_pointer_type (ptrtype), addr);
7543 addr = build_va_arg_indirect_ref (addr);
7546 /* If the address isn't aligned properly for the type, we need a temporary.
7547 FIXME: This is inefficient, usually we can do this in registers. */
7548 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7550 tree tmp = create_tmp_var (type, "va_arg_tmp");
7551 tree dest_addr = build_fold_addr_expr (tmp);
7552 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7553 3, dest_addr, addr, size_int (rsize));
7554 TREE_ADDRESSABLE (tmp) = 1;
7555 gimplify_and_add (copy, pre_p);
7556 addr = dest_addr;
7559 else
7560 addr = fold_convert (ptrtype, addr);
7562 incr = fold_build_pointer_plus_hwi (incr, rsize);
7563 gimplify_assign (valist, incr, post_p);
7565 return build_va_arg_indirect_ref (addr);
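/* Illustrative worked example, not part of GCC: on TARGET_ARCH64 a type
   with 16-byte alignment sets align = 16, so the code above rounds the
   va_list pointer up with (ptr + 15) & -16 before the load, then bumps
   it by the rounded size rsize.  */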
7568 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7569 Specify whether the vector mode is supported by the hardware. */
7571 static bool
7572 sparc_vector_mode_supported_p (machine_mode mode)
7574 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7577 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7579 static machine_mode
7580 sparc_preferred_simd_mode (machine_mode mode)
7582 if (TARGET_VIS)
7583 switch (mode)
7585 case SImode:
7586 return V2SImode;
7587 case HImode:
7588 return V4HImode;
7589 case QImode:
7590 return V8QImode;
7592 default:;
7595 return word_mode;
7598 /* Return the string to output an unconditional branch to LABEL, which is
7599 the operand number of the label.
7601 DEST is the destination insn (i.e. the label), INSN is the source. */
7603 const char *
7604 output_ubranch (rtx dest, rtx_insn *insn)
7606 static char string[64];
7607 bool v9_form = false;
7608 int delta;
7609 char *p;
7611 /* Even if we are trying to use cbcond for this, evaluate
7612 whether we can use V9 branches as our backup plan. */
7614 delta = 5000000;
7615 if (INSN_ADDRESSES_SET_P ())
7616 delta = (INSN_ADDRESSES (INSN_UID (dest))
7617 - INSN_ADDRESSES (INSN_UID (insn)));
7619 /* Leave some instructions for "slop". */
7620 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7621 v9_form = true;
7623 if (TARGET_CBCOND)
7625 bool emit_nop = emit_cbcond_nop (insn);
7626 bool far = false;
7627 const char *rval;
7629 if (delta < -500 || delta > 500)
7630 far = true;
7632 if (far)
7634 if (v9_form)
7635 rval = "ba,a,pt\t%%xcc, %l0";
7636 else
7637 rval = "b,a\t%l0";
7639 else
7641 if (emit_nop)
7642 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7643 else
7644 rval = "cwbe\t%%g0, %%g0, %l0";
7646 return rval;
7649 if (v9_form)
7650 strcpy (string, "ba%*,pt\t%%xcc, ");
7651 else
7652 strcpy (string, "b%*\t");
7654 p = strchr (string, '\0');
7655 *p++ = '%';
7656 *p++ = 'l';
7657 *p++ = '0';
7658 *p++ = '%';
7659 *p++ = '(';
7660 *p = '\0';
7662 return string;
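/* Illustrative worked example, not part of GCC: with TARGET_CBCOND, a
   target 300 bytes away is within [-500, 500], so the always-true
   "cwbe %g0, %g0, <label>" form is used; at 600 bytes the code falls
   back to an annulled "ba,a,pt %%xcc" (V9) or "b,a" branch.  */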
7665 /* Return the string to output a conditional branch to LABEL, which is
7666 the operand number of the label. OP is the conditional expression.
7667 XEXP (OP, 0) is assumed to be a condition code register (integer or
7668 floating point) and its mode specifies what kind of comparison we made.
7670 DEST is the destination insn (i.e. the label), INSN is the source.
7672 REVERSED is nonzero if we should reverse the sense of the comparison.
7674 ANNUL is nonzero if we should generate an annulling branch. */
7676 const char *
7677 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7678 rtx_insn *insn)
7680 static char string[64];
7681 enum rtx_code code = GET_CODE (op);
7682 rtx cc_reg = XEXP (op, 0);
7683 machine_mode mode = GET_MODE (cc_reg);
7684 const char *labelno, *branch;
7685 int spaces = 8, far;
7686 char *p;
7688 /* v9 branches are limited to +-1MB. If it is too far away,
7689 change
7691 bne,pt %xcc, .LC30
7693 to
7695 be,pn %xcc, .+12
7696 nop
7697 ba .LC30
7699 and
7701 fbne,a,pn %fcc2, .LC29
7703 to
7705 fbe,pt %fcc2, .+16
7706 nop
7707 ba .LC29 */
7709 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7710 if (reversed ^ far)
7712 /* Reversing an FP compare needs care: an ordered compare
7713    becomes an unordered compare and vice versa.  */
7714 if (mode == CCFPmode || mode == CCFPEmode)
7715 code = reverse_condition_maybe_unordered (code);
7716 else
7717 code = reverse_condition (code);
7720 /* Start by writing the branch condition. */
7721 if (mode == CCFPmode || mode == CCFPEmode)
7723 switch (code)
7725 case NE:
7726 branch = "fbne";
7727 break;
7728 case EQ:
7729 branch = "fbe";
7730 break;
7731 case GE:
7732 branch = "fbge";
7733 break;
7734 case GT:
7735 branch = "fbg";
7736 break;
7737 case LE:
7738 branch = "fble";
7739 break;
7740 case LT:
7741 branch = "fbl";
7742 break;
7743 case UNORDERED:
7744 branch = "fbu";
7745 break;
7746 case ORDERED:
7747 branch = "fbo";
7748 break;
7749 case UNGT:
7750 branch = "fbug";
7751 break;
7752 case UNLT:
7753 branch = "fbul";
7754 break;
7755 case UNEQ:
7756 branch = "fbue";
7757 break;
7758 case UNGE:
7759 branch = "fbuge";
7760 break;
7761 case UNLE:
7762 branch = "fbule";
7763 break;
7764 case LTGT:
7765 branch = "fblg";
7766 break;
7768 default:
7769 gcc_unreachable ();
7772 /* ??? !v9: FP branches cannot be preceded by another floating point
7773 insn. Because there is currently no concept of pre-delay slots,
7774 we can fix this only by always emitting a nop before a floating
7775 point branch. */
7777 string[0] = '\0';
7778 if (! TARGET_V9)
7779 strcpy (string, "nop\n\t");
7780 strcat (string, branch);
7782 else
7784 switch (code)
7786 case NE:
7787 branch = "bne";
7788 break;
7789 case EQ:
7790 branch = "be";
7791 break;
7792 case GE:
7793 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7794 branch = "bpos";
7795 else
7796 branch = "bge";
7797 break;
7798 case GT:
7799 branch = "bg";
7800 break;
7801 case LE:
7802 branch = "ble";
7803 break;
7804 case LT:
7805 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7806 branch = "bneg";
7807 else
7808 branch = "bl";
7809 break;
7810 case GEU:
7811 branch = "bgeu";
7812 break;
7813 case GTU:
7814 branch = "bgu";
7815 break;
7816 case LEU:
7817 branch = "bleu";
7818 break;
7819 case LTU:
7820 branch = "blu";
7821 break;
7823 default:
7824 gcc_unreachable ();
7826 strcpy (string, branch);
7828 spaces -= strlen (branch);
7829 p = strchr (string, '\0');
7831 /* Now add the annulling, the label, and a possible noop. */
7832 if (annul && ! far)
7834 strcpy (p, ",a");
7835 p += 2;
7836 spaces -= 2;
7839 if (TARGET_V9)
7841 rtx note;
7842 int v8 = 0;
7844 if (! far && insn && INSN_ADDRESSES_SET_P ())
7846 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7847 - INSN_ADDRESSES (INSN_UID (insn)));
7848 /* Leave some instructions for "slop". */
7849 if (delta < -260000 || delta >= 260000)
7850 v8 = 1;
7853 if (mode == CCFPmode || mode == CCFPEmode)
7855 static char v9_fcc_labelno[] = "%%fccX, ";
7856 /* Set the char indicating the number of the fcc reg to use. */
7857 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
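/* Offset 5 is the 'X' placeholder in "%%fccX, ".  */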
7858 labelno = v9_fcc_labelno;
7859 if (v8)
7861 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7862 labelno = "";
7865 else if (mode == CCXmode || mode == CCX_NOOVmode)
7867 labelno = "%%xcc, ";
7868 gcc_assert (! v8);
7870 else
7872 labelno = "%%icc, ";
7873 if (v8)
7874 labelno = "";
7877 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7879 strcpy (p,
7880 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7881 ? ",pt" : ",pn");
7882 p += 3;
7883 spaces -= 3;
7886 else
7887 labelno = "";
7889 if (spaces > 0)
7890 *p++ = '\t';
7891 else
7892 *p++ = ' ';
7893 strcpy (p, labelno);
7894 p = strchr (p, '\0');
7895 if (far)
7897 strcpy (p, ".+12\n\t nop\n\tb\t");
7898 /* Skip the next insn if requested or
7899 if we know that it will be a nop. */
7900 if (annul || ! final_sequence)
7901 p[3] = '6';
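/* p[3] is the '2' of ".+12"; making it '6' also skips the
   delay-slot insn that follows the ba.  */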
7902 p += 14;
7904 *p++ = '%';
7905 *p++ = 'l';
7906 *p++ = label + '0';
7907 *p++ = '%';
7908 *p++ = '#';
7909 *p = '\0';
7911 return string;
7914 /* Emit a library call comparison between floating point X and Y.
7915 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7916 Return the new operator to be used in the comparison sequence.
7918 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7919 values as arguments instead of the TFmode registers themselves,
7920 that's why we cannot call emit_float_lib_cmp. */
7922 rtx
7923 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7925 const char *qpfunc;
7926 rtx slot0, slot1, result, tem, tem2, libfunc;
7927 machine_mode mode;
7928 enum rtx_code new_comparison;
7930 switch (comparison)
7932 case EQ:
7933 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7934 break;
7936 case NE:
7937 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7938 break;
7940 case GT:
7941 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7942 break;
7944 case GE:
7945 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7946 break;
7948 case LT:
7949 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7950 break;
7952 case LE:
7953 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7954 break;
7956 case ORDERED:
7957 case UNORDERED:
7958 case UNGT:
7959 case UNLT:
7960 case UNEQ:
7961 case UNGE:
7962 case UNLE:
7963 case LTGT:
7964 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7965 break;
7967 default:
7968 gcc_unreachable ();
7971 if (TARGET_ARCH64)
7973 if (MEM_P (x))
7975 tree expr = MEM_EXPR (x);
7976 if (expr)
7977 mark_addressable (expr);
7978 slot0 = x;
7980 else
7982 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7983 emit_move_insn (slot0, x);
7986 if (MEM_P (y))
7988 tree expr = MEM_EXPR (y);
7989 if (expr)
7990 mark_addressable (expr);
7991 slot1 = y;
7993 else
7995 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7996 emit_move_insn (slot1, y);
7999 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8000 emit_library_call (libfunc, LCT_NORMAL,
8001 DImode, 2,
8002 XEXP (slot0, 0), Pmode,
8003 XEXP (slot1, 0), Pmode);
8004 mode = DImode;
8006 else
8008 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8009 emit_library_call (libfunc, LCT_NORMAL,
8010 SImode, 2,
8011 x, TFmode, y, TFmode);
8012 mode = SImode;
8016 /* Immediately move the result of the libcall into a pseudo
8017 register so reload doesn't clobber the value if it needs
8018 the return register for a spill reg. */
8019 result = gen_reg_rtx (mode);
8020 emit_move_insn (result, hard_libcall_value (mode, libfunc));
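/* The _Q_cmp/_Qp_cmp functions encode their result as 0 = equal,
   1 = less, 2 = greater, 3 = unordered; the decodes below rely on
   exactly this encoding.  */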
8022 switch (comparison)
8024 default:
8025 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8026 case ORDERED:
8027 case UNORDERED:
8028 new_comparison = (comparison == UNORDERED ? EQ : NE);
8029 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8030 case UNGT:
8031 case UNGE:
8032 new_comparison = (comparison == UNGT ? GT : NE);
8033 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8034 case UNLE:
8035 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8036 case UNLT:
8037 tem = gen_reg_rtx (mode);
8038 if (TARGET_ARCH32)
8039 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8040 else
8041 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8042 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8043 case UNEQ:
8044 case LTGT:
8045 tem = gen_reg_rtx (mode);
8046 if (TARGET_ARCH32)
8047 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8048 else
8049 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8050 tem2 = gen_reg_rtx (mode);
8051 if (TARGET_ARCH32)
8052 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8053 else
8054 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8055 new_comparison = (comparison == UNEQ ? EQ : NE);
8056 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8059 gcc_unreachable ();
8062 /* Generate an unsigned DImode to FP conversion. This is the same code
8063 optabs would emit if we didn't have TFmode patterns. */
8065 void
8066 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8068 rtx i0, i1, f0, in, out;
8070 out = operands[0];
8071 in = force_reg (DImode, operands[1]);
8072 rtx_code_label *neglab = gen_label_rtx ();
8073 rtx_code_label *donelab = gen_label_rtx ();
8074 i0 = gen_reg_rtx (DImode);
8075 i1 = gen_reg_rtx (DImode);
8076 f0 = gen_reg_rtx (mode);
8078 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8080 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
8081 emit_jump_insn (gen_jump (donelab));
8082 emit_barrier ();
8084 emit_label (neglab);
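/* The value has the top bit set: halve it, folding the discarded low
   bit back in so the final doubling rounds the same way, convert,
   then double the result.  */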
8086 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8087 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8088 emit_insn (gen_iordi3 (i0, i0, i1));
8089 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
8090 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
8092 emit_label (donelab);
8095 /* Generate an FP to unsigned DImode conversion. This is the same code
8096 optabs would emit if we didn't have TFmode patterns. */
8098 void
8099 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8101 rtx i0, i1, f0, in, out, limit;
8103 out = operands[0];
8104 in = force_reg (mode, operands[1]);
8105 rtx_code_label *neglab = gen_label_rtx ();
8106 rtx_code_label *donelab = gen_label_rtx ();
8107 i0 = gen_reg_rtx (DImode);
8108 i1 = gen_reg_rtx (DImode);
8109 limit = gen_reg_rtx (mode);
8110 f0 = gen_reg_rtx (mode);
8112 emit_move_insn (limit,
8113 CONST_DOUBLE_FROM_REAL_VALUE (
8114 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8115 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8117 emit_insn (gen_rtx_SET (VOIDmode,
8118 out,
8119 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8120 emit_jump_insn (gen_jump (donelab));
8121 emit_barrier ();
8123 emit_label (neglab);
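/* The value is >= 2^63, too big for a signed fix: subtract 2^63,
   convert, then set the sign bit of the result with an xor.  */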
8125 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
8126 emit_insn (gen_rtx_SET (VOIDmode,
8127 i0,
8128 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8129 emit_insn (gen_movdi (i1, const1_rtx));
8130 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8131 emit_insn (gen_xordi3 (out, i0, i1));
8133 emit_label (donelab);
8136 /* Return the string to output a compare and branch instruction to DEST.
8137 DEST is the destination insn (i.e. the label), INSN is the source,
8138 and OP is the conditional expression. */
8140 const char *
8141 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8143 machine_mode mode = GET_MODE (XEXP (op, 0));
8144 enum rtx_code code = GET_CODE (op);
8145 const char *cond_str, *tmpl;
8146 int far, emit_nop, len;
8147 static char string[64];
8148 char size_char;
8150 /* Compare and Branch is limited to +-2KB. If it is too far away,
8151 change
8153 cxbne X, Y, .LC30
8155 to
8157 cxbe X, Y, .+16
8158 nop
8159 ba,pt xcc, .LC30
8160  nop */
8162 len = get_attr_length (insn);
8164 far = len == 4;
8165 emit_nop = len == 2;
8167 if (far)
8168 code = reverse_condition (code);
8170 size_char = ((mode == SImode) ? 'w' : 'x');
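/* 'w' (word) for 32-bit compares, 'x' (extended) for 64-bit ones,
   giving the cwb<cond> and cxb<cond> mnemonics.  */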
8172 switch (code)
8174 case NE:
8175 cond_str = "ne";
8176 break;
8178 case EQ:
8179 cond_str = "e";
8180 break;
8182 case GE:
8183 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8184 cond_str = "pos";
8185 else
8186 cond_str = "ge";
8187 break;
8189 case GT:
8190 cond_str = "g";
8191 break;
8193 case LE:
8194 cond_str = "le";
8195 break;
8197 case LT:
8198 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8199 cond_str = "neg";
8200 else
8201 cond_str = "l";
8202 break;
8204 case GEU:
8205 cond_str = "cc";
8206 break;
8208 case GTU:
8209 cond_str = "gu";
8210 break;
8212 case LEU:
8213 cond_str = "leu";
8214 break;
8216 case LTU:
8217 cond_str = "cs";
8218 break;
8220 default:
8221 gcc_unreachable ();
8224 if (far)
8226 int veryfar = 1, delta;
8228 if (INSN_ADDRESSES_SET_P ())
8230 delta = (INSN_ADDRESSES (INSN_UID (dest))
8231 - INSN_ADDRESSES (INSN_UID (insn)));
8232 /* Leave some instructions for "slop". */
8233 if (delta >= -260000 && delta < 260000)
8234 veryfar = 0;
8237 if (veryfar)
8238 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8239 else
8240 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8242 else
8244 if (emit_nop)
8245 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8246 else
8247 tmpl = "c%cb%s\t%%1, %%2, %%3";
8250 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
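/* For example, EQ on an SImode compare expands the template to
   "cwbe\t%1, %2, %3".  */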
8252 return string;
8255 /* Return the string to output a conditional branch to LABEL, testing
8256 register REG. LABEL is the operand number of the label; REG is the
8257 operand number of the reg. OP is the conditional expression. The mode
8258 of REG says what kind of comparison we made.
8260 DEST is the destination insn (i.e. the label), INSN is the source.
8262 REVERSED is nonzero if we should reverse the sense of the comparison.
8264 ANNUL is nonzero if we should generate an annulling branch. */
8266 const char *
8267 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8268 int annul, rtx_insn *insn)
8270 static char string[64];
8271 enum rtx_code code = GET_CODE (op);
8272 machine_mode mode = GET_MODE (XEXP (op, 0));
8273 rtx note;
8274 int far;
8275 char *p;
8277 /* Branch-on-register instructions are limited to +-128KB.  If the
8278    target is too far away, change
8280 brnz,pt %g1, .LC30
8282 to
8284 brz,pn %g1, .+12
8285  nop
8286 ba,pt %xcc, .LC30
8288 and
8290 brgez,a,pn %o1, .LC29
8292 to
8294 brlz,pt %o1, .+16
8295  nop
8296 ba,pt %xcc, .LC29 */
8298 far = get_attr_length (insn) >= 3;
8300 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8301 if (reversed ^ far)
8302 code = reverse_condition (code);
8304 /* Only 64 bit versions of these instructions exist. */
8305 gcc_assert (mode == DImode);
8307 /* Start by writing the branch condition. */
8309 switch (code)
8311 case NE:
8312 strcpy (string, "brnz");
8313 break;
8315 case EQ:
8316 strcpy (string, "brz");
8317 break;
8319 case GE:
8320 strcpy (string, "brgez");
8321 break;
8323 case LT:
8324 strcpy (string, "brlz");
8325 break;
8327 case LE:
8328 strcpy (string, "brlez");
8329 break;
8331 case GT:
8332 strcpy (string, "brgz");
8333 break;
8335 default:
8336 gcc_unreachable ();
8339 p = strchr (string, '\0');
8341 /* Now add the annulling, reg, label, and nop. */
8342 if (annul && ! far)
8344 strcpy (p, ",a");
8345 p += 2;
8348 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8350 strcpy (p,
8351 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8352 ? ",pt" : ",pn");
8353 p += 3;
8356 *p = p < string + 8 ? '\t' : ' ';
8357 p++;
8358 *p++ = '%';
8359 *p++ = '0' + reg;
8360 *p++ = ',';
8361 *p++ = ' ';
8362 if (far)
8364 int veryfar = 1, delta;
8366 if (INSN_ADDRESSES_SET_P ())
8368 delta = (INSN_ADDRESSES (INSN_UID (dest))
8369 - INSN_ADDRESSES (INSN_UID (insn)));
8370 /* Leave some instructions for "slop". */
8371 if (delta >= -260000 && delta < 260000)
8372 veryfar = 0;
8375 strcpy (p, ".+12\n\t nop\n\t");
8376 /* Skip the next insn if requested or
8377 if we know that it will be a nop. */
8378 if (annul || ! final_sequence)
8379 p[3] = '6';
8380 p += 12;
8381 if (veryfar)
8383 strcpy (p, "b\t");
8384 p += 2;
8386 else
8388 strcpy (p, "ba,pt\t%%xcc, ");
8389 p += 13;
8392 *p++ = '%';
8393 *p++ = 'l';
8394 *p++ = '0' + label;
8395 *p++ = '%';
8396 *p++ = '#';
8397 *p = '\0';
8399 return string;
8402 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8403 Such instructions cannot be used in the delay slot of return insn on v9.
8404 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.  */
8407 static int
8408 epilogue_renumber (register rtx *where, int test)
8410 register const char *fmt;
8411 register int i;
8412 register enum rtx_code code;
8414 if (*where == 0)
8415 return 0;
8417 code = GET_CODE (*where);
8419 switch (code)
8421 case REG:
8422 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8423 return 1;
8424 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8425 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
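/* Fall through: once renamed, a plain REG needs no further checks.  */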
8426 case SCRATCH:
8427 case CC0:
8428 case PC:
8429 case CONST_INT:
8430 case CONST_DOUBLE:
8431 return 0;
8433 /* Do not replace the frame pointer with the stack pointer because
8434 it can cause the delayed instruction to load below the stack.
8435 This occurs when instructions like:
8437 (set (reg/i:SI 24 %i0)
8438 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8439 (const_int -20 [0xffffffec])) 0))
8441 are in the return delay slot.  */
8442 case PLUS:
8443 if (GET_CODE (XEXP (*where, 0)) == REG
8444 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8445 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8446 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8447 return 1;
8448 break;
8450 case MEM:
8451 if (SPARC_STACK_BIAS
8452 && GET_CODE (XEXP (*where, 0)) == REG
8453 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8454 return 1;
8455 break;
8457 default:
8458 break;
8461 fmt = GET_RTX_FORMAT (code);
8463 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8465 if (fmt[i] == 'E')
8467 register int j;
8468 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8469 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8470 return 1;
8472 else if (fmt[i] == 'e'
8473 && epilogue_renumber (&(XEXP (*where, i)), test))
8474 return 1;
8476 return 0;
8479 /* Leaf functions and non-leaf functions have different needs. */
8481 static const int
8482 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8484 static const int
8485 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8487 static const int *const reg_alloc_orders[] = {
8488 reg_leaf_alloc_order,
8489 reg_nonleaf_alloc_order};
8491 void
8492 order_regs_for_local_alloc (void)
8494 static int last_order_nonleaf = 1;
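/* Hard register 15 is %o7, which call instructions clobber with the
   return address, so its liveness distinguishes non-leaf functions.  */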
8496 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8498 last_order_nonleaf = !last_order_nonleaf;
8499 memcpy ((char *) reg_alloc_order,
8500 (const char *) reg_alloc_orders[last_order_nonleaf],
8501 FIRST_PSEUDO_REGISTER * sizeof (int));
8505 /* Return 1 if REG and MEM are legitimate enough to allow the various
8506 mem<-->reg splits to be run. */
8508 int
8509 sparc_splitdi_legitimate (rtx reg, rtx mem)
8511 /* Punt if we are here by mistake. */
8512 gcc_assert (reload_completed);
8514 /* We must have an offsettable memory reference. */
8515 if (! offsettable_memref_p (mem))
8516 return 0;
8518 /* If we have legitimate args for ldd/std, we do not want
8519 the split to happen. */
8520 if ((REGNO (reg) % 2) == 0
8521 && mem_min_alignment (mem, 8))
8522 return 0;
8524 /* Success. */
8525 return 1;
8528 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8530 int
8531 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8533 int regno1, regno2;
8535 if (GET_CODE (reg1) == SUBREG)
8536 reg1 = SUBREG_REG (reg1);
8537 if (GET_CODE (reg1) != REG)
8538 return 0;
8539 regno1 = REGNO (reg1);
8541 if (GET_CODE (reg2) == SUBREG)
8542 reg2 = SUBREG_REG (reg2);
8543 if (GET_CODE (reg2) != REG)
8544 return 0;
8545 regno2 = REGNO (reg2);
8547 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8548 return 1;
8550 if (TARGET_VIS3)
8552 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8553 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8554 return 1;
8557 return 0;
8560 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8561 This makes them candidates for using ldd and std insns.
8563 Note reg1 and reg2 *must* be hard registers. */
8565 int
8566 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8568 /* We might have been passed a SUBREG. */
8569 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8570 return 0;
8572 if (REGNO (reg1) % 2 != 0)
8573 return 0;
8575 /* Integer ldd is deprecated in SPARC V9.  */
8576 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8577 return 0;
8579 return (REGNO (reg1) == REGNO (reg2) - 1);
8582 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8583 an ldd or std insn.
8585 This can only happen when addr1 and addr2, the addresses in mem1
8586 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8587 addr1 must also be aligned on a 64-bit boundary.
8589 Also iff dependent_reg_rtx is not null it should not be used to
8590 compute the address for mem1, i.e. we cannot optimize a sequence
8591 like:
8592 ld [%o0], %o0
8593 ld [%o0 + 4], %o1
8594 to
8595 ldd [%o0], %o0
8596 nor:
8597 ld [%g3 + 4], %g3
8598 ld [%g3], %g2
8599 to
8600 ldd [%g3], %g2
8602 But, note that the transformation from:
8603 ld [%g2 + 4], %g3
8604 ld [%g2], %g2
8605 to
8606 ldd [%g2], %g2
8607 is perfectly fine. Thus, the peephole2 patterns always pass us
8608 the destination register of the first load, never the second one.
8610 For stores we don't have a similar problem, so dependent_reg_rtx is
8611 NULL_RTX. */
8613 int
8614 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8616 rtx addr1, addr2;
8617 unsigned int reg1;
8618 HOST_WIDE_INT offset1;
8620 /* The mems cannot be volatile. */
8621 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8622 return 0;
8624 /* MEM1 should be aligned on a 64-bit boundary. */
8625 if (MEM_ALIGN (mem1) < 64)
8626 return 0;
8628 addr1 = XEXP (mem1, 0);
8629 addr2 = XEXP (mem2, 0);
8631 /* Extract a register number and offset (if used) from the first addr. */
8632 if (GET_CODE (addr1) == PLUS)
8634 /* If not a REG, return zero. */
8635 if (GET_CODE (XEXP (addr1, 0)) != REG)
8636 return 0;
8637 else
8639 reg1 = REGNO (XEXP (addr1, 0));
8640 /* The offset must be constant! */
8641 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8642 return 0;
8643 offset1 = INTVAL (XEXP (addr1, 1));
8646 else if (GET_CODE (addr1) != REG)
8647 return 0;
8648 else
8650 reg1 = REGNO (addr1);
8651 /* This was a simple (mem (reg)) expression. Offset is 0. */
8652 offset1 = 0;
8655 /* Make sure the second address is of the form (mem (plus (reg) (const_int))).  */
8656 if (GET_CODE (addr2) != PLUS)
8657 return 0;
8659 if (GET_CODE (XEXP (addr2, 0)) != REG
8660 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8661 return 0;
8663 if (reg1 != REGNO (XEXP (addr2, 0)))
8664 return 0;
8666 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8667 return 0;
8669 /* The first offset must be evenly divisible by 8 to ensure the
8670 address is 64 bit aligned. */
8671 if (offset1 % 8 != 0)
8672 return 0;
8674 /* The offset for the second addr must be 4 more than the first addr. */
8675 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8676 return 0;
8678 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8679 instructions. */
8680 return 1;
8683 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8685 rtx
8686 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8688 rtx x = widen_memory_access (mem1, mode, 0);
8689 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8690 return x;
8693 /* Return 1 if reg is a pseudo, or is the first register in
8694 a hard register pair. This makes it suitable for use in
8695 ldd and std insns. */
8697 int
8698 register_ok_for_ldd (rtx reg)
8700 /* We might have been passed a SUBREG. */
8701 if (!REG_P (reg))
8702 return 0;
8704 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8705 return (REGNO (reg) % 2 == 0);
8707 return 1;
8710 /* Return 1 if OP, a MEM, has an address which is known to be
8711 aligned to an 8-byte boundary. */
8713 int
8714 memory_ok_for_ldd (rtx op)
8716 /* In 64-bit mode, we assume that the address is word-aligned. */
8717 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8718 return 0;
8720 if (! can_create_pseudo_p ()
8721 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8722 return 0;
8724 return 1;
8727 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8729 static bool
8730 sparc_print_operand_punct_valid_p (unsigned char code)
8732 if (code == '#'
8733 || code == '*'
8734 || code == '('
8735 || code == ')'
8736 || code == '_'
8737 || code == '&')
8738 return true;
8740 return false;
8743 /* Implement TARGET_PRINT_OPERAND.
8744 Print operand X (an rtx) in assembler syntax to file FILE.
8745 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8746 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8748 static void
8749 sparc_print_operand (FILE *file, rtx x, int code)
8751 switch (code)
8753 case '#':
8754 /* Output an insn in a delay slot. */
8755 if (final_sequence)
8756 sparc_indent_opcode = 1;
8757 else
8758 fputs ("\n\t nop", file);
8759 return;
8760 case '*':
8761 /* Output an annul flag if there's nothing for the delay slot and we
8762 are optimizing. This is always used with '(' below.
8763 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8764 this is a dbx bug. So, we only do this when optimizing.
8765 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8766 Always emit a nop in case the next instruction is a branch. */
8767 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8768 fputs (",a", file);
8769 return;
8770 case '(':
8771 /* Output a 'nop' if there's nothing for the delay slot and we are
8772 not optimizing. This is always used with '*' above. */
8773 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8774 fputs ("\n\t nop", file);
8775 else if (final_sequence)
8776 sparc_indent_opcode = 1;
8777 return;
8778 case ')':
8779 /* Output the right displacement from the saved PC on function return.
8780 The caller may have placed an "unimp" insn immediately after the call
8781 so we have to account for it. This insn is used in the 32-bit ABI
8782 when calling a function that returns a non zero-sized structure. The
8783 64-bit ABI doesn't have it. Be careful to have this test be the same
8784 as that for the call. The exception is when sparc_std_struct_return
8785 is enabled, the psABI is followed exactly and the adjustment is made
8786 by the code in sparc_struct_value_rtx. The call emitted is the same
8787 when sparc_std_struct_return is enabled. */
8788 if (!TARGET_ARCH64
8789 && cfun->returns_struct
8790 && !sparc_std_struct_return
8791 && DECL_SIZE (DECL_RESULT (current_function_decl))
8792 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8793 == INTEGER_CST
8794 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8795 fputs ("12", file);
8796 else
8797 fputc ('8', file);
8798 return;
8799 case '_':
8800 /* Output the Embedded Medium/Anywhere code model base register. */
8801 fputs (EMBMEDANY_BASE_REG, file);
8802 return;
8803 case '&':
8804 /* Print some local dynamic TLS name. */
8805 if (const char *name = get_some_local_dynamic_name ())
8806 assemble_name (file, name);
8807 else
8808 output_operand_lossage ("'%%&' used without any "
8809 "local dynamic TLS references");
8810 return;
8812 case 'Y':
8813 /* Adjust the operand to take into account a RESTORE operation. */
8814 if (GET_CODE (x) == CONST_INT)
8815 break;
8816 else if (GET_CODE (x) != REG)
8817 output_operand_lossage ("invalid %%Y operand");
8818 else if (REGNO (x) < 8)
8819 fputs (reg_names[REGNO (x)], file);
8820 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8821 fputs (reg_names[REGNO (x)-16], file);
8822 else
8823 output_operand_lossage ("invalid %%Y operand");
8824 return;
8825 case 'L':
8826 /* Print out the low order register name of a register pair. */
8827 if (WORDS_BIG_ENDIAN)
8828 fputs (reg_names[REGNO (x)+1], file);
8829 else
8830 fputs (reg_names[REGNO (x)], file);
8831 return;
8832 case 'H':
8833 /* Print out the high order register name of a register pair. */
8834 if (WORDS_BIG_ENDIAN)
8835 fputs (reg_names[REGNO (x)], file);
8836 else
8837 fputs (reg_names[REGNO (x)+1], file);
8838 return;
8839 case 'R':
8840 /* Print out the second register name of a register pair or quad.
8841 I.e., R (%o0) => %o1. */
8842 fputs (reg_names[REGNO (x)+1], file);
8843 return;
8844 case 'S':
8845 /* Print out the third register name of a register quad.
8846 I.e., S (%o0) => %o2. */
8847 fputs (reg_names[REGNO (x)+2], file);
8848 return;
8849 case 'T':
8850 /* Print out the fourth register name of a register quad.
8851 I.e., T (%o0) => %o3. */
8852 fputs (reg_names[REGNO (x)+3], file);
8853 return;
8854 case 'x':
8855 /* Print a condition code register. */
8856 if (REGNO (x) == SPARC_ICC_REG)
8858 /* We don't handle CC[X]_NOOVmode because they're not supposed
8859 to occur here. */
8860 if (GET_MODE (x) == CCmode)
8861 fputs ("%icc", file);
8862 else if (GET_MODE (x) == CCXmode)
8863 fputs ("%xcc", file);
8864 else
8865 gcc_unreachable ();
8867 else
8868 /* %fccN register */
8869 fputs (reg_names[REGNO (x)], file);
8870 return;
8871 case 'm':
8872 /* Print the operand's address only. */
8873 output_address (XEXP (x, 0));
8874 return;
8875 case 'r':
8876 /* In this case we need a register. Use %g0 if the
8877 operand is const0_rtx. */
8878 if (x == const0_rtx
8879 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8881 fputs ("%g0", file);
8882 return;
8884 else
8885 break;
8887 case 'A':
8888 switch (GET_CODE (x))
8890 case IOR: fputs ("or", file); break;
8891 case AND: fputs ("and", file); break;
8892 case XOR: fputs ("xor", file); break;
8893 default: output_operand_lossage ("invalid %%A operand");
8895 return;
8897 case 'B':
8898 switch (GET_CODE (x))
8900 case IOR: fputs ("orn", file); break;
8901 case AND: fputs ("andn", file); break;
8902 case XOR: fputs ("xnor", file); break;
8903 default: output_operand_lossage ("invalid %%B operand");
8905 return;
8907 /* This is used by the conditional move instructions. */
8908 case 'C':
8910 enum rtx_code rc = GET_CODE (x);
8912 switch (rc)
8914 case NE: fputs ("ne", file); break;
8915 case EQ: fputs ("e", file); break;
8916 case GE: fputs ("ge", file); break;
8917 case GT: fputs ("g", file); break;
8918 case LE: fputs ("le", file); break;
8919 case LT: fputs ("l", file); break;
8920 case GEU: fputs ("geu", file); break;
8921 case GTU: fputs ("gu", file); break;
8922 case LEU: fputs ("leu", file); break;
8923 case LTU: fputs ("lu", file); break;
8924 case LTGT: fputs ("lg", file); break;
8925 case UNORDERED: fputs ("u", file); break;
8926 case ORDERED: fputs ("o", file); break;
8927 case UNLT: fputs ("ul", file); break;
8928 case UNLE: fputs ("ule", file); break;
8929 case UNGT: fputs ("ug", file); break;
8930 case UNGE: fputs ("uge", file); break;
8931 case UNEQ: fputs ("ue", file); break;
8932 default: output_operand_lossage ("invalid %%C operand");
8934 return;
8937 /* These are used by the movr instruction pattern. */
8938 case 'D':
8940 enum rtx_code rc = GET_CODE (x);
8941 switch (rc)
8943 case NE: fputs ("ne", file); break;
8944 case EQ: fputs ("e", file); break;
8945 case GE: fputs ("gez", file); break;
8946 case LT: fputs ("lz", file); break;
8947 case LE: fputs ("lez", file); break;
8948 case GT: fputs ("gz", file); break;
8949 default: output_operand_lossage ("invalid %%D operand");
8951 return;
8954 case 'b':
8956 /* Print a sign-extended character. */
8957 int i = trunc_int_for_mode (INTVAL (x), QImode);
8958 fprintf (file, "%d", i);
8959 return;
8962 case 'f':
8963 /* Operand must be a MEM; write its address. */
8964 if (GET_CODE (x) != MEM)
8965 output_operand_lossage ("invalid %%f operand");
8966 output_address (XEXP (x, 0));
8967 return;
8969 case 's':
8971 /* Print a sign-extended 32-bit value. */
8972 HOST_WIDE_INT i;
8973 if (GET_CODE(x) == CONST_INT)
8974 i = INTVAL (x);
8975 else if (GET_CODE(x) == CONST_DOUBLE)
8976 i = CONST_DOUBLE_LOW (x);
8977 else
8979 output_operand_lossage ("invalid %%s operand");
8980 return;
8982 i = trunc_int_for_mode (i, SImode);
8983 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8984 return;
8987 case 0:
8988 /* Do nothing special. */
8989 break;
8991 default:
8992 /* Undocumented flag. */
8993 output_operand_lossage ("invalid operand output code");
8996 if (GET_CODE (x) == REG)
8997 fputs (reg_names[REGNO (x)], file);
8998 else if (GET_CODE (x) == MEM)
9000 fputc ('[', file);
9001 /* Poor Sun assembler doesn't understand absolute addressing. */
9002 if (CONSTANT_P (XEXP (x, 0)))
9003 fputs ("%g0+", file);
9004 output_address (XEXP (x, 0));
9005 fputc (']', file);
9007 else if (GET_CODE (x) == HIGH)
9009 fputs ("%hi(", file);
9010 output_addr_const (file, XEXP (x, 0));
9011 fputc (')', file);
9013 else if (GET_CODE (x) == LO_SUM)
9015 sparc_print_operand (file, XEXP (x, 0), 0);
9016 if (TARGET_CM_MEDMID)
9017 fputs ("+%l44(", file);
9018 else
9019 fputs ("+%lo(", file);
9020 output_addr_const (file, XEXP (x, 1));
9021 fputc (')', file);
9023 else if (GET_CODE (x) == CONST_DOUBLE
9024 && (GET_MODE (x) == VOIDmode
9025 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
9027 if (CONST_DOUBLE_HIGH (x) == 0)
9028 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
9029 else if (CONST_DOUBLE_HIGH (x) == -1
9030 && CONST_DOUBLE_LOW (x) < 0)
9031 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
9032 else
9033 output_operand_lossage ("long long constant not a valid immediate operand");
9035 else if (GET_CODE (x) == CONST_DOUBLE)
9036 output_operand_lossage ("floating point constant not a valid immediate operand");
9037 else { output_addr_const (file, x); }
9040 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9042 static void
9043 sparc_print_operand_address (FILE *file, rtx x)
9045 register rtx base, index = 0;
9046 int offset = 0;
9047 register rtx addr = x;
9049 if (REG_P (addr))
9050 fputs (reg_names[REGNO (addr)], file);
9051 else if (GET_CODE (addr) == PLUS)
9053 if (CONST_INT_P (XEXP (addr, 0)))
9054 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9055 else if (CONST_INT_P (XEXP (addr, 1)))
9056 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9057 else
9058 base = XEXP (addr, 0), index = XEXP (addr, 1);
9059 if (GET_CODE (base) == LO_SUM)
9061 gcc_assert (USE_AS_OFFSETABLE_LO10
9062 && TARGET_ARCH64
9063 && ! TARGET_CM_MEDMID);
9064 output_operand (XEXP (base, 0), 0);
9065 fputs ("+%lo(", file);
9066 output_address (XEXP (base, 1));
9067 fprintf (file, ")+%d", offset);
9069 else
9071 fputs (reg_names[REGNO (base)], file);
9072 if (index == 0)
9073 fprintf (file, "%+d", offset);
9074 else if (REG_P (index))
9075 fprintf (file, "+%s", reg_names[REGNO (index)]);
9076 else if (GET_CODE (index) == SYMBOL_REF
9077 || GET_CODE (index) == LABEL_REF
9078 || GET_CODE (index) == CONST)
9079 fputc ('+', file), output_addr_const (file, index);
9080 else gcc_unreachable ();
9083 else if (GET_CODE (addr) == MINUS
9084 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9086 output_addr_const (file, XEXP (addr, 0));
9087 fputs ("-(", file);
9088 output_addr_const (file, XEXP (addr, 1));
9089 fputs ("-.)", file);
9091 else if (GET_CODE (addr) == LO_SUM)
9093 output_operand (XEXP (addr, 0), 0);
9094 if (TARGET_CM_MEDMID)
9095 fputs ("+%l44(", file);
9096 else
9097 fputs ("+%lo(", file);
9098 output_address (XEXP (addr, 1));
9099 fputc (')', file);
9101 else if (flag_pic
9102 && GET_CODE (addr) == CONST
9103 && GET_CODE (XEXP (addr, 0)) == MINUS
9104 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9105 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9106 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9108 addr = XEXP (addr, 0);
9109 output_addr_const (file, XEXP (addr, 0));
9110 /* Group the args of the second CONST in parenthesis. */
9111 fputs ("-(", file);
9112 /* Skip past the second CONST--it does nothing for us. */
9113 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9114 /* Close the parenthesis. */
9115 fputc (')', file);
9117 else
9119 output_addr_const (file, addr);
9123 /* Target hook for assembling integer objects. The sparc version has
9124 special handling for aligned DI-mode objects. */
9126 static bool
9127 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9129 /* ??? We only output .xword's for symbols and only then in environments
9130 where the assembler can handle them. */
9131 if (aligned_p && size == 8
9132 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
9134 if (TARGET_V9)
9136 assemble_integer_with_op ("\t.xword\t", x);
9137 return true;
9139 else
9141 assemble_aligned_integer (4, const0_rtx);
9142 assemble_aligned_integer (4, x);
9143 return true;
9146 return default_assemble_integer (x, size, aligned_p);
9149 /* Return the value of a code used in the .proc pseudo-op that says
9150 what kind of result this function returns. For non-C types, we pick
9151 the closest C type. */
9153 #ifndef SHORT_TYPE_SIZE
9154 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9155 #endif
9157 #ifndef INT_TYPE_SIZE
9158 #define INT_TYPE_SIZE BITS_PER_WORD
9159 #endif
9161 #ifndef LONG_TYPE_SIZE
9162 #define LONG_TYPE_SIZE BITS_PER_WORD
9163 #endif
9165 #ifndef LONG_LONG_TYPE_SIZE
9166 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9167 #endif
9169 #ifndef FLOAT_TYPE_SIZE
9170 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9171 #endif
9173 #ifndef DOUBLE_TYPE_SIZE
9174 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9175 #endif
9177 #ifndef LONG_DOUBLE_TYPE_SIZE
9178 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9179 #endif
9181 unsigned long
9182 sparc_type_code (register tree type)
9184 register unsigned long qualifiers = 0;
9185 register unsigned shift;
9187 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9188 setting more, since some assemblers will give an error for this. Also,
9189 we must be careful to avoid shifts of 32 bits or more to avoid getting
9190 unpredictable results. */
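/* For example, for "int *" the first iteration ORs in the pointer
   code (1 << 6) and the second returns with the plain-int code 4 in
   the low bits, giving 0x44.  */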
9192 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9194 switch (TREE_CODE (type))
9196 case ERROR_MARK:
9197 return qualifiers;
9199 case ARRAY_TYPE:
9200 qualifiers |= (3 << shift);
9201 break;
9203 case FUNCTION_TYPE:
9204 case METHOD_TYPE:
9205 qualifiers |= (2 << shift);
9206 break;
9208 case POINTER_TYPE:
9209 case REFERENCE_TYPE:
9210 case OFFSET_TYPE:
9211 qualifiers |= (1 << shift);
9212 break;
9214 case RECORD_TYPE:
9215 return (qualifiers | 8);
9217 case UNION_TYPE:
9218 case QUAL_UNION_TYPE:
9219 return (qualifiers | 9);
9221 case ENUMERAL_TYPE:
9222 return (qualifiers | 10);
9224 case VOID_TYPE:
9225 return (qualifiers | 16);
9227 case INTEGER_TYPE:
9228 /* If this is a range type, consider it to be the underlying
9229 type. */
9230 if (TREE_TYPE (type) != 0)
9231 break;
9233 /* Carefully distinguish all the standard types of C,
9234 without messing up if the language is not C. We do this by
9235 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9236 look at both the names and the above fields, but that's redundant.
9237 Any type whose size is between two C types will be considered
9238 to be the wider of the two types. Also, we do not have a
9239 special code to use for "long long", so anything wider than
9240 long is treated the same. Note that we can't distinguish
9241 between "int" and "long" in this code if they are the same
9242 size, but that's fine, since neither can the assembler. */
9244 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9245 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9247 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9248 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9250 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9251 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9253 else
9254 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9256 case REAL_TYPE:
9257 /* If this is a range type, consider it to be the underlying
9258 type. */
9259 if (TREE_TYPE (type) != 0)
9260 break;
9262 /* Carefully distinguish all the standard types of C,
9263 without messing up if the language is not C. */
9265 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9266 return (qualifiers | 6);
9268 else
9269 return (qualifiers | 7);
9271 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9272 /* ??? We need to distinguish between double and float complex types,
9273 but I don't know how yet because I can't reach this code from
9274 existing front-ends. */
9275 return (qualifiers | 7); /* Who knows? */
9277 case VECTOR_TYPE:
9278 case BOOLEAN_TYPE: /* Boolean truth value type. */
9279 case LANG_TYPE:
9280 case NULLPTR_TYPE:
9281 return qualifiers;
9283 default:
9284 gcc_unreachable (); /* Not a type! */
9288 return qualifiers;
9291 /* Nested function support. */
9293 /* Emit RTL insns to initialize the variable parts of a trampoline.
9294 FNADDR is an RTX for the address of the function's pure code.
9295 CXT is an RTX for the static chain value for the function.
9297 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9298 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9299 (to store insns). This is a bit excessive. Perhaps a different
9300 mechanism would be better here.
9302 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9304 static void
9305 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9307 /* SPARC 32-bit trampoline:
9309 sethi %hi(fn), %g1
9310 sethi %hi(static), %g2
9311 jmp %g1+%lo(fn)
9312 or %g2, %lo(static), %g2
9314 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9315 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
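Decoded, the constants below are exactly these templates:
0x03000000 = sethi %hi(0), %g1      0x05000000 = sethi %hi(0), %g2
0x81c06000 = jmpl %g1+%lo(0), %g0   0x8410a000 = or %g2, %lo(0), %g2
and the IORs merge in the pieces of FNADDR and CXT.  */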
9318 emit_move_insn
9319 (adjust_address (m_tramp, SImode, 0),
9320 expand_binop (SImode, ior_optab,
9321 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9322 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9323 NULL_RTX, 1, OPTAB_DIRECT));
9325 emit_move_insn
9326 (adjust_address (m_tramp, SImode, 4),
9327 expand_binop (SImode, ior_optab,
9328 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9329 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9330 NULL_RTX, 1, OPTAB_DIRECT));
9332 emit_move_insn
9333 (adjust_address (m_tramp, SImode, 8),
9334 expand_binop (SImode, ior_optab,
9335 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9336 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9337 NULL_RTX, 1, OPTAB_DIRECT));
9339 emit_move_insn
9340 (adjust_address (m_tramp, SImode, 12),
9341 expand_binop (SImode, ior_optab,
9342 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9343 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9344 NULL_RTX, 1, OPTAB_DIRECT));
9346 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9347 aligned on a 16 byte boundary so one flush clears it all. */
9348 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9349 if (sparc_cpu != PROCESSOR_ULTRASPARC
9350 && sparc_cpu != PROCESSOR_ULTRASPARC3
9351 && sparc_cpu != PROCESSOR_NIAGARA
9352 && sparc_cpu != PROCESSOR_NIAGARA2
9353 && sparc_cpu != PROCESSOR_NIAGARA3
9354 && sparc_cpu != PROCESSOR_NIAGARA4)
9355 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9357 /* Call __enable_execute_stack after writing onto the stack to make sure
9358 the stack address is accessible. */
9359 #ifdef HAVE_ENABLE_EXECUTE_STACK
9360 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9361 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9362 #endif
9366 /* The 64-bit version is simpler because it makes more sense to load the
9367 values as "immediate" data out of the trampoline. It's also easier since
9368 we can read the PC without clobbering a register. */
9370 static void
9371 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9373 /* SPARC 64-bit trampoline:
9375 rd %pc, %g1
9376 ldx [%g1+24], %g5
9377 jmp %g5
9378 ldx [%g1+16], %g5
9379 +16 bytes data
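The four SImode constants stored below are these four instructions in
order; the data words at +16 and +24 receive CXT and FNADDR.  */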
9382 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9383 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9384 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9385 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9386 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9387 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9388 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9389 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9390 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9391 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9392 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9394 if (sparc_cpu != PROCESSOR_ULTRASPARC
9395 && sparc_cpu != PROCESSOR_ULTRASPARC3
9396 && sparc_cpu != PROCESSOR_NIAGARA
9397 && sparc_cpu != PROCESSOR_NIAGARA2
9398 && sparc_cpu != PROCESSOR_NIAGARA3
9399 && sparc_cpu != PROCESSOR_NIAGARA4)
9400 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9402 /* Call __enable_execute_stack after writing onto the stack to make sure
9403 the stack address is accessible. */
9404 #ifdef HAVE_ENABLE_EXECUTE_STACK
9405 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9406 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9407 #endif
9410 /* Worker for TARGET_TRAMPOLINE_INIT. */
9412 static void
9413 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9415 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9416 cxt = force_reg (Pmode, cxt);
9417 if (TARGET_ARCH64)
9418 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9419 else
9420 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9423 /* Adjust the cost of a scheduling dependency. Return the new cost of
9424 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9426 static int
9427 supersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9429 enum attr_type insn_type;
9431 if (! recog_memoized (insn))
9432 return 0;
9434 insn_type = get_attr_type (insn);
9436 if (REG_NOTE_KIND (link) == 0)
9438 /* Data dependency; DEP_INSN writes a register that INSN reads some
9439 cycles later. */
9441 /* If a load, then the dependence must be on the memory address;
9442    add an extra "cycle".  Note that the cost could be two cycles
9443    if the reg was written late in an instruction group; we cannot
9444    tell here. */
9445 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9446 return cost + 3;
9448 /* Get the delay only if the address of the store is the dependence. */
9449 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9451 rtx pat = PATTERN(insn);
9452 rtx dep_pat = PATTERN (dep_insn);
9454 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9455 return cost; /* This should not happen! */
9457 /* The dependency between the two instructions was on the data that
9458 is being stored. Assume that this implies that the address of the
9459 store is not dependent. */
9460 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9461 return cost;
9463 return cost + 3; /* An approximation. */
9466 /* A shift instruction cannot receive its data from an instruction
9467 in the same cycle; add a one cycle penalty. */
9468 if (insn_type == TYPE_SHIFT)
9469 return cost + 3; /* Split before cascade into shift. */
9471 else
9473 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9474 INSN writes some cycles later. */
9476 /* These are only significant for the fpu unit; writing a fp reg before
9477 the fpu has finished with it stalls the processor. */
9479 /* Reusing an integer register causes no problems. */
9480 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9481 return 0;
9484 return cost;
9487 static int
9488 hypersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9490 enum attr_type insn_type, dep_type;
9491 rtx pat = PATTERN(insn);
9492 rtx dep_pat = PATTERN (dep_insn);
9494 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9495 return cost;
9497 insn_type = get_attr_type (insn);
9498 dep_type = get_attr_type (dep_insn);
9500 switch (REG_NOTE_KIND (link))
9502 case 0:
9503 /* Data dependency; DEP_INSN writes a register that INSN reads some
9504 cycles later. */
9506 switch (insn_type)
9508 case TYPE_STORE:
9509 case TYPE_FPSTORE:
9510 /* Get the delay iff the address of the store is the dependence. */
9511 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9512 return cost;
9514 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9515 return cost;
9516 return cost + 3;
9518 case TYPE_LOAD:
9519 case TYPE_SLOAD:
9520 case TYPE_FPLOAD:
9521 /* If a load, then the dependence must be on the memory address. If
9522 the addresses aren't equal, then it might be a false dependency.  */
9523 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9525 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9526 || GET_CODE (SET_DEST (dep_pat)) != MEM
9527 || GET_CODE (SET_SRC (pat)) != MEM
9528 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9529 XEXP (SET_SRC (pat), 0)))
9530 return cost + 2;
9532 return cost + 8;
9534 break;
9536 case TYPE_BRANCH:
9537 /* Compare to branch latency is 0. There is no benefit from
9538 separating compare and branch. */
9539 if (dep_type == TYPE_COMPARE)
9540 return 0;
9541 /* Floating point compare to branch latency is less than
9542 compare to conditional move. */
9543 if (dep_type == TYPE_FPCMP)
9544 return cost - 1;
9545 break;
9546 default:
9547 break;
9549 break;
9551 case REG_DEP_ANTI:
9552 /* Anti-dependencies only penalize the fpu unit. */
9553 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9554 return 0;
9555 break;
9557 default:
9558 break;
9561 return cost;
9564 static int
9565 sparc_adjust_cost(rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
9567 switch (sparc_cpu)
9569 case PROCESSOR_SUPERSPARC:
9570 cost = supersparc_adjust_cost (insn, link, dep, cost);
9571 break;
9572 case PROCESSOR_HYPERSPARC:
9573 case PROCESSOR_SPARCLITE86X:
9574 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9575 break;
9576 default:
9577 break;
9579 return cost;
9582 static void
9583 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9584 int sched_verbose ATTRIBUTE_UNUSED,
9585 int max_ready ATTRIBUTE_UNUSED)
9588 static int
9589 sparc_use_sched_lookahead (void)
9591 if (sparc_cpu == PROCESSOR_NIAGARA
9592 || sparc_cpu == PROCESSOR_NIAGARA2
9593 || sparc_cpu == PROCESSOR_NIAGARA3)
9594 return 0;
9595 if (sparc_cpu == PROCESSOR_NIAGARA4)
9596 return 2;
9597 if (sparc_cpu == PROCESSOR_ULTRASPARC
9598 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9599 return 4;
9600 if ((1 << sparc_cpu) &
9601 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9602 (1 << PROCESSOR_SPARCLITE86X)))
9603 return 3;
9604 return 0;
9607 static int
9608 sparc_issue_rate (void)
9610 switch (sparc_cpu)
9612 case PROCESSOR_NIAGARA:
9613 case PROCESSOR_NIAGARA2:
9614 case PROCESSOR_NIAGARA3:
9615 default:
9616 return 1;
9617 case PROCESSOR_NIAGARA4:
9618 case PROCESSOR_V9:
9619 /* Assume V9 processors are capable of at least dual-issue. */
9620 return 2;
9621 case PROCESSOR_SUPERSPARC:
9622 return 3;
9623 case PROCESSOR_HYPERSPARC:
9624 case PROCESSOR_SPARCLITE86X:
9625 return 2;
9626 case PROCESSOR_ULTRASPARC:
9627 case PROCESSOR_ULTRASPARC3:
9628 return 4;
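/* Classify how the SET in INSN affects the high 32 bits of its
   destination: return 1 if they are known to be zero, -1 if the value
   is sign-extended from 32 bits, 0 if unknown.  Helper for
   sparc_check_64.  */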
9632 static int
9633 set_extends (rtx_insn *insn)
9635 register rtx pat = PATTERN (insn);
9637 switch (GET_CODE (SET_SRC (pat)))
9639 /* Load and some shift instructions zero extend. */
9640 case MEM:
9641 case ZERO_EXTEND:
9642 /* sethi clears the high bits */
9643 case HIGH:
9644 /* LO_SUM is used with sethi.  sethi cleared the high
9645    bits and the values used with lo_sum are positive.  */
9646 case LO_SUM:
9647 /* Store flag stores 0 or 1 */
9648 case LT: case LTU:
9649 case GT: case GTU:
9650 case LE: case LEU:
9651 case GE: case GEU:
9652 case EQ:
9653 case NE:
9654 return 1;
9655 case AND:
9657 rtx op0 = XEXP (SET_SRC (pat), 0);
9658 rtx op1 = XEXP (SET_SRC (pat), 1);
9659 if (GET_CODE (op1) == CONST_INT)
9660 return INTVAL (op1) >= 0;
9661 if (GET_CODE (op0) != REG)
9662 return 0;
9663 if (sparc_check_64 (op0, insn) == 1)
9664 return 1;
9665 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9667 case IOR:
9668 case XOR:
9670 rtx op0 = XEXP (SET_SRC (pat), 0);
9671 rtx op1 = XEXP (SET_SRC (pat), 1);
9672 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9673 return 0;
9674 if (GET_CODE (op1) == CONST_INT)
9675 return INTVAL (op1) >= 0;
9676 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9678 case LSHIFTRT:
9679 return GET_MODE (SET_SRC (pat)) == SImode;
9680 /* Positive integers leave the high bits zero. */
9681 case CONST_DOUBLE:
9682 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9683 case CONST_INT:
9684 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9685 case ASHIFTRT:
9686 case SIGN_EXTEND:
9687 return - (GET_MODE (SET_SRC (pat)) == SImode);
9688 case REG:
9689 return sparc_check_64 (SET_SRC (pat), insn);
9690 default:
9691 return 0;
9695 /* We _ought_ to have only one kind per function, but... */
9696 static GTY(()) rtx sparc_addr_diff_list;
9697 static GTY(()) rtx sparc_addr_list;
9699 void
9700 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9702 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9703 if (diff)
9704 sparc_addr_diff_list
9705 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9706 else
9707 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9710 static void
9711 sparc_output_addr_vec (rtx vec)
9713 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9714 int idx, vlen = XVECLEN (body, 0);
9716 #ifdef ASM_OUTPUT_ADDR_VEC_START
9717 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9718 #endif
9720 #ifdef ASM_OUTPUT_CASE_LABEL
9721 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9722 NEXT_INSN (lab));
9723 #else
9724 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9725 #endif
9727 for (idx = 0; idx < vlen; idx++)
9729 ASM_OUTPUT_ADDR_VEC_ELT
9730 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9733 #ifdef ASM_OUTPUT_ADDR_VEC_END
9734 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9735 #endif
9738 static void
9739 sparc_output_addr_diff_vec (rtx vec)
9741 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9742 rtx base = XEXP (XEXP (body, 0), 0);
9743 int idx, vlen = XVECLEN (body, 1);
9745 #ifdef ASM_OUTPUT_ADDR_VEC_START
9746 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9747 #endif
9749 #ifdef ASM_OUTPUT_CASE_LABEL
9750 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9751 NEXT_INSN (lab));
9752 #else
9753 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9754 #endif
9756 for (idx = 0; idx < vlen; idx++)
9758 ASM_OUTPUT_ADDR_DIFF_ELT
9759 (asm_out_file,
9760 body,
9761 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9762 CODE_LABEL_NUMBER (base));
9765 #ifdef ASM_OUTPUT_ADDR_VEC_END
9766 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9767 #endif
9770 static void
9771 sparc_output_deferred_case_vectors (void)
9773 rtx t;
9774 int align;
9776 if (sparc_addr_list == NULL_RTX
9777 && sparc_addr_diff_list == NULL_RTX)
9778 return;
9780 /* Align to cache line in the function's code section. */
9781 switch_to_section (current_function_section ());
9783 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9784 if (align > 0)
9785 ASM_OUTPUT_ALIGN (asm_out_file, align);
9787 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9788 sparc_output_addr_vec (XEXP (t, 0));
9789 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9790 sparc_output_addr_diff_vec (XEXP (t, 0));
9792 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9795 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9796 unknown. Return 1 if the high bits are zero, -1 if the register is
9797 sign extended. */
9798 int
9799 sparc_check_64 (rtx x, rtx_insn *insn)
9801 /* If a register is set only once it is safe to ignore insns this
9802 code does not know how to handle. The loop will either recognize
9803 the single set and return the correct value or fail to recognize
9804 it and return 0. */
9805 int set_once = 0;
9806 rtx y = x;
9808 gcc_assert (GET_CODE (x) == REG);
9810 if (GET_MODE (x) == DImode)
9811 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9813 if (flag_expensive_optimizations
9814 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9815 set_once = 1;
9817 if (insn == 0)
9819 if (set_once)
9820 insn = get_last_insn_anywhere ();
9821 else
9822 return 0;
9825 while ((insn = PREV_INSN (insn)))
9827 switch (GET_CODE (insn))
9829 case JUMP_INSN:
9830 case NOTE:
9831 break;
9832 case CODE_LABEL:
9833 case CALL_INSN:
9834 default:
9835 if (! set_once)
9836 return 0;
9837 break;
9838 case INSN:
9840 rtx pat = PATTERN (insn);
9841 if (GET_CODE (pat) != SET)
9842 return 0;
9843 if (rtx_equal_p (x, SET_DEST (pat)))
9844 return set_extends (insn);
9845 if (y && rtx_equal_p (y, SET_DEST (pat)))
9846 return set_extends (insn);
9847 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9848 return 0;
9852 return 0;
9855 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9856 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9858 const char *
9859 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9861 static char asm_code[60];
9863 /* The scratch register is only required when the destination
9864 register is not a 64-bit global or out register. */
9865 if (which_alternative != 2)
9866 operands[3] = operands[0];
9868 /* We can only shift by constants <= 63. */
9869 if (GET_CODE (operands[2]) == CONST_INT)
9870 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9872 if (GET_CODE (operands[1]) == CONST_INT)
9874 output_asm_insn ("mov\t%1, %3", operands);
9876 else
9878 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9879 if (sparc_check_64 (operands[1], insn) <= 0)
9880 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9881 output_asm_insn ("or\t%L1, %3, %3", operands);
9884 strcpy (asm_code, opcode);
9886 if (which_alternative != 2)
9887 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9888 else
9889 return
9890 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9893 /* Output rtl to increment the profiler label LABELNO
9894 for profiling a function entry. */
9896 void
9897 sparc_profile_hook (int labelno)
9899 char buf[32];
9900 rtx lab, fun;
9902 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9903 if (NO_PROFILE_COUNTERS)
9905 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9907 else
9909 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9910 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9911 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9915 #ifdef TARGET_SOLARIS
9916 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9918 static void
9919 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9920 tree decl ATTRIBUTE_UNUSED)
9922 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9924 solaris_elf_asm_comdat_section (name, flags, decl);
9925 return;
9928 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9930 if (!(flags & SECTION_DEBUG))
9931 fputs (",#alloc", asm_out_file);
9932 if (flags & SECTION_WRITE)
9933 fputs (",#write", asm_out_file);
9934 if (flags & SECTION_TLS)
9935 fputs (",#tls", asm_out_file);
9936 if (flags & SECTION_CODE)
9937 fputs (",#execinstr", asm_out_file);
9939 /* Sun as only supports #nobits/#progbits since Solaris 10. */
9940 if (HAVE_AS_SPARC_NOBITS)
9942 if (flags & SECTION_BSS)
9943 fputs (",#nobits", asm_out_file);
9944 else
9945 fputs (",#progbits", asm_out_file);
9948 fputc ('\n', asm_out_file);
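/* Editor's note (illustrative, not in the original source): for a
   writable TLS data section, the code above emits a directive of the
   form

     .section ".tdata",#alloc,#write,#tls,#progbits

   with the trailing #progbits/#nobits appended only when the
   assembler is recent enough to support it.  */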
9950 #endif /* TARGET_SOLARIS */
9952 /* We do not allow indirect calls to be optimized into sibling calls.
9954 We cannot use sibling calls when delayed branches are disabled
9955 because they will likely require the call delay slot to be filled.
9957 Also, on SPARC 32-bit we cannot emit a sibling call when the
9958 current function returns a structure. This is because the "unimp
9959 after call" convention would cause the callee to return to the
9960 wrong place. The generic code already disallows cases where the
9961 function being called returns a structure.
9963 It may seem strange how this last case could occur. Usually there
9964 is code after the call which jumps to epilogue code which dumps the
9965 return value into the struct return area. That ought to invalidate
9966 the sibling call, right? Well, in the C++ case we can end up passing
9967 the pointer to the struct return area to a constructor (which returns
9968 void) and then nothing else happens. Such a sibling call would look
9969 valid without the added check here.
9971 VxWorks PIC PLT entries require the global pointer to be initialized
9972 on entry. We therefore can't emit sibling calls to them. */
9973 static bool
9974 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9976 return (decl
9977 && flag_delayed_branch
9978 && (TARGET_ARCH64 || ! cfun->returns_struct)
9979 && !(TARGET_VXWORKS_RTP
9980 && flag_pic
9981 && !targetm.binds_local_p (decl)));
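/* Editor's sketch (illustrative, not in the original source) of the
   C++ scenario described above: the enclosing function returns a
   struct, but the call in tail position is to a constructor returning
   void, so only the cfun->returns_struct check above catches it on
   32-bit SPARC.

     struct S { S (); int x; };

     S f (void)
     {
       return S ();   // conceptually tail-calls S::S (&<return slot>);
                      // the constructor returns void, yet sibcalling it
                      // would break the "unimp after call" convention
     }
*/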
9984 /* libfunc renaming. */
9986 static void
9987 sparc_init_libfuncs (void)
9989 if (TARGET_ARCH32)
9991 /* Use the subroutines that Sun's library provides for integer
9992 multiply and divide. The `*' prevents an underscore from
9993 being prepended by the compiler. .umul is a little faster
9994 than .mul. */
9995 set_optab_libfunc (smul_optab, SImode, "*.umul");
9996 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9997 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9998 set_optab_libfunc (smod_optab, SImode, "*.rem");
9999 set_optab_libfunc (umod_optab, SImode, "*.urem");
10001 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
10002 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10003 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10004 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10005 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10006 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10008 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10009 is because with soft-float, the SFmode and DFmode sqrt
10010 instructions will be absent, and the compiler will notice and
10011 try to use the TFmode sqrt instruction for calls to the
10012 builtin function sqrt, but this fails. */
10013 if (TARGET_FPU)
10014 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10016 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10017 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10018 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10019 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10020 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10021 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10023 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10024 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10025 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10026 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10028 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10029 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10030 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10031 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10033 if (DITF_CONVERSION_LIBFUNCS)
10035 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10036 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10037 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10038 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10041 if (SUN_CONVERSION_LIBFUNCS)
10043 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10044 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10045 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10046 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10049 if (TARGET_ARCH64)
10051 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10052 do not exist in the library. Make sure the compiler does not
10053 emit calls to them by accident. (It should always use the
10054 hardware instructions.) */
10055 set_optab_libfunc (smul_optab, SImode, 0);
10056 set_optab_libfunc (sdiv_optab, SImode, 0);
10057 set_optab_libfunc (udiv_optab, SImode, 0);
10058 set_optab_libfunc (smod_optab, SImode, 0);
10059 set_optab_libfunc (umod_optab, SImode, 0);
10061 if (SUN_INTEGER_MULTIPLY_64)
10063 set_optab_libfunc (smul_optab, DImode, "__mul64");
10064 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10065 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10066 set_optab_libfunc (smod_optab, DImode, "__rem64");
10067 set_optab_libfunc (umod_optab, DImode, "__urem64");
10070 if (SUN_CONVERSION_LIBFUNCS)
10072 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10073 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10074 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10075 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10080 /* SPARC builtins. */
10081 enum sparc_builtins
10083 /* FPU builtins. */
10084 SPARC_BUILTIN_LDFSR,
10085 SPARC_BUILTIN_STFSR,
10087 /* VIS 1.0 builtins. */
10088 SPARC_BUILTIN_FPACK16,
10089 SPARC_BUILTIN_FPACK32,
10090 SPARC_BUILTIN_FPACKFIX,
10091 SPARC_BUILTIN_FEXPAND,
10092 SPARC_BUILTIN_FPMERGE,
10093 SPARC_BUILTIN_FMUL8X16,
10094 SPARC_BUILTIN_FMUL8X16AU,
10095 SPARC_BUILTIN_FMUL8X16AL,
10096 SPARC_BUILTIN_FMUL8SUX16,
10097 SPARC_BUILTIN_FMUL8ULX16,
10098 SPARC_BUILTIN_FMULD8SUX16,
10099 SPARC_BUILTIN_FMULD8ULX16,
10100 SPARC_BUILTIN_FALIGNDATAV4HI,
10101 SPARC_BUILTIN_FALIGNDATAV8QI,
10102 SPARC_BUILTIN_FALIGNDATAV2SI,
10103 SPARC_BUILTIN_FALIGNDATADI,
10104 SPARC_BUILTIN_WRGSR,
10105 SPARC_BUILTIN_RDGSR,
10106 SPARC_BUILTIN_ALIGNADDR,
10107 SPARC_BUILTIN_ALIGNADDRL,
10108 SPARC_BUILTIN_PDIST,
10109 SPARC_BUILTIN_EDGE8,
10110 SPARC_BUILTIN_EDGE8L,
10111 SPARC_BUILTIN_EDGE16,
10112 SPARC_BUILTIN_EDGE16L,
10113 SPARC_BUILTIN_EDGE32,
10114 SPARC_BUILTIN_EDGE32L,
10115 SPARC_BUILTIN_FCMPLE16,
10116 SPARC_BUILTIN_FCMPLE32,
10117 SPARC_BUILTIN_FCMPNE16,
10118 SPARC_BUILTIN_FCMPNE32,
10119 SPARC_BUILTIN_FCMPGT16,
10120 SPARC_BUILTIN_FCMPGT32,
10121 SPARC_BUILTIN_FCMPEQ16,
10122 SPARC_BUILTIN_FCMPEQ32,
10123 SPARC_BUILTIN_FPADD16,
10124 SPARC_BUILTIN_FPADD16S,
10125 SPARC_BUILTIN_FPADD32,
10126 SPARC_BUILTIN_FPADD32S,
10127 SPARC_BUILTIN_FPSUB16,
10128 SPARC_BUILTIN_FPSUB16S,
10129 SPARC_BUILTIN_FPSUB32,
10130 SPARC_BUILTIN_FPSUB32S,
10131 SPARC_BUILTIN_ARRAY8,
10132 SPARC_BUILTIN_ARRAY16,
10133 SPARC_BUILTIN_ARRAY32,
10135 /* VIS 2.0 builtins. */
10136 SPARC_BUILTIN_EDGE8N,
10137 SPARC_BUILTIN_EDGE8LN,
10138 SPARC_BUILTIN_EDGE16N,
10139 SPARC_BUILTIN_EDGE16LN,
10140 SPARC_BUILTIN_EDGE32N,
10141 SPARC_BUILTIN_EDGE32LN,
10142 SPARC_BUILTIN_BMASK,
10143 SPARC_BUILTIN_BSHUFFLEV4HI,
10144 SPARC_BUILTIN_BSHUFFLEV8QI,
10145 SPARC_BUILTIN_BSHUFFLEV2SI,
10146 SPARC_BUILTIN_BSHUFFLEDI,
10148 /* VIS 3.0 builtins. */
10149 SPARC_BUILTIN_CMASK8,
10150 SPARC_BUILTIN_CMASK16,
10151 SPARC_BUILTIN_CMASK32,
10152 SPARC_BUILTIN_FCHKSM16,
10153 SPARC_BUILTIN_FSLL16,
10154 SPARC_BUILTIN_FSLAS16,
10155 SPARC_BUILTIN_FSRL16,
10156 SPARC_BUILTIN_FSRA16,
10157 SPARC_BUILTIN_FSLL32,
10158 SPARC_BUILTIN_FSLAS32,
10159 SPARC_BUILTIN_FSRL32,
10160 SPARC_BUILTIN_FSRA32,
10161 SPARC_BUILTIN_PDISTN,
10162 SPARC_BUILTIN_FMEAN16,
10163 SPARC_BUILTIN_FPADD64,
10164 SPARC_BUILTIN_FPSUB64,
10165 SPARC_BUILTIN_FPADDS16,
10166 SPARC_BUILTIN_FPADDS16S,
10167 SPARC_BUILTIN_FPSUBS16,
10168 SPARC_BUILTIN_FPSUBS16S,
10169 SPARC_BUILTIN_FPADDS32,
10170 SPARC_BUILTIN_FPADDS32S,
10171 SPARC_BUILTIN_FPSUBS32,
10172 SPARC_BUILTIN_FPSUBS32S,
10173 SPARC_BUILTIN_FUCMPLE8,
10174 SPARC_BUILTIN_FUCMPNE8,
10175 SPARC_BUILTIN_FUCMPGT8,
10176 SPARC_BUILTIN_FUCMPEQ8,
10177 SPARC_BUILTIN_FHADDS,
10178 SPARC_BUILTIN_FHADDD,
10179 SPARC_BUILTIN_FHSUBS,
10180 SPARC_BUILTIN_FHSUBD,
10181 SPARC_BUILTIN_FNHADDS,
10182 SPARC_BUILTIN_FNHADDD,
10183 SPARC_BUILTIN_UMULXHI,
10184 SPARC_BUILTIN_XMULX,
10185 SPARC_BUILTIN_XMULXHI,
10187 SPARC_BUILTIN_MAX
10190 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10191 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10193 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10194 function decl or NULL_TREE if the builtin was not added. */
10196 static tree
10197 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10198 tree type)
10200 tree t
10201 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10203 if (t)
10205 sparc_builtins[code] = t;
10206 sparc_builtins_icode[code] = icode;
10209 return t;
10212 /* Likewise, but also marks the function as "const". */
10214 static tree
10215 def_builtin_const (const char *name, enum insn_code icode,
10216 enum sparc_builtins code, tree type)
10218 tree t = def_builtin (name, icode, code, type);
10220 if (t)
10221 TREE_READONLY (t) = 1;
10223 return t;
10226 /* Implement the TARGET_INIT_BUILTINS target hook.
10227 Create builtin functions for special SPARC instructions. */
10229 static void
10230 sparc_init_builtins (void)
10232 if (TARGET_FPU)
10233 sparc_fpu_init_builtins ();
10235 if (TARGET_VIS)
10236 sparc_vis_init_builtins ();
10239 /* Create builtin functions for FPU instructions. */
10241 static void
10242 sparc_fpu_init_builtins (void)
10244 tree ftype
10245 = build_function_type_list (void_type_node,
10246 build_pointer_type (unsigned_type_node), 0);
10247 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10248 SPARC_BUILTIN_LDFSR, ftype);
10249 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10250 SPARC_BUILTIN_STFSR, ftype);
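/* Editor's sketch (illustrative usage, not in the original source):
   both builtins take a pointer to an unsigned int holding the %fsr
   image; the store builtin copies the register to memory and the load
   builtin copies memory back into the register.

     unsigned int fsr;
     __builtin_store_fsr (&fsr);   // copy %fsr to memory
     fsr &= ~0x1fu;                // e.g. clear the cexc field (bits 0-4)
     __builtin_load_fsr (&fsr);    // copy the modified image back to %fsr
*/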
10253 /* Create builtin functions for VIS instructions. */
10255 static void
10256 sparc_vis_init_builtins (void)
10258 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10259 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10260 tree v4hi = build_vector_type (intHI_type_node, 4);
10261 tree v2hi = build_vector_type (intHI_type_node, 2);
10262 tree v2si = build_vector_type (intSI_type_node, 2);
10263 tree v1si = build_vector_type (intSI_type_node, 1);
10265 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10266 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10267 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10268 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10269 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10270 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10271 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10272 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10273 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10274 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10275 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10276 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10277 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10278 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10279 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10280 v8qi, v8qi,
10281 intDI_type_node, 0);
10282 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10283 v8qi, v8qi, 0);
10284 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10285 v8qi, v8qi, 0);
10286 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10287 intDI_type_node,
10288 intDI_type_node, 0);
10289 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10290 intSI_type_node,
10291 intSI_type_node, 0);
10292 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10293 ptr_type_node,
10294 intSI_type_node, 0);
10295 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10296 ptr_type_node,
10297 intDI_type_node, 0);
10298 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10299 ptr_type_node,
10300 ptr_type_node, 0);
10301 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10302 ptr_type_node,
10303 ptr_type_node, 0);
10304 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10305 v4hi, v4hi, 0);
10306 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10307 v2si, v2si, 0);
10308 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10309 v4hi, v4hi, 0);
10310 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10311 v2si, v2si, 0);
10312 tree void_ftype_di = build_function_type_list (void_type_node,
10313 intDI_type_node, 0);
10314 tree di_ftype_void = build_function_type_list (intDI_type_node,
10315 void_type_node, 0);
10316 tree void_ftype_si = build_function_type_list (void_type_node,
10317 intSI_type_node, 0);
10318 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10319 float_type_node,
10320 float_type_node, 0);
10321 tree df_ftype_df_df = build_function_type_list (double_type_node,
10322 double_type_node,
10323 double_type_node, 0);
10325 /* Packing and expanding vectors. */
10326 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10327 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10328 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10329 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10330 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10331 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10332 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10333 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10334 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10335 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10337 /* Multiplications. */
10338 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10339 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10340 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10341 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10342 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10343 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10344 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10345 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10346 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10347 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10348 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10349 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10350 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10351 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10353 /* Data aligning. */
10354 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10355 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10356 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10357 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10358 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10359 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10360 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10361 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10363 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10364 SPARC_BUILTIN_WRGSR, void_ftype_di);
10365 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10366 SPARC_BUILTIN_RDGSR, di_ftype_void);
10368 if (TARGET_ARCH64)
10370 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10371 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10372 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10373 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10375 else
10377 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10378 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10379 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10380 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10383 /* Pixel distance. */
10384 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10385 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10387 /* Edge handling. */
10388 if (TARGET_ARCH64)
10390 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10391 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10392 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10393 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10394 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10395 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10396 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10397 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10398 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10399 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10400 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10401 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10403 else
10405 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10406 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10407 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10408 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10409 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10410 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10411 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10412 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10413 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10414 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10415 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10416 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10419 /* Pixel compare. */
10420 if (TARGET_ARCH64)
10422 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10423 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10424 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10425 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10426 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10427 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10428 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10429 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10430 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10431 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10432 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10433 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10434 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10435 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10436 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10437 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10439 else
10441 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10442 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10443 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10444 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10445 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10446 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10447 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10448 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10449 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10450 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10451 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10452 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10453 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10454 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10455 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10456 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10459 /* Addition and subtraction. */
10460 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10461 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10462 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10463 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10464 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10465 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10466 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10467 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10468 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10469 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10470 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10471 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10472 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10473 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10474 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10475 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10477 /* Three-dimensional array addressing. */
10478 if (TARGET_ARCH64)
10480 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10481 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10482 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10483 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10484 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10485 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10487 else
10489 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10490 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10491 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10492 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10493 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10494 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10497 if (TARGET_VIS2)
10499 /* Edge handling. */
10500 if (TARGET_ARCH64)
10502 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10503 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10504 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10505 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10506 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10507 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10508 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10509 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10510 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10511 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10512 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10513 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10515 else
10517 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10518 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10519 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10520 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10521 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10522 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10523 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10524 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10525 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10526 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10527 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10528 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10531 /* Byte mask and shuffle. */
10532 if (TARGET_ARCH64)
10533 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10534 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10535 else
10536 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10537 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10538 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10539 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10540 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10541 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10542 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10543 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10544 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10545 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10548 if (TARGET_VIS3)
10550 if (TARGET_ARCH64)
10552 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10553 SPARC_BUILTIN_CMASK8, void_ftype_di);
10554 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10555 SPARC_BUILTIN_CMASK16, void_ftype_di);
10556 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10557 SPARC_BUILTIN_CMASK32, void_ftype_di);
10559 else
10561 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10562 SPARC_BUILTIN_CMASK8, void_ftype_si);
10563 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10564 SPARC_BUILTIN_CMASK16, void_ftype_si);
10565 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10566 SPARC_BUILTIN_CMASK32, void_ftype_si);
10569 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10570 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10572 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10573 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10574 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10575 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10576 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10577 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10578 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10579 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10580 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10581 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10582 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10583 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10584 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10585 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10586 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10587 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10589 if (TARGET_ARCH64)
10590 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10591 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10592 else
10593 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10594 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10596 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10597 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10598 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10599 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10600 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10601 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10603 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10604 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10605 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10606 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10607 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10608 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10609 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10610 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10611 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10612 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10613 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10614 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10615 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10616 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10617 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10618 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10620 if (TARGET_ARCH64)
10622 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10623 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10624 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10625 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10626 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10627 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10628 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10629 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10631 else
10633 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10634 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10635 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10636 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10637 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10638 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10639 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10640 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10643 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10644 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10645 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10646 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10647 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10648 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10649 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10650 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10651 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10652 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10653 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10654 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10656 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10657 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10658 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10659 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10660 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10661 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
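/* Editor's sketch (illustrative usage, not in the original source):
   with -mvis, the builtins defined above are directly callable from C
   using the matching vector types, e.g.

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi add16 (v4hi a, v4hi b)
     {
       return __builtin_vis_fpadd16 (a, b);   // expands to fpadd16
     }
*/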
10665 /* Implement TARGET_BUILTIN_DECL hook. */
10667 static tree
10668 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10670 if (code >= SPARC_BUILTIN_MAX)
10671 return error_mark_node;
10673 return sparc_builtins[code];
10676 /* Implement TARGET_EXPAND_BUILTIN hook. */
10678 static rtx
10679 sparc_expand_builtin (tree exp, rtx target,
10680 rtx subtarget ATTRIBUTE_UNUSED,
10681 machine_mode tmode ATTRIBUTE_UNUSED,
10682 int ignore ATTRIBUTE_UNUSED)
10684 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10685 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10686 enum insn_code icode = sparc_builtins_icode[code];
10687 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10688 call_expr_arg_iterator iter;
10689 int arg_count = 0;
10690 rtx pat, op[4];
10691 tree arg;
10693 if (nonvoid)
10695 machine_mode tmode = insn_data[icode].operand[0].mode;
10696 if (!target
10697 || GET_MODE (target) != tmode
10698 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10699 op[0] = gen_reg_rtx (tmode);
10700 else
10701 op[0] = target;
10704 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10706 const struct insn_operand_data *insn_op;
10707 int idx;
10709 if (arg == error_mark_node)
10710 return NULL_RTX;
10712 arg_count++;
10713 idx = arg_count - !nonvoid;
10714 insn_op = &insn_data[icode].operand[idx];
10715 op[arg_count] = expand_normal (arg);
10717 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10719 if (!address_operand (op[arg_count], SImode))
10721 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10722 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10724 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10727 else if (insn_op->mode == V1DImode
10728 && GET_MODE (op[arg_count]) == DImode)
10729 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10731 else if (insn_op->mode == V1SImode
10732 && GET_MODE (op[arg_count]) == SImode)
10733 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10735 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10736 insn_op->mode))
10737 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10740 switch (arg_count)
10742 case 0:
10743 pat = GEN_FCN (icode) (op[0]);
10744 break;
10745 case 1:
10746 if (nonvoid)
10747 pat = GEN_FCN (icode) (op[0], op[1]);
10748 else
10749 pat = GEN_FCN (icode) (op[1]);
10750 break;
10751 case 2:
10752 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10753 break;
10754 case 3:
10755 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10756 break;
10757 default:
10758 gcc_unreachable ();
10761 if (!pat)
10762 return NULL_RTX;
10764 emit_insn (pat);
10766 return (nonvoid ? op[0] : const0_rtx);
10769 /* Return the upper 16 bits of the 8x16 multiplication. */
10771 static int
10772 sparc_vis_mul8x16 (int e8, int e16)
10774 return (e8 * e16 + 128) / 256;
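/* Editor's note (worked example, not in the original source): for
   e8 = 16 and e16 = 1000, the product is 16000; adding 128 rounds to
   nearest before truncation, and (16000 + 128) / 256 = 63, i.e. the
   upper 16 bits of the rounded 24-bit product.  */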
10777 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10778 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10780 static void
10781 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10782 tree inner_type, tree cst0, tree cst1)
10784 unsigned i, num = VECTOR_CST_NELTS (cst0);
10785 int scale;
10787 switch (fncode)
10789 case SPARC_BUILTIN_FMUL8X16:
10790 for (i = 0; i < num; ++i)
10792 int val
10793 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10794 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10795 n_elts[i] = build_int_cst (inner_type, val);
10797 break;
10799 case SPARC_BUILTIN_FMUL8X16AU:
10800 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10802 for (i = 0; i < num; ++i)
10804 int val
10805 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10806 scale);
10807 n_elts[i] = build_int_cst (inner_type, val);
10809 break;
10811 case SPARC_BUILTIN_FMUL8X16AL:
10812 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10814 for (i = 0; i < num; ++i)
10816 int val
10817 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10818 scale);
10819 n_elts[i] = build_int_cst (inner_type, val);
10821 break;
10823 default:
10824 gcc_unreachable ();
10828 /* Implement TARGET_FOLD_BUILTIN hook.
10830 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10831 result of the function call is ignored. NULL_TREE is returned if the
10832 function could not be folded. */
10834 static tree
10835 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10836 tree *args, bool ignore)
10838 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10839 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10840 tree arg0, arg1, arg2;
10842 if (ignore)
10843 switch (code)
10845 case SPARC_BUILTIN_LDFSR:
10846 case SPARC_BUILTIN_STFSR:
10847 case SPARC_BUILTIN_ALIGNADDR:
10848 case SPARC_BUILTIN_WRGSR:
10849 case SPARC_BUILTIN_BMASK:
10850 case SPARC_BUILTIN_CMASK8:
10851 case SPARC_BUILTIN_CMASK16:
10852 case SPARC_BUILTIN_CMASK32:
10853 break;
10855 default:
10856 return build_zero_cst (rtype);
10859 switch (code)
10861 case SPARC_BUILTIN_FEXPAND:
10862 arg0 = args[0];
10863 STRIP_NOPS (arg0);
10865 if (TREE_CODE (arg0) == VECTOR_CST)
10867 tree inner_type = TREE_TYPE (rtype);
10868 tree *n_elts;
10869 unsigned i;
10871 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10872 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10873 n_elts[i] = build_int_cst (inner_type,
10874 TREE_INT_CST_LOW
10875 (VECTOR_CST_ELT (arg0, i)) << 4);
10876 return build_vector (rtype, n_elts);
10878 break;
10880 case SPARC_BUILTIN_FMUL8X16:
10881 case SPARC_BUILTIN_FMUL8X16AU:
10882 case SPARC_BUILTIN_FMUL8X16AL:
10883 arg0 = args[0];
10884 arg1 = args[1];
10885 STRIP_NOPS (arg0);
10886 STRIP_NOPS (arg1);
10888 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10890 tree inner_type = TREE_TYPE (rtype);
10891 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10892 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10893 return build_vector (rtype, n_elts);
10895 break;
10897 case SPARC_BUILTIN_FPMERGE:
10898 arg0 = args[0];
10899 arg1 = args[1];
10900 STRIP_NOPS (arg0);
10901 STRIP_NOPS (arg1);
10903 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10905 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10906 unsigned i;
10907 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10909 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10910 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10913 return build_vector (rtype, n_elts);
10915 break;
10917 case SPARC_BUILTIN_PDIST:
10918 case SPARC_BUILTIN_PDISTN:
10919 arg0 = args[0];
10920 arg1 = args[1];
10921 STRIP_NOPS (arg0);
10922 STRIP_NOPS (arg1);
10923 if (code == SPARC_BUILTIN_PDIST)
10925 arg2 = args[2];
10926 STRIP_NOPS (arg2);
10928 else
10929 arg2 = integer_zero_node;
10931 if (TREE_CODE (arg0) == VECTOR_CST
10932 && TREE_CODE (arg1) == VECTOR_CST
10933 && TREE_CODE (arg2) == INTEGER_CST)
10935 bool overflow = false;
10936 widest_int result = wi::to_widest (arg2);
10937 widest_int tmp;
10938 unsigned i;
10940 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10942 tree e0 = VECTOR_CST_ELT (arg0, i);
10943 tree e1 = VECTOR_CST_ELT (arg1, i);
10945 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10947 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
10948 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
10949 if (wi::neg_p (tmp))
10950 tmp = wi::neg (tmp, &neg2_ovf);
10951 else
10952 neg2_ovf = false;
10953 result = wi::add (result, tmp, SIGNED, &add2_ovf);
10954 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10957 gcc_assert (!overflow);
10959 return wide_int_to_tree (rtype, result);
10962 default:
10963 break;
10966 return NULL_TREE;
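/* Editor's note (worked example, not in the original source): folding
   __builtin_vis_pdist on the constant vectors {3, 10, 0, 0, 0, 0, 0, 0}
   and {7, 4, 0, 0, 0, 0, 0, 0} with accumulator 5 sums the absolute
   byte differences into the accumulator:
   |3-7| + |10-4| + 5 = 4 + 6 + 5 = 15.  */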
10969 /* ??? This duplicates information provided to the compiler by the
10970 ??? scheduler description. Some day, teach genautomata to output
10971 ??? the latencies and then CSE will just use that. */
10973 static bool
10974 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10975 int *total, bool speed ATTRIBUTE_UNUSED)
10977 machine_mode mode = GET_MODE (x);
10978 bool float_mode_p = FLOAT_MODE_P (mode);
10980 switch (code)
10982 case CONST_INT:
10983 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10985 *total = 0;
10986 return true;
10988 /* FALLTHRU */
10990 case HIGH:
10991 *total = 2;
10992 return true;
10994 case CONST:
10995 case LABEL_REF:
10996 case SYMBOL_REF:
10997 *total = 4;
10998 return true;
11000 case CONST_DOUBLE:
11001 if (GET_MODE (x) == VOIDmode
11002 && ((CONST_DOUBLE_HIGH (x) == 0
11003 && CONST_DOUBLE_LOW (x) < 0x1000)
11004 || (CONST_DOUBLE_HIGH (x) == -1
11005 && CONST_DOUBLE_LOW (x) < 0
11006 && CONST_DOUBLE_LOW (x) >= -0x1000)))
11007 *total = 0;
11008 else
11009 *total = 8;
11010 return true;
11012 case MEM:
11013 /* If outer-code was a sign or zero extension, a cost
11014 of COSTS_N_INSNS (1) was already added in. This is
11015 why we are subtracting it back out. */
11016 if (outer_code == ZERO_EXTEND)
11018 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11020 else if (outer_code == SIGN_EXTEND)
11022 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11024 else if (float_mode_p)
11026 *total = sparc_costs->float_load;
11028 else
11030 *total = sparc_costs->int_load;
11033 return true;
11035 case PLUS:
11036 case MINUS:
11037 if (float_mode_p)
11038 *total = sparc_costs->float_plusminus;
11039 else
11040 *total = COSTS_N_INSNS (1);
11041 return false;
11043 case FMA:
11045 rtx sub;
11047 gcc_assert (float_mode_p);
11048 *total = sparc_costs->float_mul;
11050 sub = XEXP (x, 0);
11051 if (GET_CODE (sub) == NEG)
11052 sub = XEXP (sub, 0);
11053 *total += rtx_cost (sub, FMA, 0, speed);
11055 sub = XEXP (x, 2);
11056 if (GET_CODE (sub) == NEG)
11057 sub = XEXP (sub, 0);
11058 *total += rtx_cost (sub, FMA, 2, speed);
11059 return true;
11062 case MULT:
11063 if (float_mode_p)
11064 *total = sparc_costs->float_mul;
11065 else if (! TARGET_HARD_MUL)
11066 *total = COSTS_N_INSNS (25);
11067 else
11069 int bit_cost;
11071 bit_cost = 0;
11072 if (sparc_costs->int_mul_bit_factor)
11074 int nbits;
11076 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11078 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11079 for (nbits = 0; value != 0; value &= value - 1)
11080 nbits++;
11082 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
11083 && GET_MODE (XEXP (x, 1)) == VOIDmode)
11085 rtx x1 = XEXP (x, 1);
11086 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
11087 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
11089 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
11090 nbits++;
11091 for (; value2 != 0; value2 &= value2 - 1)
11092 nbits++;
11094 else
11095 nbits = 7;
11097 if (nbits < 3)
11098 nbits = 3;
11099 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11100 bit_cost = COSTS_N_INSNS (bit_cost);
11103 if (mode == DImode)
11104 *total = sparc_costs->int_mulX + bit_cost;
11105 else
11106 *total = sparc_costs->int_mul + bit_cost;
11108 return false;
11110 case ASHIFT:
11111 case ASHIFTRT:
11112 case LSHIFTRT:
11113 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11114 return false;
11116 case DIV:
11117 case UDIV:
11118 case MOD:
11119 case UMOD:
11120 if (float_mode_p)
11122 if (mode == DFmode)
11123 *total = sparc_costs->float_div_df;
11124 else
11125 *total = sparc_costs->float_div_sf;
11127 else
11129 if (mode == DImode)
11130 *total = sparc_costs->int_divX;
11131 else
11132 *total = sparc_costs->int_div;
11134 return false;
11136 case NEG:
11137 if (! float_mode_p)
11139 *total = COSTS_N_INSNS (1);
11140 return false;
11142 /* FALLTHRU */
11144 case ABS:
11145 case FLOAT:
11146 case UNSIGNED_FLOAT:
11147 case FIX:
11148 case UNSIGNED_FIX:
11149 case FLOAT_EXTEND:
11150 case FLOAT_TRUNCATE:
11151 *total = sparc_costs->float_move;
11152 return false;
11154 case SQRT:
11155 if (mode == DFmode)
11156 *total = sparc_costs->float_sqrt_df;
11157 else
11158 *total = sparc_costs->float_sqrt_sf;
11159 return false;
11161 case COMPARE:
11162 if (float_mode_p)
11163 *total = sparc_costs->float_cmp;
11164 else
11165 *total = COSTS_N_INSNS (1);
11166 return false;
11168 case IF_THEN_ELSE:
11169 if (float_mode_p)
11170 *total = sparc_costs->float_cmove;
11171 else
11172 *total = sparc_costs->int_cmove;
11173 return false;
11175 case IOR:
11176 /* Handle the NAND vector patterns. */
11177 if (sparc_vector_mode_supported_p (GET_MODE (x))
11178 && GET_CODE (XEXP (x, 0)) == NOT
11179 && GET_CODE (XEXP (x, 1)) == NOT)
11181 *total = COSTS_N_INSNS (1);
11182 return true;
11184 else
11185 return false;
11187 default:
11188 return false;
11192 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11194 static inline bool
11195 general_or_i64_p (reg_class_t rclass)
11197 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11200 /* Implement TARGET_REGISTER_MOVE_COST. */
11202 static int
11203 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11204 reg_class_t from, reg_class_t to)
11206 bool need_memory = false;
11208 if (from == FPCC_REGS || to == FPCC_REGS)
11209 need_memory = true;
11210 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11211 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11213 if (TARGET_VIS3)
11215 int size = GET_MODE_SIZE (mode);
11216 if (size == 8 || size == 4)
11218 if (! TARGET_ARCH32 || size == 4)
11219 return 4;
11220 else
11221 return 6;
11224 need_memory = true;
11227 if (need_memory)
11229 if (sparc_cpu == PROCESSOR_ULTRASPARC
11230 || sparc_cpu == PROCESSOR_ULTRASPARC3
11231 || sparc_cpu == PROCESSOR_NIAGARA
11232 || sparc_cpu == PROCESSOR_NIAGARA2
11233 || sparc_cpu == PROCESSOR_NIAGARA3
11234 || sparc_cpu == PROCESSOR_NIAGARA4)
11235 return 12;
11237 return 6;
11240 return 2;
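/* Editor's note (worked example, not in the original source): under
   the cost model above, an 8-byte move between an FP register and an
   integer register costs 4 with VIS3 in 64-bit mode, 6 with VIS3 in
   32-bit mode, and 12 when it must go through memory on an
   UltraSPARC-class CPU; ordinary same-class moves cost 2.  */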
11243 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11244 This is achieved by means of a manual dynamic stack space allocation in
11245 the current frame. We make the assumption that SEQ doesn't contain any
11246 function calls, with the possible exception of calls to the GOT helper. */
11248 static void
11249 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11251 /* We must preserve the lowest 16 words for the register save area. */
11252 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11253 /* We really need only 2 words of fresh stack space. */
11254 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11256 rtx slot
11257 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11258 SPARC_STACK_BIAS + offset));
11260 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11261 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
11262 if (reg2)
11263 emit_insn (gen_rtx_SET (VOIDmode,
11264 adjust_address (slot, word_mode, UNITS_PER_WORD),
11265 reg2));
11266 emit_insn (seq);
11267 if (reg2)
11268 emit_insn (gen_rtx_SET (VOIDmode,
11269 reg2,
11270 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11271 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
11272 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11275 /* Output the assembler code for a thunk function. THUNK_DECL is the
11276 declaration for the thunk function itself, FUNCTION is the decl for
11277 the target function. DELTA is an immediate constant offset to be
11278 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11279 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11281 static void
11282 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11283 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11284 tree function)
11286 rtx this_rtx, funexp;
11287 rtx_insn *insn;
11288 unsigned int int_arg_first;
11290 reload_completed = 1;
11291 epilogue_completed = 1;
11293 emit_note (NOTE_INSN_PROLOGUE_END);
11295 if (TARGET_FLAT)
11297 sparc_leaf_function_p = 1;
11299 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11301 else if (flag_delayed_branch)
11303 /* We will emit a regular sibcall below, so we need to instruct
11304 output_sibcall that we are in a leaf function. */
11305 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11307 /* This will cause final.c to invoke leaf_renumber_regs so we
11308 must behave as if we were in a not-yet-leafified function. */
11309 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11311 else
11313 /* We will emit the sibcall manually below, so we will need to
11314 manually spill non-leaf registers. */
11315 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11317 /* We really are in a leaf function. */
11318 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11321 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11322 returns a structure, the structure return pointer is there instead. */
11323 if (TARGET_ARCH64
11324 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11325 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11326 else
11327 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11329 /* Add DELTA. When possible use a plain add, otherwise load it into
11330 a register first. */
11331 if (delta)
11333 rtx delta_rtx = GEN_INT (delta);
11335 if (! SPARC_SIMM13_P (delta))
11337 rtx scratch = gen_rtx_REG (Pmode, 1);
11338 emit_move_insn (scratch, delta_rtx);
11339 delta_rtx = scratch;
11342 /* THIS_RTX += DELTA. */
11343 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11346 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11347 if (vcall_offset)
11349 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11350 rtx scratch = gen_rtx_REG (Pmode, 1);
11352 gcc_assert (vcall_offset < 0);
11354 /* SCRATCH = *THIS_RTX. */
11355 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11357 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11358 may not have any available scratch register at this point. */
11359 if (SPARC_SIMM13_P (vcall_offset))
11361 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11362 else if (! fixed_regs[5]
11363 /* The below sequence is made up of at least 2 insns,
11364 while the default method may need only one. */
11365 && vcall_offset < -8192)
11367 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11368 emit_move_insn (scratch2, vcall_offset_rtx);
11369 vcall_offset_rtx = scratch2;
11371 else
11373 rtx increment = GEN_INT (-4096);
11375 /* VCALL_OFFSET is a negative number whose typical range can be
11376 estimated as -32768..0 in 32-bit mode. In almost all cases
11377 it is therefore cheaper to emit multiple add insns than
11378 spilling and loading the constant into a register (at least
11379 6 insns). */
11380 while (! SPARC_SIMM13_P (vcall_offset))
11382 emit_insn (gen_add2_insn (scratch, increment));
11383 vcall_offset += 4096;
11385 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11388 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11389 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11390 gen_rtx_PLUS (Pmode,
11391 scratch,
11392 vcall_offset_rtx)));
11394 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11395 emit_insn (gen_add2_insn (this_rtx, scratch));
11398 /* Generate a tail call to the target function. */
11399 if (! TREE_USED (function))
11401 assemble_external (function);
11402 TREE_USED (function) = 1;
11404 funexp = XEXP (DECL_RTL (function), 0);
11406 if (flag_delayed_branch)
11408 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11409 insn = emit_call_insn (gen_sibcall (funexp));
11410 SIBLING_CALL_P (insn) = 1;
11412 else
11414 /* The hoops we have to jump through in order to generate a sibcall
11415 without using delay slots... */
11416 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11418 if (flag_pic)
11420 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11421 start_sequence ();
11422 load_got_register (); /* clobbers %o7 */
11423 scratch = sparc_legitimize_pic_address (funexp, scratch);
11424 seq = get_insns ();
11425 end_sequence ();
11426 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11428 else if (TARGET_ARCH32)
11430 emit_insn (gen_rtx_SET (VOIDmode,
11431 scratch,
11432 gen_rtx_HIGH (SImode, funexp)));
11433 emit_insn (gen_rtx_SET (VOIDmode,
11434 scratch,
11435 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11437 else /* TARGET_ARCH64 */
11439 switch (sparc_cmodel)
11441 case CM_MEDLOW:
11442 case CM_MEDMID:
11443 /* The destination can serve as a temporary. */
11444 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11445 break;
11447 case CM_MEDANY:
11448 case CM_EMBMEDANY:
11449 /* The destination cannot serve as a temporary. */
11450 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11451 start_sequence ();
11452 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11453 seq = get_insns ();
11454 end_sequence ();
11455 emit_and_preserve (seq, spill_reg, 0);
11456 break;
11458 default:
11459 gcc_unreachable ();
11463 emit_jump_insn (gen_indirect_jump (scratch));
11466 emit_barrier ();
11468 /* Run just enough of rest_of_compilation to get the insns emitted.
11469 There's not really enough bulk here to make other passes such as
11470 instruction scheduling worthwhile. Note that use_thunk calls
11471 assemble_start_function and assemble_end_function. */
11472 insn = get_insns ();
11473 shorten_branches (insn);
11474 final_start_function (insn, file, 1);
11475 final (insn, file, 1);
11476 final_end_function ();
11478 reload_completed = 0;
11479 epilogue_completed = 0;
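/* Editor's sketch (pseudocode, not in the original source): the thunk
   emitted above behaves like

     this += DELTA;
     if (VCALL_OFFSET != 0)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     goto FUNCTION;   // tail call with the adjusted this pointer
*/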
11482 /* Return true if sparc_output_mi_thunk would be able to output the
11483 assembler code for the thunk function specified by the arguments
11484 it is passed, and false otherwise. */
11485 static bool
11486 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11487 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11488 HOST_WIDE_INT vcall_offset,
11489 const_tree function ATTRIBUTE_UNUSED)
11491 /* Bound the loop used in the default method above. */
11492 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11495 /* How to allocate a 'struct machine_function'. */
11497 static struct machine_function *
11498 sparc_init_machine_status (void)
11500 return ggc_cleared_alloc<machine_function> ();
11503 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11504 We need to emit DTP-relative relocations. */
11506 static void
11507 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11509 switch (size)
11511 case 4:
11512 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11513 break;
11514 case 8:
11515 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11516 break;
11517 default:
11518 gcc_unreachable ();
11520 output_addr_const (file, x);
11521 fputs (")", file);
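/* Editor's note (illustrative, not in the original source): with
   SIZE == 4 and X the symbol `foo', the code above emits

     .word %r_tls_dtpoff32(foo)
*/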
11524 /* Do whatever processing is required at the end of a file. */
11526 static void
11527 sparc_file_end (void)
11529 /* If we need to emit the special GOT helper function, do so now. */
11530 if (got_helper_rtx)
11532 const char *name = XSTR (got_helper_rtx, 0);
11533 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11534 #ifdef DWARF2_UNWIND_INFO
11535 bool do_cfi;
11536 #endif
11538 if (USE_HIDDEN_LINKONCE)
11540 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11541 get_identifier (name),
11542 build_function_type_list (void_type_node,
11543 NULL_TREE));
11544 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11545 NULL_TREE, void_type_node);
11546 TREE_PUBLIC (decl) = 1;
11547 TREE_STATIC (decl) = 1;
11548 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11549 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11550 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11551 resolve_unique_section (decl, 0, flag_function_sections);
11552 allocate_struct_function (decl, true);
11553 cfun->is_thunk = 1;
11554 current_function_decl = decl;
11555 init_varasm_status ();
11556 assemble_start_function (decl, name);
11558 else
11560 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11561 switch_to_section (text_section);
11562 if (align > 0)
11563 ASM_OUTPUT_ALIGN (asm_out_file, align);
11564 ASM_OUTPUT_LABEL (asm_out_file, name);
11567 #ifdef DWARF2_UNWIND_INFO
11568 do_cfi = dwarf2out_do_cfi_asm ();
11569 if (do_cfi)
11570 fprintf (asm_out_file, "\t.cfi_startproc\n");
11571 #endif
11572 if (flag_delayed_branch)
11573 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11574 reg_name, reg_name);
11575 else
11576 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11577 reg_name, reg_name);
11578 #ifdef DWARF2_UNWIND_INFO
11579 if (do_cfi)
11580 fprintf (asm_out_file, "\t.cfi_endproc\n");
11581 #endif
11584 if (NEED_INDICATE_EXEC_STACK)
11585 file_end_indicate_exec_stack ();
11587 #ifdef TARGET_SOLARIS
11588 solaris_file_end ();
11589 #endif
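/* Editor's sketch (illustrative, not in the original source; the
   helper's label name is an assumption based on the GOT register,
   here taken to be %l7): with delayed branches enabled, the GOT
   helper emitted above is the two-instruction stub

     __sparc_get_pc_thunk.l7:
             jmp     %o7+8
              add    %o7, %l7, %l7
*/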
11592 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11593 /* Implement TARGET_MANGLE_TYPE. */
11595 static const char *
11596 sparc_mangle_type (const_tree type)
11598 if (!TARGET_64BIT
11599 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11600 && TARGET_LONG_DOUBLE_128)
11601 return "g";
11603 /* For all other types, use normal C++ mangling. */
11604 return NULL;
11606 #endif
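/* Editor's note (illustrative, not in the original source): with this
   hook, a 128-bit `long double' uses the Itanium-ABI mangling code `g'
   (otherwise reserved for __float128), so `void f (long double)'
   mangles as _Z1fg instead of the default _Z1fe.  */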

/* Expand a membar instruction for various use cases.  Both the LOAD_STORE
   and BEFORE_AFTER arguments are of the form X_Y: two-bit masks where
   bit 0 indicates that X is true and bit 1 indicates that Y is true.  */

void
sparc_emit_membar_for_model (enum memmodel model,
			     int load_store, int before_after)
{
  /* Bits for the MEMBAR mmask field.  */
  const int LoadLoad = 1;
  const int StoreLoad = 2;
  const int LoadStore = 4;
  const int StoreStore = 8;

  int mm = 0, implied = 0;

  switch (sparc_memory_model)
    {
    case SMM_SC:
      /* Sequential Consistency.  All memory transactions are immediately
	 visible in sequential execution order.  No barriers needed.  */
      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
      break;

    case SMM_TSO:
      /* Total Store Ordering: all memory transactions with store semantics
	 are followed by an implied StoreStore.  */
      implied |= StoreStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 1)
	implied |= StoreLoad;
      /* FALLTHRU */

    case SMM_PSO:
      /* Partial Store Ordering: all memory transactions with load semantics
	 are followed by an implied LoadLoad | LoadStore.  */
      implied |= LoadLoad | LoadStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 2)
	implied |= StoreLoad | StoreStore;
      /* FALLTHRU */

    case SMM_RMO:
      /* Relaxed Memory Ordering: no implicit bits.  */
      break;

    default:
      gcc_unreachable ();
    }

  if (before_after & 1)
    {
      if (model == MEMMODEL_RELEASE
	  || model == MEMMODEL_ACQ_REL
	  || model == MEMMODEL_SEQ_CST)
	{
	  if (load_store & 1)
	    mm |= LoadLoad | StoreLoad;
	  if (load_store & 2)
	    mm |= LoadStore | StoreStore;
	}
    }
  if (before_after & 2)
    {
      if (model == MEMMODEL_ACQUIRE
	  || model == MEMMODEL_ACQ_REL
	  || model == MEMMODEL_SEQ_CST)
	{
	  if (load_store & 1)
	    mm |= LoadLoad | LoadStore;
	  if (load_store & 2)
	    mm |= StoreLoad | StoreStore;
	}
    }

  /* Remove the bits implied by the system memory model.  */
  mm &= ~implied;

  /* For raw barriers (before+after), always emit a barrier.
     This will become a compile-time barrier if needed.  */
  if (mm || before_after == 3)
    emit_insn (gen_membar (GEN_INT (mm)));
}
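
/* Worked example (a sketch of the computation above): for a SEQ_CST
   atomic operation fenced before the access (load_store == 3,
   before_after == 1) under TSO, the switch accumulates
   implied = StoreStore | StoreLoad | LoadLoad | LoadStore while the
   model tests set mm to the same four bits, so mm & ~implied is 0 and
   no membar is emitted.  Under RMO nothing is implied, and the full

	membar	#LoadLoad | #StoreLoad | #LoadStore | #StoreStore

   is emitted instead.  */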

/* Expand code to perform an 8-bit or 16-bit compare and swap by doing
   a 32-bit compare and swap on the word containing the byte or half-word.  */

static void
sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
				  rtx oldval, rtx newval)
{
  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
  rtx addr = gen_reg_rtx (Pmode);
  rtx off = gen_reg_rtx (SImode);
  rtx oldv = gen_reg_rtx (SImode);
  rtx newv = gen_reg_rtx (SImode);
  rtx oldvalue = gen_reg_rtx (SImode);
  rtx newvalue = gen_reg_rtx (SImode);
  rtx res = gen_reg_rtx (SImode);
  rtx resv = gen_reg_rtx (SImode);
  rtx memsi, val, mask, cc;

  emit_insn (gen_rtx_SET (VOIDmode, addr,
			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));

  if (Pmode != SImode)
    addr1 = gen_lowpart (SImode, addr1);
  emit_insn (gen_rtx_SET (VOIDmode, off,
			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));

  memsi = gen_rtx_MEM (SImode, addr);
  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);

  val = copy_to_reg (memsi);

  emit_insn (gen_rtx_SET (VOIDmode, off,
			  gen_rtx_XOR (SImode, off,
				       GEN_INT (GET_MODE (mem) == QImode
						? 3 : 2))));

  emit_insn (gen_rtx_SET (VOIDmode, off,
			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));

  if (GET_MODE (mem) == QImode)
    mask = force_reg (SImode, GEN_INT (0xff));
  else
    mask = force_reg (SImode, GEN_INT (0xffff));

  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_ASHIFT (SImode, mask, off)));

  emit_insn (gen_rtx_SET (VOIDmode, val,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       val)));

  oldval = gen_lowpart (SImode, oldval);
  emit_insn (gen_rtx_SET (VOIDmode, oldv,
			  gen_rtx_ASHIFT (SImode, oldval, off)));

  newval = gen_lowpart_common (SImode, newval);
  emit_insn (gen_rtx_SET (VOIDmode, newv,
			  gen_rtx_ASHIFT (SImode, newval, off)));

  emit_insn (gen_rtx_SET (VOIDmode, oldv,
			  gen_rtx_AND (SImode, oldv, mask)));

  emit_insn (gen_rtx_SET (VOIDmode, newv,
			  gen_rtx_AND (SImode, newv, mask)));

  rtx_code_label *end_label = gen_label_rtx ();
  rtx_code_label *loop_label = gen_label_rtx ();
  emit_label (loop_label);

  emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
			  gen_rtx_IOR (SImode, oldv, val)));

  emit_insn (gen_rtx_SET (VOIDmode, newvalue,
			  gen_rtx_IOR (SImode, newv, val)));

  emit_move_insn (bool_result, const1_rtx);

  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));

  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);

  emit_insn (gen_rtx_SET (VOIDmode, resv,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       res)));

  emit_move_insn (bool_result, const0_rtx);

  cc = gen_compare_reg_1 (NE, resv, val);
  emit_insn (gen_rtx_SET (VOIDmode, val, resv));

  /* Use cbranchcc4 to separate the compare and branch!  */
  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
				  cc, const0_rtx, loop_label));

  emit_label (end_label);

  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_AND (SImode, res, mask)));

  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_LSHIFTRT (SImode, res, off)));

  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}
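
/* The expansion above implements, in RTL, roughly the following C
   sketch of the QImode case (illustrative only; "cas32" stands for the
   32-bit hardware compare-and-swap, and only the success flag is shown,
   not the returned byte):

     uint32_t *word = (uint32_t *) ((uintptr_t) p & -4);
     int shift = (((uintptr_t) p & 3) ^ 3) << 3;	// big-endian byte lane
     uint32_t mask = (uint32_t) 0xff << shift;
     uint32_t rest = *word & ~mask;			// the other three bytes
     for (;;)
       {
	 uint32_t expect = rest | (((uint32_t) oldval << shift) & mask);
	 uint32_t desire = rest | (((uint32_t) newval << shift) & mask);
	 uint32_t got = cas32 (word, expect, desire);
	 if (got == expect)
	   return 1;					// swap succeeded
	 if ((got & ~mask) == rest)
	   return 0;					// our byte differed
	 rest = got & ~mask;				// other bytes changed: retry
       }  */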

/* Expand code to perform a compare-and-swap.  */

void
sparc_expand_compare_and_swap (rtx operands[])
{
  rtx bval, retval, mem, oldval, newval;
  machine_mode mode;
  enum memmodel model;

  bval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  model = (enum memmodel) INTVAL (operands[6]);
  mode = GET_MODE (mem);

  sparc_emit_membar_for_model (model, 3, 1);

  if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode == QImode || mode == HImode)
    sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx);
      rtx x;

      if (mode == SImode)
	gen = gen_atomic_compare_and_swapsi_1;
      else
	gen = gen_atomic_compare_and_swapdi_1;
      emit_insn (gen (retval, mem, oldval, newval));

      x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
      if (x != bval)
	convert_move (bval, x, 1);
    }

  sparc_emit_membar_for_model (model, 3, 2);
}
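
/* Note the bracketing: the operation counts as both a load and a store
   (load_store == 3) and is fenced once before (before_after == 1) and
   once after (before_after == 2) according to MODEL.  So, to sketch the
   effect, a SEQ_CST compare-and-swap gets a full membar on each side
   under RMO, while under TSO the implied orderings computed in
   sparc_emit_membar_for_model elide both barriers entirely.  */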

void
sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
{
  rtx t_1, t_2, t_3;

  sel = gen_lowpart (DImode, sel);
  switch (vmode)
    {
    case V2SImode:
      /* inp = xxxxxxxAxxxxxxxB */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....xxxxxxxAxxx. */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......B */
      /* t_1 = ...A.... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      /* sel = ...A...B */
      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
      /* sel = AAAABBBB * 4 */
      t_1 = force_reg (SImode, GEN_INT (0x01230123));
      /* sel = { A*4, A*4+1, A*4+2, ... } */
      break;

    case V4HImode:
      /* inp = xxxAxxxBxxxCxxxD */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..xxxAxxxBxxxCxx */
      /* t_2 = ....xxxAxxxBxxxC */
      /* t_3 = ......xxxAxxxBxx */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (0x07),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x0700),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
				 GEN_INT (0x070000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
				 GEN_INT (0x07000000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......D */
      /* t_1 = .....C.. */
      /* t_2 = ...B.... */
      /* t_3 = .A...... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D */
      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
      /* sel = AABBCCDD * 2 */
      t_1 = force_reg (SImode, GEN_INT (0x01010101));
      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
      break;

    case V8QImode:
      /* input = xAxBxCxDxExFxGxH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
					  | 0x0f0f0f0f),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D.E.F.G.H */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..A.B.C.D.E.F.G. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .AABBCCDDEEFFGGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
					  | 0xff00ff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..AB..CD..EF..GH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....AB..CD..EF.. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..ABABCDCDEFEFGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ....ABCD....EFGH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ........ABCD.... */
      sel = gen_lowpart (SImode, sel);
      t_1 = gen_lowpart (SImode, t_1);
      break;

    default:
      gcc_unreachable ();
    }

  /* Always perform the final addition/merge within the bmask insn.  */
  emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
}
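
/* Worked example for the V4HImode case (illustrative): for the selector
   {2, 0, 3, 1}, the shift/mask/IOR steps pack the four 3-bit indices
   into sel = 0x02000301.  Multiplying by 0x22 turns each index byte X
   into 0x11 * 2X, giving 0x44006622, and the bmask addition of
   t_1 = 0x01010101 yields the GSR mask 0x45016723.  Its eight nibbles
   select source bytes {4,5, 0,1, 6,7, 2,3}, which is exactly half-words
   2, 0, 3, 1 for the subsequent bshuffle.  */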

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
sparc_frame_pointer_required (void)
{
  /* If the stack pointer is dynamically modified in the function, it cannot
     serve as the frame pointer.  */
  if (cfun->calls_alloca)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* In flat mode, that's it.  */
  if (TARGET_FLAT)
    return false;

  /* Otherwise, the frame pointer is required if the function isn't leaf.  */
  return !(crtl->is_leaf && only_leaf_regs_used ());
}

/* The way this is structured, we can't eliminate SFP in favor of SP
   if the frame pointer is required: we want to use the SFP->HFP elimination
   in that case.  But the test in update_eliminables doesn't know we are
   assuming below that we only do the former elimination.  */

static bool
sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
}

/* Return the hard frame pointer directly to bypass the stack bias.  */

static rtx
sparc_builtin_setjmp_frame_value (void)
{
  return hard_frame_pointer_rtx;
}

/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
   they won't be allocated.  */

static void
sparc_conditional_register_usage (void)
{
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g5, then honor it.  */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      for (regno = SPARC_FIRST_V9_FP_REG;
	   regno <= SPARC_LAST_V9_FP_REG;
	   regno++)
	fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9.  */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
	   regno <= SPARC_LAST_V9_FCC_REG;
	   regno++)
	fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      int regno;
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
	fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2, then honor it.
     Likewise with g3 and g4.  */
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;
  if (TARGET_FLAT)
    {
      int regno;
      /* Disable leaf functions.  */
      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	leaf_reg_remap [regno] = regno;
    }
  if (TARGET_VIS)
    global_regs[SPARC_GSR_REG] = 1;
}

/* Implement TARGET_PREFERRED_RELOAD_CLASS:

   - We can't load constants into FP registers.
   - We can't load FP constants into integer registers when soft-float,
     because there is no soft-float pattern with a r/F constraint.
   - We can't load FP constants into integer registers for TFmode unless
     it is 0.0L, because there is no movtf pattern with a r/F constraint.
   - Try and reload integer constants (symbolic or otherwise) back into
     registers directly, rather than having them dumped to memory.  */

static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      if (FP_REG_CLASS_P (rclass)
	  || rclass == GENERAL_OR_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS
	  || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
	  || (mode == TFmode && ! const_zero_operand (x, mode)))
	return NO_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  if (! FP_REG_CLASS_P (rclass)
	      || !(const_zero_operand (x, mode)
		   || const_all_ones_operand (x, mode)))
	    return NO_REGS;
	}
    }

  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      if (SPARC_INT_REG_P (regno))
	return (rclass == EXTRA_FP_REGS
		? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}

/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */

const char *
output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  gcc_assert (! TARGET_ARCH64);

  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }

  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}
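
/* For instance, in the scratch-register alternative with two register
   operands and OPCODE "mulx", the general case above emits (a sketch;
   %H/%L denote the high/low registers of each 64-bit pair and %3/%4 the
   scratch registers, after any zero-extensions from sparc_check_64):

	sllx	%H1, 32, %3	! assemble 64-bit op1 in %3
	sllx	%H2, 32, %4	! assemble 64-bit op2 in %4
	or	%L1, %3, %3
	or	%L2, %4, %4
	mulx	%3, %4, %3	! full 64-bit product
	srlx	%3, 32, %H0	! high word of the result
	mov	%3, %L0		! low word of the result  */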

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn.  MODE
   and INNER_MODE are the modes describing TARGET.  */

static void
vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
		      machine_mode inner_mode)
{
  rtx t1, final_insn, sel;
  int bmask;

  t1 = gen_reg_rtx (mode);

  elt = convert_modes (SImode, inner_mode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  switch (mode)
    {
    case V2SImode:
      final_insn = gen_bshufflev2si_vis (target, t1, t1);
      bmask = 0x45674567;
      break;
    case V4HImode:
      final_insn = gen_bshufflev4hi_vis (target, t1, t1);
      bmask = 0x67676767;
      break;
    case V8QImode:
      final_insn = gen_bshufflev8qi_vis (target, t1, t1);
      bmask = 0x77777777;
      break;
    default:
      gcc_unreachable ();
    }

  sel = force_reg (SImode, GEN_INT (bmask));
  emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
  emit_insn (final_insn);
}
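
/* Reading the bmask constants: each of the eight nibbles selects one
   byte of the concatenated BSHUFFLE sources.  The scalar was moved into
   the low SImode half of T1, i.e. bytes 4-7 of the 8-byte register, so
   0x77777777 replicates byte 7 (the V8QImode element) into every lane,
   0x67676767 replicates the half-word in bytes 6-7, and 0x45674567
   repeats the word held in bytes 4-7 twice.  */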

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn.  */

static void
vector_init_fpmerge (rtx target, rtx elt)
{
  rtx t1, t2, t2_low, t3, t3_low;

  t1 = gen_reg_rtx (V4QImode);
  elt = convert_modes (SImode, QImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  t2 = gen_reg_rtx (V8QImode);
  t2_low = gen_lowpart (V4QImode, t2);
  emit_insn (gen_fpmerge_vis (t2, t1, t1));

  t3 = gen_reg_rtx (V8QImode);
  t3_low = gen_lowpart (V4QImode, t3);
  emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));

  emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
}
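
/* An illustrative trace (E is the element byte, 0 a zeroed byte): the
   initial move gives t1 = 000E.  FPMERGE interleaves the bytes of its
   two V4QI operands, so merging t1 with itself gives t2 = 000000EE,
   whose low half is 00EE; merging that with itself gives t3 = 0000EEEE,
   low half EEEE; and the final merge fills all eight bytes of TARGET
   with E.  Each step doubles the number of trailing copies.  */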

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn.  */

static void
vector_init_faligndata (rtx target, rtx elt)
{
  rtx t1 = gen_reg_rtx (V4HImode);
  int i;

  elt = convert_modes (SImode, HImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
				  force_reg (SImode, GEN_INT (6)),
				  const0_rtx));

  for (i = 0; i < 4; i++)
    emit_insn (gen_faligndatav4hi_vis (target, t1, target));
}
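
/* A sketch of the intent: with the GSR alignment field set to 6 by
   ALIGNADDR, each FALIGNDATA extracts the 8 bytes starting at offset 6
   of the 16-byte concatenation t1:target, i.e. it pushes the half-word
   held in t1's bytes 6-7 (which is ELT) into the top lane of TARGET
   while sliding the old lanes down one position.  Four iterations thus
   leave all four half-words equal to ELT.  */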

/* Emit code to initialize TARGET to values for individual fields VALS.  */

void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const machine_mode mode = GET_MODE (target);
  const machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same;
  rtx mem;

  all_same = true;
  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      if (GET_MODE_SIZE (inner_mode) == 4)
	{
	  emit_move_insn (gen_lowpart (SImode, target),
			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
	  return;
	}
      else if (GET_MODE_SIZE (inner_mode) == 8)
	{
	  emit_move_insn (gen_lowpart (DImode, target),
			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
	  return;
	}
    }
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      emit_move_insn (gen_highpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }

  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
	{
	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
	  return;
	}
      if (mode == V8QImode)
	{
	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
	  return;
	}
      if (mode == V4HImode)
	{
	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
	  return;
	}
    }

  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}

/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	      && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  if (((TARGET_CM_MEDANY
	&& symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
	   && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
	sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
	sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
	 to move 8-byte values in 4-byte pieces.  This only works via
	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
	 an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
	  || ((general_or_i64_p (rclass)
	       || rclass == GENERAL_OR_FP_REGS)
	      && SPARC_FP_REG_P (regno)))
	{
	  sri->extra_cost = 2;
	  return FP_REGS;
	}
    }

  return NO_REGS;
}

/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */

bool
sparc_expand_conditional_move (machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      if (reg_overlap_mentioned_p (dst, cmp))
	dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
	rc = reverse_condition_maybe_unordered (rc);
      else
	rc = reverse_condition (rc);
    }

  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (VOIDmode, dst,
			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}

/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
   code to be used for the condition mask.  */

void
sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  fcmp = gen_rtx_UNSPEC (Pmode,
			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
			 fcode);

  cmask = gen_rtx_UNSPEC (DImode,
			  gen_rtvec (2, mask, gsr),
			  ccode);

  bshuf = gen_rtx_UNSPEC (mode,
			  gen_rtvec (3, operands[1], operands[2], gsr),
			  UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
  emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
}

/* On sparc, any mode which naturally allocates into the float
   registers should return 4 here.  */

unsigned int
sparc_regmode_natural_size (machine_mode mode)
{
  int size = UNITS_PER_WORD;

  if (TARGET_ARCH64)
    {
      enum mode_class mclass = GET_MODE_CLASS (mode);

      if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
	size = 4;
    }

  return size;
}

/* Return TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2
   for any hard reg, then this must be FALSE for correct output.

   For V9 we have to deal with the fact that only the lower 32 floating
   point registers are 32-bit addressable.  */

bool
sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  enum mode_class mclass1, mclass2;
  unsigned short size1, size2;

  if (mode1 == mode2)
    return true;

  mclass1 = GET_MODE_CLASS (mode1);
  mclass2 = GET_MODE_CLASS (mode2);
  if (mclass1 != mclass2)
    return false;

  if (! TARGET_V9)
    return true;

  /* Classes are the same and we are V9 so we have to deal with upper
     vs. lower floating point registers.  If one of the modes is a
     4-byte mode, and the other is not, we have to mark them as not
     tieable because only the lower 32 floating point registers are
     addressable 32-bits at a time.

     We can't just test explicitly for SFmode, otherwise we won't
     cover the vector mode cases properly.  */

  if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
    return true;

  size1 = GET_MODE_SIZE (mode1);
  size2 = GET_MODE_SIZE (mode2);
  if ((size1 > 4 && size2 == 4)
      || (size2 > 4 && size1 == 4))
    return false;

  return true;
}
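
/* For example, on V9 tying an SFmode pseudo (4 bytes) to a DFmode one
   (8 bytes) is refused by the size check above: the DFmode value could
   land in %f32-%f62, which cannot be accessed 32 bits at a time.  By
   contrast, DFmode and DImode never reach that check, since they are
   already separated by the mode-class comparison.  */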

/* Implement TARGET_CSTORE_MODE.  */

static machine_mode
sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
{
  return (TARGET_ARCH64 ? DImode : SImode);
}

/* Return the compound expression made of T1 and T2.  */

static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}

/* Implement the TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_FPU)
    return;

  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;

  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var (unsigned_type_node);
  mark_addressable (fenv_var);
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  tree hold_stfsr = build_call_expr (stfsr, 1, fenv_addr);

  tree tmp1_var = create_tmp_var (unsigned_type_node);
  mark_addressable (tmp1_var);
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build2 (MODIFY_EXPR, void_type_node, tmp1_var, masked_fenv_var);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
	 tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var (unsigned_type_node);
  mark_addressable (tmp2_var);
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr = build_call_expr (stfsr, 1, tmp2_addr);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
		       fold_convert (integer_type_node, tmp2_var));

  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
		  build_int_cst (unsigned_type_node, 5));
      tree update_shift
	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}

#include "gt-sparc.h"