/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2017 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"
/* Processor costs */
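
/* The cost tables below express latencies with COSTS_N_INSNS, which
   scales an instruction count into the units used by the middle-end
   RTX cost hooks; the entries are therefore relative costs, not
   absolute cycle counts.  */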

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;
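
  /* For example (illustrative numbers only): with the ultrasparc entry
     below, int_mul of COSTS_N_INSNS (4) and int_mul_bit_factor of 2,
     a multiply by a positive constant whose highest set bit is bit 11
     is costed at COSTS_N_INSNS (4) + (11 - 3) / 2 units.  */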

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
   whether somebody branches between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
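
/* For example, in a leaf function %i0 (register 24) is rewritten as
   %o0 (register 8) and %i7 (register 31) as %o7 (register 15); the
   globals, %sp and the floating-point registers map to themselves,
   and -1 marks registers with no remapping.  */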

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
					   reg_class_t);
static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
					 reg_class_t);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P sparc_lra_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_MIN_ARITHMETIC_PRECISION
#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class

struct gcc_target targetm = TARGET_INITIALIZER;
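
/* TARGET_INITIALIZER, provided by target-def.h (included last above),
   expands to an initializer for every target hook; the TARGET_xxx
   macros redefined earlier in this file replace the corresponding
   default entries, which is how the sparc_* routines above are wired
   into targetm.  */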

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

/* True if INSN is a md pattern or asm statement.  */
#define USEFUL_INSN_P(INSN) \
  (NONDEBUG_INSN_P (INSN) \
   && GET_CODE (PATTERN (INSN)) != USE \
   && GET_CODE (PATTERN (INSN)) != CLOBBER)

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for either of these two sequences:

	 Sequence A:
	 1. store of word size or less (e.g. st / stb / sth / stf)
	 2. any single instruction that is not a load or store
	 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

	 Sequence B:
	 1. store of double word size (e.g. std / stdf)
	 2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
      if (sparc_fix_b2bst
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && MEM_P (SET_DEST (set)))
	{
	  /* Sequence B begins with a double-word store.  */
	  bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 2; i++)
	    {
	      /* Skip empty assembly statements.  */
	      if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
		  || (USEFUL_INSN_P (after)
		      && (asm_noperands (PATTERN (after)) >= 0)
		      && !strcmp (decode_asm_operands (PATTERN (after),
						       NULL, NULL, NULL,
						       NULL, NULL), "")))
		after = next_active_insn (after);
	      if (!after)
		break;

	      /* If the insn is a branch, then it cannot be problematic.  */
	      if (!NONJUMP_INSN_P (after)
		  || GET_CODE (PATTERN (after)) == SEQUENCE)
		break;

	      /* Sequence B is only two instructions long.  */
	      if (seq_b)
		{
		  /* Add NOP if followed by a store.  */
		  if ((set = single_set (after)) != NULL_RTX
		      && MEM_P (SET_DEST (set)))
		    insert_nop = true;

		  /* Otherwise it is ok.  */
		  break;
		}

	      /* If the second instruction is a load or a store,
		 then the sequence cannot be problematic.  */
	      if (i == 0)
		{
		  if (((set = single_set (after)) != NULL_RTX)
		      && (MEM_P (SET_DEST (set)) || MEM_P (SET_SRC (set))))
		    break;

		  after = next_active_insn (after);
		  if (!after)
		    break;
		}

	      /* Add NOP if third instruction is a store.  */
	      if (i == 1
		  && ((set = single_set (after)) != NULL_RTX)
		  && MEM_P (SET_DEST (set)))
		insert_nop = true;
	    }
	}
      else
      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		       ld [address], %fx+1
		       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		       ld [address], %fx+1
		       FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		   ld [address], %fx+1
		   fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FSMULD)
    fprintf (stderr, "FSMULD ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_VIS4)
    fprintf (stderr, "VIS4 ");
  if (flags & MASK_VIS4B)
    fprintf (stderr, "VIS4B ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { TARGET_CPU_m8, PROCESSOR_M8 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA|MASK_FSMULD, 0 },
    { "cypress", MASK_ISA|MASK_FSMULD, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8 },
    { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
    { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
    { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
    /* UltraSPARC M8 */
    { "m8", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
  };
  const struct cpu_table *cpu;
  unsigned int i;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  /* Enable the FsMULd instruction by default if not explicitly specified by
     the user.  It may be later disabled by the CPU (explicitly or not).  */
  if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
    target_flags |= MASK_FSMULD;

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (!TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit targets to use a 128-bit long double.  */
  if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32-bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  /* Set the default CPU if no -mcpu option was specified.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  /* Set the default CPU if no -mtune option was specified.  */
  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_SPARC5_VIS4
		   & ~(MASK_VIS4 | MASK_SUBXC)
#endif
#ifndef HAVE_AS_SPARC6
		   & ~(MASK_VIS4B)
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   & ~(target_flags_explicit & MASK_FEATURES));

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* -mvis4 implies -mvis3, -mvis2 and -mvis.  */
  if (TARGET_VIS4)
    target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;

  /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis.  */
  if (TARGET_VIS4B)
    target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld
     if the FPU is disabled.  */
  if (!TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
		      | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available; -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && !TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32-bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9 and makes no sense in 64-bit mode.  */
  if (!TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32-bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Use LRA instead of reload, unless otherwise instructed.  */
  if (!(target_flags_explicit & MASK_LRA))
    target_flags |= MASK_LRA;

  /* Enable the back-to-back store errata workaround for LEON3FT.  */
  if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
    sparc_fix_b2bst = 1;

  /* Disable FsMULd for the UT699 since it doesn't work correctly.  */
  if (sparc_fix_ut699)
    target_flags &= ~MASK_FSMULD;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0)
    {
      if (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4)
	align_functions = 32;
      else if (sparc_cpu == PROCESSOR_NIAGARA7
	       || sparc_cpu == PROCESSOR_M8)
	align_functions = 64;
    }

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NIAGARA7:
      sparc_costs = &niagara7_costs;
      break;
    case PROCESSOR_M8:
      sparc_costs = &m8_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    }
1766 if (sparc_memory_model == SMM_DEFAULT)
1768 /* Choose the memory model for the operating system. */
1769 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1770 if (os_default != SMM_DEFAULT)
1771 sparc_memory_model = os_default;
1772 /* Choose the most relaxed model for the processor. */
1773 else if (TARGET_V9)
1774 sparc_memory_model = SMM_RMO;
1775 else if (TARGET_LEON3)
1776 sparc_memory_model = SMM_TSO;
1777 else if (TARGET_LEON)
1778 sparc_memory_model = SMM_SC;
1779 else if (TARGET_V8)
1780 sparc_memory_model = SMM_PSO;
1781 else
1782 sparc_memory_model = SMM_SC;
1785 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1786 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1787 target_flags |= MASK_LONG_DOUBLE_128;
1788 #endif
1790 if (TARGET_DEBUG_OPTIONS)
1791 dump_target_flags ("Final target_flags", target_flags);
1793 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1794 can run at the same time. More importantly, it is the threshold
1795 defining when additional prefetches will be dropped by the
1796 hardware.
1798 The UltraSPARC-III features a documented prefetch queue with a
1799 size of 8. Additional prefetches issued by the cpu are
1800 dropped.
1802 Niagara processors are different. In these processors prefetches
1803 are handled much like regular loads. The L1 miss buffer is 32
1804 entries, but prefetches start getting affected when 30 entries
1805 become occupied. That occupation could be a mix of regular loads
1806 and prefetches though. And that buffer is shared by all threads.
1807 Once the threshold is reached, if the core is running a single
1808 thread the prefetch will retry. If more than one thread is
1809 running, the prefetch will be dropped.
1811 All this makes it very difficult to determine how many
1812 prefetches can be issued simultaneously, even in a
1813 single-threaded program. Experimental results show that setting
1814 this parameter to 32 works well when the number of threads is not
1815 high. */
1816 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1817 ((sparc_cpu == PROCESSOR_ULTRASPARC
1818 || sparc_cpu == PROCESSOR_NIAGARA
1819 || sparc_cpu == PROCESSOR_NIAGARA2
1820 || sparc_cpu == PROCESSOR_NIAGARA3
1821 || sparc_cpu == PROCESSOR_NIAGARA4)
1822 ? 2
1823 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1824 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
1825 || sparc_cpu == PROCESSOR_M8)
1826 ? 32 : 3))),
1827 global_options.x_param_values,
1828 global_options_set.x_param_values);
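/* The net effect of the expression above: 2 for UltraSPARC and the
   Niagara-1 through Niagara-4 parts, 8 for UltraSPARC-III, 32 for
   Niagara-7 and M8, and the generic default of 3 everywhere else.  */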
1830 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
1831 bytes.
1833 The Oracle SPARC Architecture (previously the UltraSPARC
1834 Architecture) specification states that when a PREFETCH[A]
1835 instruction is executed an implementation-specific amount of data
1836 is prefetched, and that it is at least 64 bytes long (aligned to
1837 at least 64 bytes).
1839 However, this is not correct. The M7 (and implementations prior
1840 to that) does not guarantee a 64B prefetch into a cache if the
1841 line size is smaller. A single cache line is all that is ever
1842 prefetched. So for the M7, where the L1D$ has 32B lines and the
1843 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1844 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1845 is a read_n prefetch, which is the only type which allocates to
1846 the L1.) */
1847 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1848 (sparc_cpu == PROCESSOR_M8
1849 ? 64 : 32),
1850 global_options.x_param_values,
1851 global_options_set.x_param_values);
1853 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1854 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
1855 Niagara processors feature an L1D$ of 16KB. */
1856 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1857 ((sparc_cpu == PROCESSOR_ULTRASPARC
1858 || sparc_cpu == PROCESSOR_ULTRASPARC3
1859 || sparc_cpu == PROCESSOR_NIAGARA
1860 || sparc_cpu == PROCESSOR_NIAGARA2
1861 || sparc_cpu == PROCESSOR_NIAGARA3
1862 || sparc_cpu == PROCESSOR_NIAGARA4
1863 || sparc_cpu == PROCESSOR_NIAGARA7
1864 || sparc_cpu == PROCESSOR_M8)
1865 ? 16 : 64),
1866 global_options.x_param_values,
1867 global_options_set.x_param_values);
1870 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1871 that 512 is the default in params.def. */
1872 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1873 ((sparc_cpu == PROCESSOR_NIAGARA4
1874 || sparc_cpu == PROCESSOR_M8)
1875 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1876 ? 256 : 512)),
1877 global_options.x_param_values,
1878 global_options_set.x_param_values);
1881 /* Disable save slot sharing for call-clobbered registers by default.
1882 The IRA sharing algorithm works on single registers only and this
1883 pessimizes for double floating-point registers. */
1884 if (!global_options_set.x_flag_ira_share_save_slots)
1885 flag_ira_share_save_slots = 0;
1887 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1888 redundant 32-to-64-bit extensions. */
1889 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1890 flag_ree = 0;
1893 /* Miscellaneous utilities. */
1895 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1896 or branch on register contents instructions. */
1898 int
1899 v9_regcmp_p (enum rtx_code code)
1901 return (code == EQ || code == NE || code == GE || code == LT
1902 || code == LE || code == GT);
1905 /* Nonzero if OP is a floating point constant which can
1906 be loaded into an integer register using a single
1907 sethi instruction. */
1909 int
1910 fp_sethi_p (rtx op)
1912 if (GET_CODE (op) == CONST_DOUBLE)
1914 long i;
1916 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1917 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1920 return 0;
1923 /* Nonzero if OP is a floating point constant which can
1924 be loaded into an integer register using a single
1925 mov instruction. */
1927 int
1928 fp_mov_p (rtx op)
1930 if (GET_CODE (op) == CONST_DOUBLE)
1932 long i;
1934 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1935 return SPARC_SIMM13_P (i);
1938 return 0;
1941 /* Nonzero if OP is a floating point constant which can
1942 be loaded into an integer register using a high/losum
1943 instruction sequence. */
1945 int
1946 fp_high_losum_p (rtx op)
1948 /* The constraints calling this should only be in
1949 SFmode move insns, so any constant which cannot
1950 be moved using a single insn will do. */
1951 if (GET_CODE (op) == CONST_DOUBLE)
1953 long i;
1955 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1956 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1959 return 0;
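/* As a worked example for the three predicates above: the SFmode
   constant 1.0f has the image 0x3f800000.  That is outside the simm13
   range but has its low 10 bits clear, so fp_sethi_p accepts it (one
   sethi).  An image such as 0x00000123 is a valid simm13, so fp_mov_p
   accepts it (one mov), while one like 0x3f8003ff fails both tests and
   is left to the sethi/or sequence that fp_high_losum_p stands for.  */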
1962 /* Return true if the address of LABEL can be loaded by means of the
1963 mov{si,di}_pic_label_ref patterns in PIC mode. */
1965 static bool
1966 can_use_mov_pic_label_ref (rtx label)
1968 /* VxWorks does not impose a fixed gap between segments; the run-time
1969 gap can be different from the object-file gap. We therefore can't
1970 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1971 are absolutely sure that X is in the same segment as the GOT.
1972 Unfortunately, the flexibility of linker scripts means that we
1973 can't be sure of that in general, so assume that GOT-relative
1974 accesses are never valid on VxWorks. */
1975 if (TARGET_VXWORKS_RTP)
1976 return false;
1978 /* Similarly, if the label is non-local, it might end up being placed
1979 in a different section than the current one, but mov_pic_label_ref
1980 requires the label and the code to be in the same section. */
1981 if (LABEL_REF_NONLOCAL_P (label))
1982 return false;
1984 /* Finally, if we are reordering basic blocks and partitioning them
1985 into hot and cold sections, this might happen for any label. */
1986 if (flag_reorder_blocks_and_partition)
1987 return false;
1989 return true;
1992 /* Expand a move instruction. Return true if all work is done. */
1994 bool
1995 sparc_expand_move (machine_mode mode, rtx *operands)
1997 /* Handle sets of MEM first. */
1998 if (GET_CODE (operands[0]) == MEM)
2000 /* 0 is a register (or a pair of registers) on SPARC. */
2001 if (register_or_zero_operand (operands[1], mode))
2002 return false;
2004 if (!reload_in_progress)
2006 operands[0] = validize_mem (operands[0]);
2007 operands[1] = force_reg (mode, operands[1]);
2011 /* Fixup TLS cases. */
2012 if (TARGET_HAVE_TLS
2013 && CONSTANT_P (operands[1])
2014 && sparc_tls_referenced_p (operands [1]))
2016 operands[1] = sparc_legitimize_tls_address (operands[1]);
2017 return false;
2020 /* Fixup PIC cases. */
2021 if (flag_pic && CONSTANT_P (operands[1]))
2023 if (pic_address_needs_scratch (operands[1]))
2024 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2026 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2027 if (GET_CODE (operands[1]) == LABEL_REF
2028 && can_use_mov_pic_label_ref (operands[1]))
2030 if (mode == SImode)
2032 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2033 return true;
2036 if (mode == DImode)
2038 gcc_assert (TARGET_ARCH64);
2039 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2040 return true;
2044 if (symbolic_operand (operands[1], mode))
2046 operands[1]
2047 = sparc_legitimize_pic_address (operands[1],
2048 reload_in_progress
2049 ? operands[0] : NULL_RTX);
2050 return false;
2054 /* If we are trying to toss an integer constant into FP registers,
2055 or loading an FP or vector constant, force it into memory.
2056 if (CONSTANT_P (operands[1])
2057 && REG_P (operands[0])
2058 && (SPARC_FP_REG_P (REGNO (operands[0]))
2059 || SCALAR_FLOAT_MODE_P (mode)
2060 || VECTOR_MODE_P (mode)))
2062 /* emit_group_store will send such bogosity to us when it is
2063 not storing directly into memory. So fix this up to avoid
2064 crashes in output_constant_pool. */
2065 if (operands [1] == const0_rtx)
2066 operands[1] = CONST0_RTX (mode);
2068 /* We can clear FP registers, or set them to all ones, if TARGET_VIS;
2069 the other registers always allow it. */
2070 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2071 && (const_zero_operand (operands[1], mode)
2072 || const_all_ones_operand (operands[1], mode)))
2073 return false;
2075 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2076 /* We are able to build any SF constant in integer registers
2077 with at most 2 instructions. */
2078 && (mode == SFmode
2079 /* And any DF constant in integer registers if needed. */
2080 || (mode == DFmode && !can_create_pseudo_p ())))
2081 return false;
2083 operands[1] = force_const_mem (mode, operands[1]);
2084 if (!reload_in_progress)
2085 operands[1] = validize_mem (operands[1]);
2086 return false;
2089 /* Accept non-constants and valid constants unmodified. */
2090 if (!CONSTANT_P (operands[1])
2091 || GET_CODE (operands[1]) == HIGH
2092 || input_operand (operands[1], mode))
2093 return false;
2095 switch (mode)
2097 case E_QImode:
2098 /* All QImode constants require only one insn, so proceed. */
2099 break;
2101 case E_HImode:
2102 case E_SImode:
2103 sparc_emit_set_const32 (operands[0], operands[1]);
2104 return true;
2106 case E_DImode:
2107 /* input_operand should have filtered out 32-bit mode. */
2108 sparc_emit_set_const64 (operands[0], operands[1]);
2109 return true;
2111 case E_TImode:
2113 rtx high, low;
2114 /* TImode isn't available in 32-bit mode. */
2115 split_double (operands[1], &high, &low);
2116 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2117 high));
2118 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2119 low));
2121 return true;
2123 default:
2124 gcc_unreachable ();
2127 return false;
2130 /* Load OP1, a 32-bit constant, into OP0, a register.
2131 We know it can't be done in one insn when we get
2132 here; the move expander guarantees this. */
2134 static void
2135 sparc_emit_set_const32 (rtx op0, rtx op1)
2137 machine_mode mode = GET_MODE (op0);
2138 rtx temp = op0;
2140 if (can_create_pseudo_p ())
2141 temp = gen_reg_rtx (mode);
2143 if (GET_CODE (op1) == CONST_INT)
2145 gcc_assert (!small_int_operand (op1, mode)
2146 && !const_high_operand (op1, mode));
2148 /* Emit them as real moves instead of a HIGH/LO_SUM;
2149 this way CSE can see everything and reuse intermediate
2150 values if it wants. */
2151 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2152 & ~(HOST_WIDE_INT) 0x3ff)));
2154 emit_insn (gen_rtx_SET (op0,
2155 gen_rtx_IOR (mode, temp,
2156 GEN_INT (INTVAL (op1) & 0x3ff))));
2158 else
2160 /* A symbol, emit in the traditional way. */
2161 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2162 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
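/* For instance, loading the constant 0x12345678 this way yields, in
   effect,

   sethi %hi(0x12345678), %temp ! temp = 0x12345400
   or %temp, 0x278, %reg ! 0x12345678 & 0x3ff == 0x278

   with both halves visible to CSE as ordinary SETs.  */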
2166 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2167 If TEMP is nonzero, we are forbidden to use any other scratch
2168 registers. Otherwise, we are allowed to generate them as needed.
2170 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2171 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2173 void
2174 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2176 rtx cst, temp1, temp2, temp3, temp4, temp5;
2177 rtx ti_temp = 0;
2179 /* Deal with too large offsets. */
2180 if (GET_CODE (op1) == CONST
2181 && GET_CODE (XEXP (op1, 0)) == PLUS
2182 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2183 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2185 gcc_assert (!temp);
2186 temp1 = gen_reg_rtx (DImode);
2187 temp2 = gen_reg_rtx (DImode);
2188 sparc_emit_set_const64 (temp2, cst);
2189 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2190 NULL_RTX);
2191 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2192 return;
2195 if (temp && GET_MODE (temp) == TImode)
2197 ti_temp = temp;
2198 temp = gen_rtx_REG (DImode, REGNO (temp));
2201 /* SPARC-V9 code-model support. */
2202 switch (sparc_cmodel)
2204 case CM_MEDLOW:
2205 /* The range spanned by all instructions in the object is less
2206 than 2^31 bytes (2GB) and the distance from any instruction
2207 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2208 than 2^31 bytes (2GB).
2210 The executable must be in the low 4TB of the virtual address
2211 space.
2213 sethi %hi(symbol), %temp1
2214 or %temp1, %lo(symbol), %reg */
2215 if (temp)
2216 temp1 = temp; /* op0 is allowed. */
2217 else
2218 temp1 = gen_reg_rtx (DImode);
2220 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2221 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2222 break;
2224 case CM_MEDMID:
2225 /* The range spanned by all instructions in the object is less
2226 than 2^31 bytes (2GB) and the distance from any instruction
2227 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2228 than 2^31 bytes (2GB).
2230 The executable must be in the low 16TB of the virtual address
2231 space.
2233 sethi %h44(symbol), %temp1
2234 or %temp1, %m44(symbol), %temp2
2235 sllx %temp2, 12, %temp3
2236 or %temp3, %l44(symbol), %reg */
2237 if (temp)
2239 temp1 = op0;
2240 temp2 = op0;
2241 temp3 = temp; /* op0 is allowed. */
2243 else
2245 temp1 = gen_reg_rtx (DImode);
2246 temp2 = gen_reg_rtx (DImode);
2247 temp3 = gen_reg_rtx (DImode);
2250 emit_insn (gen_seth44 (temp1, op1));
2251 emit_insn (gen_setm44 (temp2, temp1, op1));
2252 emit_insn (gen_rtx_SET (temp3,
2253 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2254 emit_insn (gen_setl44 (op0, temp3, op1));
2255 break;
2257 case CM_MEDANY:
2258 /* The range spanned by all instructions in the object is less
2259 than 2^31 bytes (2GB) and the distance from any instruction
2260 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2261 than 2^31 bytes (2GB).
2263 The executable can be placed anywhere in the virtual address
2264 space.
2266 sethi %hh(symbol), %temp1
2267 sethi %lm(symbol), %temp2
2268 or %temp1, %hm(symbol), %temp3
2269 sllx %temp3, 32, %temp4
2270 or %temp4, %temp2, %temp5
2271 or %temp5, %lo(symbol), %reg */
2272 if (temp)
2274 /* It is possible that one of the registers we got for operands[2]
2275 might coincide with that of operands[0] (which is why we made
2276 it TImode). Pick the other one to use as our scratch. */
2277 if (rtx_equal_p (temp, op0))
2279 gcc_assert (ti_temp);
2280 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2282 temp1 = op0;
2283 temp2 = temp; /* op0 is _not_ allowed, see above. */
2284 temp3 = op0;
2285 temp4 = op0;
2286 temp5 = op0;
2288 else
2290 temp1 = gen_reg_rtx (DImode);
2291 temp2 = gen_reg_rtx (DImode);
2292 temp3 = gen_reg_rtx (DImode);
2293 temp4 = gen_reg_rtx (DImode);
2294 temp5 = gen_reg_rtx (DImode);
2297 emit_insn (gen_sethh (temp1, op1));
2298 emit_insn (gen_setlm (temp2, op1));
2299 emit_insn (gen_sethm (temp3, temp1, op1));
2300 emit_insn (gen_rtx_SET (temp4,
2301 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2302 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2303 emit_insn (gen_setlo (op0, temp5, op1));
2304 break;
2306 case CM_EMBMEDANY:
2307 Old old old backwards-compatibility cruft here.
2308 Essentially it is MEDLOW with a fixed 64-bit
2309 virtual base added to all data segment addresses.
2310 Text-segment stuff is computed like MEDANY, we can't
2311 reuse the code above because the relocation knobs
2312 look different.
2314 Data segment: sethi %hi(symbol), %temp1
2315 add %temp1, EMBMEDANY_BASE_REG, %temp2
2316 or %temp2, %lo(symbol), %reg */
2317 if (data_segment_operand (op1, GET_MODE (op1)))
2319 if (temp)
2321 temp1 = temp; /* op0 is allowed. */
2322 temp2 = op0;
2324 else
2326 temp1 = gen_reg_rtx (DImode);
2327 temp2 = gen_reg_rtx (DImode);
2330 emit_insn (gen_embmedany_sethi (temp1, op1));
2331 emit_insn (gen_embmedany_brsum (temp2, temp1));
2332 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2335 /* Text segment: sethi %uhi(symbol), %temp1
2336 sethi %hi(symbol), %temp2
2337 or %temp1, %ulo(symbol), %temp3
2338 sllx %temp3, 32, %temp4
2339 or %temp4, %temp2, %temp5
2340 or %temp5, %lo(symbol), %reg */
2341 else
2343 if (temp)
2345 /* It is possible that one of the registers we got for operands[2]
2346 might coincide with that of operands[0] (which is why we made
2347 it TImode). Pick the other one to use as our scratch. */
2348 if (rtx_equal_p (temp, op0))
2350 gcc_assert (ti_temp);
2351 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2353 temp1 = op0;
2354 temp2 = temp; /* op0 is _not_ allowed, see above. */
2355 temp3 = op0;
2356 temp4 = op0;
2357 temp5 = op0;
2359 else
2361 temp1 = gen_reg_rtx (DImode);
2362 temp2 = gen_reg_rtx (DImode);
2363 temp3 = gen_reg_rtx (DImode);
2364 temp4 = gen_reg_rtx (DImode);
2365 temp5 = gen_reg_rtx (DImode);
2368 emit_insn (gen_embmedany_textuhi (temp1, op1));
2369 emit_insn (gen_embmedany_texthi (temp2, op1));
2370 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2371 emit_insn (gen_rtx_SET (temp4,
2372 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2373 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2374 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2376 break;
2378 default:
2379 gcc_unreachable ();
2383 /* These avoid problems when cross compiling. If we do not
2384 go through all this hair then the optimizer will see
2385 invalid REG_EQUAL notes or in some cases none at all. */
2386 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2387 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2388 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2389 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2391 /* The optimizer must not assume anything about exactly
2392 which bits are set for a HIGH; they are unspecified.
2393 Unfortunately this leads to many missed optimizations
2394 during CSE. We mask out the non-HIGH bits and match
2395 a plain movdi, to alleviate this problem. */
2396 static rtx
2397 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2399 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
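/* E.g. gen_safe_HIGH64 (dest, 0x12345678) emits a plain move of
   0x12345400, exactly the bits that sethi %hi(0x12345678) would leave
   in the register, but in a form the optimizer can reason about.  */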
2402 static rtx
2403 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2405 return gen_rtx_SET (dest, GEN_INT (val));
2408 static rtx
2409 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2411 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2414 static rtx
2415 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2417 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2420 /* Worker routines for 64-bit constant formation on arch64.
2421 One of the key things to do in these emissions is
2422 to create as many temp REGs as possible. This makes it
2423 possible for half-built constants to be reused when
2424 similar values are required later on.
2425 Without doing this, the optimizer cannot see such
2426 opportunities. */
2428 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2429 unsigned HOST_WIDE_INT, int);
2431 static void
2432 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2433 unsigned HOST_WIDE_INT low_bits, int is_neg)
2435 unsigned HOST_WIDE_INT high_bits;
2437 if (is_neg)
2438 high_bits = (~low_bits) & 0xffffffff;
2439 else
2440 high_bits = low_bits;
2442 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2443 if (!is_neg)
2445 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2447 else
2449 /* If we are XOR'ing with -1, then we should emit a one's complement
2450 instead. This way the combiner will notice logical operations
2451 such as ANDN later on and substitute. */
2452 if ((low_bits & 0x3ff) == 0x3ff)
2454 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2456 else
2458 emit_insn (gen_rtx_SET (op0,
2459 gen_safe_XOR64 (temp,
2460 (-(HOST_WIDE_INT)0x400
2461 | (low_bits & 0x3ff)))));
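/* A worked example: the constant 0xffffffff80000001 reaches this
   routine with low_bits == 0x80000001 and is_neg set, and comes out
   roughly as

   sethi %hi(0x7ffffffe), %temp ! temp = 0x7ffffc00
   xor %temp, -1023, %reg ! -1023 == -0x400 | 0x001

   where the xor's sign extension supplies the all-ones upper half.  */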
2466 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2467 unsigned HOST_WIDE_INT, int);
2469 static void
2470 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2471 unsigned HOST_WIDE_INT high_bits,
2472 unsigned HOST_WIDE_INT low_immediate,
2473 int shift_count)
2475 rtx temp2 = op0;
2477 if ((high_bits & 0xfffffc00) != 0)
2479 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2480 if ((high_bits & ~0xfffffc00) != 0)
2481 emit_insn (gen_rtx_SET (op0,
2482 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2483 else
2484 temp2 = temp;
2486 else
2488 emit_insn (gen_safe_SET64 (temp, high_bits));
2489 temp2 = temp;
2492 /* Now shift it up into place. */
2493 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2494 GEN_INT (shift_count))));
2496 /* If there is a low immediate part piece, finish up by
2497 putting that in as well. */
2498 if (low_immediate != 0)
2499 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2502 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2503 unsigned HOST_WIDE_INT);
2505 /* Full 64-bit constant decomposition. Even though this is the
2506 'worst' case, we still optimize a few things away. */
2507 static void
2508 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2509 unsigned HOST_WIDE_INT high_bits,
2510 unsigned HOST_WIDE_INT low_bits)
2512 rtx sub_temp = op0;
2514 if (can_create_pseudo_p ())
2515 sub_temp = gen_reg_rtx (DImode);
2517 if ((high_bits & 0xfffffc00) != 0)
2519 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2520 if ((high_bits & ~0xfffffc00) != 0)
2521 emit_insn (gen_rtx_SET (sub_temp,
2522 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2523 else
2524 sub_temp = temp;
2526 else
2528 emit_insn (gen_safe_SET64 (temp, high_bits));
2529 sub_temp = temp;
2532 if (can_create_pseudo_p ())
2534 rtx temp2 = gen_reg_rtx (DImode);
2535 rtx temp3 = gen_reg_rtx (DImode);
2536 rtx temp4 = gen_reg_rtx (DImode);
2538 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2539 GEN_INT (32))));
2541 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2542 if ((low_bits & ~0xfffffc00) != 0)
2544 emit_insn (gen_rtx_SET (temp3,
2545 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2546 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2548 else
2550 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2553 else
2555 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2556 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2557 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2558 int to_shift = 12;
2560 /* We are in the middle of reload, so this is really
2561 painful. However we do still make an attempt to
2562 avoid emitting truly stupid code. */
2563 if (low1 != const0_rtx)
2565 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2566 GEN_INT (to_shift))));
2567 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2568 sub_temp = op0;
2569 to_shift = 12;
2571 else
2573 to_shift += 12;
2575 if (low2 != const0_rtx)
2577 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2578 GEN_INT (to_shift))));
2579 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2580 sub_temp = op0;
2581 to_shift = 8;
2583 else
2585 to_shift += 8;
2587 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2588 GEN_INT (to_shift))));
2589 if (low3 != const0_rtx)
2590 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2591 /* phew... */
2595 /* Analyze a 64-bit constant for certain properties. */
2596 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2597 unsigned HOST_WIDE_INT,
2598 int *, int *, int *);
2600 static void
2601 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2602 unsigned HOST_WIDE_INT low_bits,
2603 int *hbsp, int *lbsp, int *abbasp)
2605 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2606 int i;
2608 lowest_bit_set = highest_bit_set = -1;
2609 i = 0;
2610 do
2612 if ((lowest_bit_set == -1)
2613 && ((low_bits >> i) & 1))
2614 lowest_bit_set = i;
2615 if ((highest_bit_set == -1)
2616 && ((high_bits >> (32 - i - 1)) & 1))
2617 highest_bit_set = (64 - i - 1);
2619 while (++i < 32
2620 && ((highest_bit_set == -1)
2621 || (lowest_bit_set == -1)));
2622 if (i == 32)
2624 i = 0;
2625 do
2627 if ((lowest_bit_set == -1)
2628 && ((high_bits >> i) & 1))
2629 lowest_bit_set = i + 32;
2630 if ((highest_bit_set == -1)
2631 && ((low_bits >> (32 - i - 1)) & 1))
2632 highest_bit_set = 32 - i - 1;
2634 while (++i < 32
2635 && ((highest_bit_set == -1)
2636 || (lowest_bit_set == -1)));
2638 /* If there are no bits set this should have gone out
2639 as one instruction! */
2640 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2641 all_bits_between_are_set = 1;
2642 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2644 if (i < 32)
2646 if ((low_bits & (1 << i)) != 0)
2647 continue;
2649 else
2651 if ((high_bits & (1 << (i - 32))) != 0)
2652 continue;
2654 all_bits_between_are_set = 0;
2655 break;
2657 *hbsp = highest_bit_set;
2658 *lbsp = lowest_bit_set;
2659 *abbasp = all_bits_between_are_set;
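/* For example, the constant 0x0000000000ffff00 arrives with
   high_bits == 0 and low_bits == 0x00ffff00, and leaves with
   *lbsp == 8, *hbsp == 23 and *abbasp == 1: sixteen contiguous
   set bits.  */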
2662 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2664 static int
2665 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2666 unsigned HOST_WIDE_INT low_bits)
2668 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2670 if (high_bits == 0
2671 || high_bits == 0xffffffff)
2672 return 1;
2674 analyze_64bit_constant (high_bits, low_bits,
2675 &highest_bit_set, &lowest_bit_set,
2676 &all_bits_between_are_set);
2678 if ((highest_bit_set == 63
2679 || lowest_bit_set == 0)
2680 && all_bits_between_are_set != 0)
2681 return 1;
2683 if ((highest_bit_set - lowest_bit_set) < 21)
2684 return 1;
2686 return 0;
2689 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2690 unsigned HOST_WIDE_INT,
2691 int, int);
2693 static unsigned HOST_WIDE_INT
2694 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2695 unsigned HOST_WIDE_INT low_bits,
2696 int lowest_bit_set, int shift)
2698 HOST_WIDE_INT hi, lo;
2700 if (lowest_bit_set < 32)
2702 lo = (low_bits >> lowest_bit_set) << shift;
2703 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2705 else
2707 lo = 0;
2708 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2710 gcc_assert (! (hi & lo));
2711 return (hi | lo);
2714 /* Here we are sure to be arch64 and this is an integer constant
2715 being loaded into a register. Emit the most efficient
2716 insn sequence possible. Detection of all the 1-insn cases
2717 has been done already. */
2718 static void
2719 sparc_emit_set_const64 (rtx op0, rtx op1)
2721 unsigned HOST_WIDE_INT high_bits, low_bits;
2722 int lowest_bit_set, highest_bit_set;
2723 int all_bits_between_are_set;
2724 rtx temp = 0;
2726 /* Sanity check that we know what we are working with. */
2727 gcc_assert (TARGET_ARCH64
2728 && (GET_CODE (op0) == SUBREG
2729 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2731 if (! can_create_pseudo_p ())
2732 temp = op0;
2734 if (GET_CODE (op1) != CONST_INT)
2736 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2737 return;
2740 if (! temp)
2741 temp = gen_reg_rtx (DImode);
2743 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2744 low_bits = (INTVAL (op1) & 0xffffffff);
2746 /* low_bits bits 0 --> 31
2747 high_bits bits 32 --> 63 */
2749 analyze_64bit_constant (high_bits, low_bits,
2750 &highest_bit_set, &lowest_bit_set,
2751 &all_bits_between_are_set);
2753 /* First try for a 2-insn sequence. */
2755 /* These situations are preferred because the optimizer can
2756 * do more things with them:
2757 * 1) mov -1, %reg
2758 * sllx %reg, shift, %reg
2759 * 2) mov -1, %reg
2760 * srlx %reg, shift, %reg
2761 * 3) mov some_small_const, %reg
2762 * sllx %reg, shift, %reg
2764 if (((highest_bit_set == 63
2765 || lowest_bit_set == 0)
2766 && all_bits_between_are_set != 0)
2767 || ((highest_bit_set - lowest_bit_set) < 12))
2769 HOST_WIDE_INT the_const = -1;
2770 int shift = lowest_bit_set;
2772 if ((highest_bit_set != 63
2773 && lowest_bit_set != 0)
2774 || all_bits_between_are_set == 0)
2776 the_const =
2777 create_simple_focus_bits (high_bits, low_bits,
2778 lowest_bit_set, 0);
2780 else if (lowest_bit_set == 0)
2781 shift = -(63 - highest_bit_set);
2783 gcc_assert (SPARC_SIMM13_P (the_const));
2784 gcc_assert (shift != 0);
2786 emit_insn (gen_safe_SET64 (temp, the_const));
2787 if (shift > 0)
2788 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2789 GEN_INT (shift))));
2790 else if (shift < 0)
2791 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2792 GEN_INT (-shift))));
2793 return;
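/* For example, 0xfffffffffffffff0 has highest_bit_set == 63,
   lowest_bit_set == 4 and every bit in between set, so it comes out as

   mov -1, %temp
   sllx %temp, 4, %reg  */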
2796 /* Now a range of 22 or fewer bits set somewhere.
2797 * 1) sethi %hi(focus_bits), %reg
2798 * sllx %reg, shift, %reg
2799 * 2) sethi %hi(focus_bits), %reg
2800 * srlx %reg, shift, %reg
2802 if ((highest_bit_set - lowest_bit_set) < 21)
2804 unsigned HOST_WIDE_INT focus_bits =
2805 create_simple_focus_bits (high_bits, low_bits,
2806 lowest_bit_set, 10);
2808 gcc_assert (SPARC_SETHI_P (focus_bits));
2809 gcc_assert (lowest_bit_set != 10);
2811 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2813 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2814 if (lowest_bit_set < 10)
2815 emit_insn (gen_rtx_SET (op0,
2816 gen_rtx_LSHIFTRT (DImode, temp,
2817 GEN_INT (10 - lowest_bit_set))));
2818 else if (lowest_bit_set > 10)
2819 emit_insn (gen_rtx_SET (op0,
2820 gen_rtx_ASHIFT (DImode, temp,
2821 GEN_INT (lowest_bit_set - 10))));
2822 return;
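/* Continuing the 0x0000000000ffff00 example (bits 8-23 set): the focus
   bits are 0x03fffc00, so this case emits roughly

   sethi %hi(0x03fffc00), %reg
   srlx %reg, 2, %reg

   shifting the sethi image down from position 10 to position 8.  */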
2825 /* 1) sethi %hi(low_bits), %reg
2826 * or %reg, %lo(low_bits), %reg
2827 * 2) sethi %hi(~low_bits), %reg
2828 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2830 if (high_bits == 0
2831 || high_bits == 0xffffffff)
2833 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2834 (high_bits == 0xffffffff));
2835 return;
2838 /* Now, try 3-insn sequences. */
2840 /* 1) sethi %hi(high_bits), %reg
2841 * or %reg, %lo(high_bits), %reg
2842 * sllx %reg, 32, %reg
2844 if (low_bits == 0)
2846 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2847 return;
2850 /* We may be able to do something quick
2851 when the constant is negated, so try that. */
2852 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2853 (~low_bits) & 0xfffffc00))
2855 /* NOTE: The trailing bits get XOR'd so we need the
2856 non-negated bits, not the negated ones. */
2857 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2859 if ((((~high_bits) & 0xffffffff) == 0
2860 && ((~low_bits) & 0x80000000) == 0)
2861 || (((~high_bits) & 0xffffffff) == 0xffffffff
2862 && ((~low_bits) & 0x80000000) != 0))
2864 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2866 if ((SPARC_SETHI_P (fast_int)
2867 && (~high_bits & 0xffffffff) == 0)
2868 || SPARC_SIMM13_P (fast_int))
2869 emit_insn (gen_safe_SET64 (temp, fast_int));
2870 else
2871 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2873 else
2875 rtx negated_const;
2876 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2877 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2878 sparc_emit_set_const64 (temp, negated_const);
2881 /* If we are XOR'ing with -1, then we should emit a one's complement
2882 instead. This way the combiner will notice logical operations
2883 such as ANDN later on and substitute. */
2884 if (trailing_bits == 0x3ff)
2886 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2888 else
2890 emit_insn (gen_rtx_SET (op0,
2891 gen_safe_XOR64 (temp,
2892 (-0x400 | trailing_bits))));
2894 return;
2897 /* 1) sethi %hi(xxx), %reg
2898 * or %reg, %lo(xxx), %reg
2899 * sllx %reg, yyy, %reg
2901 * ??? This is just a generalized version of the low_bits==0
2902 * thing above, FIXME...
2904 if ((highest_bit_set - lowest_bit_set) < 32)
2906 unsigned HOST_WIDE_INT focus_bits =
2907 create_simple_focus_bits (high_bits, low_bits,
2908 lowest_bit_set, 0);
2910 /* We cannot get here with all the set bits in one half; those cases were handled above. */
2911 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2913 /* So what we know is that the set bits straddle the
2914 middle of the 64-bit word. */
2915 sparc_emit_set_const64_quick2 (op0, temp,
2916 focus_bits, 0,
2917 lowest_bit_set);
2918 return;
2921 /* 1) sethi %hi(high_bits), %reg
2922 * or %reg, %lo(high_bits), %reg
2923 * sllx %reg, 32, %reg
2924 * or %reg, low_bits, %reg
2926 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2928 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2929 return;
2932 /* The easiest way when all else fails, is full decomposition. */
2933 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2936 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2938 static bool
2939 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2941 *p1 = SPARC_ICC_REG;
2942 *p2 = SPARC_FCC_REG;
2943 return true;
2946 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
2948 static unsigned int
2949 sparc_min_arithmetic_precision (void)
2951 return 32;
2954 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2955 return the mode to be used for the comparison. For floating-point,
2956 CCFP[E]mode is used. CCNZmode should be used when the first operand
2957 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2958 processing is needed. */
2960 machine_mode
2961 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2963 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2965 switch (op)
2967 case EQ:
2968 case NE:
2969 case UNORDERED:
2970 case ORDERED:
2971 case UNLT:
2972 case UNLE:
2973 case UNGT:
2974 case UNGE:
2975 case UNEQ:
2976 case LTGT:
2977 return CCFPmode;
2979 case LT:
2980 case LE:
2981 case GT:
2982 case GE:
2983 return CCFPEmode;
2985 default:
2986 gcc_unreachable ();
2989 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2990 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2991 && y == const0_rtx)
2993 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2994 return CCXNZmode;
2995 else
2996 return CCNZmode;
2998 else
3000 /* This is for the cmp<mode>_sne pattern. */
3001 if (GET_CODE (x) == NOT && y == constm1_rtx)
3003 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3004 return CCXCmode;
3005 else
3006 return CCCmode;
3009 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3010 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3012 if (GET_CODE (y) == UNSPEC
3013 && (XINT (y, 1) == UNSPEC_ADDV
3014 || XINT (y, 1) == UNSPEC_SUBV
3015 || XINT (y, 1) == UNSPEC_NEGV))
3016 return CCVmode;
3017 else
3018 return CCCmode;
3021 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3022 return CCXmode;
3023 else
3024 return CCmode;
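/* So, for instance, (compare (plus (reg:SI x) (reg:SI y)) (const_int 0))
   selects CCNZmode, letting the flag-setting addcc itself feed the user
   of the comparison, whereas a plain (compare (reg:SI x) (reg:SI y))
   gets CCmode.  */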
3028 /* Emit the compare insn and return the CC reg for a CODE comparison
3029 with operands X and Y. */
3031 static rtx
3032 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3034 machine_mode mode;
3035 rtx cc_reg;
3037 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3038 return x;
3040 mode = SELECT_CC_MODE (code, x, y);
3042 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3043 fcc regs (cse can't tell they're really call clobbered regs and will
3044 remove a duplicate comparison even if there is an intervening function
3045 call - it will then try to reload the cc reg via an int reg which is why
3046 we need the movcc patterns). It is possible to provide the movcc
3047 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3048 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3049 to tell cse that CCFPE mode registers (even pseudos) are call
3050 clobbered. */
3052 /* ??? This is an experiment. Rather than making changes to cse which may
3053 or may not be easy/clean, we do our own cse. This is possible because
3054 we will generate hard registers. Cse knows they're call clobbered (it
3055 doesn't know the same thing about pseudos). If we guess wrong, no big
3056 deal, but if we win, great! */
3058 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3059 #if 1 /* experiment */
3061 int reg;
3062 /* We cycle through the registers to ensure they're all exercised. */
3063 static int next_fcc_reg = 0;
3064 /* Previous x,y for each fcc reg. */
3065 static rtx prev_args[4][2];
3067 /* Scan prev_args for x,y. */
3068 for (reg = 0; reg < 4; reg++)
3069 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3070 break;
3071 if (reg == 4)
3073 reg = next_fcc_reg;
3074 prev_args[reg][0] = x;
3075 prev_args[reg][1] = y;
3076 next_fcc_reg = (next_fcc_reg + 1) & 3;
3078 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3080 #else
3081 cc_reg = gen_reg_rtx (mode);
3082 #endif /* ! experiment */
3083 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3084 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3085 else
3086 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3088 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3089 will only result in an unrecognizable insn so no point in asserting. */
3090 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3092 return cc_reg;
3096 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3098 rtx
3099 gen_compare_reg (rtx cmp)
3101 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3104 /* This function is used for v9 only.
3105 DEST is the target of the Scc insn.
3106 CODE is the code for an Scc's comparison.
3107 X and Y are the values we compare.
3109 This function is needed to turn
3111 (set (reg:SI 110)
3112 (gt (reg:CCX 100 %icc)
3113 (const_int 0)))
3114 into
3115 (set (reg:SI 110)
3116 (gt:DI (reg:CCX 100 %icc)
3117 (const_int 0)))
3119 I.e., the instruction recognizer needs to see the mode of the comparison to
3120 find the right instruction. We could use "gt:DI" right in the
3121 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3123 static int
3124 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3126 if (! TARGET_ARCH64
3127 && (GET_MODE (x) == DImode
3128 || GET_MODE (dest) == DImode))
3129 return 0;
3131 /* Try to use the movrCC insns. */
3132 if (TARGET_ARCH64
3133 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3134 && y == const0_rtx
3135 && v9_regcmp_p (compare_code))
3137 rtx op0 = x;
3138 rtx temp;
3140 /* Special case for op0 != 0. This can be done with one instruction if
3141 dest == x. */
3143 if (compare_code == NE
3144 && GET_MODE (dest) == DImode
3145 && rtx_equal_p (op0, dest))
3147 emit_insn (gen_rtx_SET (dest,
3148 gen_rtx_IF_THEN_ELSE (DImode,
3149 gen_rtx_fmt_ee (compare_code, DImode,
3150 op0, const0_rtx),
3151 const1_rtx,
3152 dest)));
3153 return 1;
3156 if (reg_overlap_mentioned_p (dest, op0))
3158 /* Handle the case where dest == x.
3159 We "early clobber" the result. */
3160 op0 = gen_reg_rtx (GET_MODE (x));
3161 emit_move_insn (op0, x);
3164 emit_insn (gen_rtx_SET (dest, const0_rtx));
3165 if (GET_MODE (op0) != DImode)
3167 temp = gen_reg_rtx (DImode);
3168 convert_move (temp, op0, 0);
3170 else
3171 temp = op0;
3172 emit_insn (gen_rtx_SET (dest,
3173 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3174 gen_rtx_fmt_ee (compare_code, DImode,
3175 temp, const0_rtx),
3176 const1_rtx,
3177 dest)));
3178 return 1;
3180 else
3182 x = gen_compare_reg_1 (compare_code, x, y);
3183 y = const0_rtx;
3185 emit_insn (gen_rtx_SET (dest, const0_rtx));
3186 emit_insn (gen_rtx_SET (dest,
3187 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3188 gen_rtx_fmt_ee (compare_code,
3189 GET_MODE (x), x, y),
3190 const1_rtx, dest)));
3191 return 1;
3196 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3197 without jumps using the addx/subx instructions. */
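/* Illustratively, "dest = (x < y)" for unsigned SImode values can be
   rendered without a branch as

   subcc %x, %y, %g0 ! carry = borrow = (x < y) unsigned
   addx %g0, 0, %dest ! dest = 0 + 0 + carry

   which is the kind of sequence the code below arranges for.  */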
3199 bool
3200 emit_scc_insn (rtx operands[])
3202 rtx tem, x, y;
3203 enum rtx_code code;
3204 machine_mode mode;
3206 /* The quad-word fp compare library routines all return nonzero to indicate
3207 true, which is different from the equivalent libgcc routines, so we must
3208 handle them specially here. */
3209 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3211 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3212 GET_CODE (operands[1]));
3213 operands[2] = XEXP (operands[1], 0);
3214 operands[3] = XEXP (operands[1], 1);
3217 code = GET_CODE (operands[1]);
3218 x = operands[2];
3219 y = operands[3];
3220 mode = GET_MODE (x);
3222 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3223 more applications). The exception to this is "reg != 0" which can
3224 be done in one instruction on v9 (so we do it). */
3225 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3227 if (y != const0_rtx)
3228 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3230 rtx pat = gen_rtx_SET (operands[0],
3231 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3232 x, const0_rtx));
3234 /* If we can use addx/subx or addxc, add a clobber for CC. */
3235 if (mode == SImode || (code == NE && TARGET_VIS3))
3237 rtx clobber
3238 = gen_rtx_CLOBBER (VOIDmode,
3239 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3240 SPARC_ICC_REG));
3241 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3244 emit_insn (pat);
3245 return true;
3248 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3249 if (TARGET_ARCH64
3250 && mode == DImode
3251 && !((code == LTU || code == GTU) && TARGET_VIS3)
3252 && gen_v9_scc (operands[0], code, x, y))
3253 return true;
3255 /* We can do LTU and GEU using the addx/subx instructions too. And
3256 for GTU/LEU, if both operands are registers swap them and fall
3257 back to the easy case. */
3258 if (code == GTU || code == LEU)
3260 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3261 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3263 tem = x;
3264 x = y;
3265 y = tem;
3266 code = swap_condition (code);
3270 if (code == LTU || code == GEU)
3272 emit_insn (gen_rtx_SET (operands[0],
3273 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3274 gen_compare_reg_1 (code, x, y),
3275 const0_rtx)));
3276 return true;
3279 /* All the possibilities to use addx/subx-based sequences have been
3280 exhausted, so try for a 3-instruction sequence using v9 conditional
3281 moves. */
3282 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3283 return true;
3285 /* Nope, do branches. */
3286 return false;
3289 /* Emit a conditional jump insn for the v9 architecture using comparison code
3290 CODE and jump target LABEL.
3291 This function exists to take advantage of the v9 brxx insns. */
3293 static void
3294 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3296 emit_jump_insn (gen_rtx_SET (pc_rtx,
3297 gen_rtx_IF_THEN_ELSE (VOIDmode,
3298 gen_rtx_fmt_ee (code, GET_MODE (op0),
3299 op0, const0_rtx),
3300 gen_rtx_LABEL_REF (VOIDmode, label),
3301 pc_rtx)));
3304 /* Emit a conditional jump insn for the UA2011 architecture using
3305 comparison code CODE and jump target LABEL. This function exists
3306 to take advantage of the UA2011 Compare and Branch insns. */
3308 static void
3309 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3311 rtx if_then_else;
3313 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3314 gen_rtx_fmt_ee(code, GET_MODE(op0),
3315 op0, op1),
3316 gen_rtx_LABEL_REF (VOIDmode, label),
3317 pc_rtx);
3319 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3322 void
3323 emit_conditional_branch_insn (rtx operands[])
3325 /* The quad-word fp compare library routines all return nonzero to indicate
3326 true, which is different from the equivalent libgcc routines, so we must
3327 handle them specially here. */
3328 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3330 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3331 GET_CODE (operands[0]));
3332 operands[1] = XEXP (operands[0], 0);
3333 operands[2] = XEXP (operands[0], 1);
3336 /* If we can tell early on that the comparison is against a constant
3337 that won't fit in the 5-bit signed immediate field of a cbcond,
3338 use one of the other v9 conditional branch sequences. */
3339 if (TARGET_CBCOND
3340 && GET_CODE (operands[1]) == REG
3341 && (GET_MODE (operands[1]) == SImode
3342 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3343 && (GET_CODE (operands[2]) != CONST_INT
3344 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3346 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3347 return;
3350 if (TARGET_ARCH64 && operands[2] == const0_rtx
3351 && GET_CODE (operands[1]) == REG
3352 && GET_MODE (operands[1]) == DImode)
3354 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3355 return;
3358 operands[1] = gen_compare_reg (operands[0]);
3359 operands[2] = const0_rtx;
3360 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3361 operands[1], operands[2]);
3362 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3363 operands[3]));
3367 /* Generate a DFmode part of a hard TFmode register.
3368 REG is the TFmode hard register, LOW is 1 for the
3369 low 64 bits of the register and 0 otherwise. */
3371 rtx
3372 gen_df_reg (rtx reg, int low)
3374 int regno = REGNO (reg);
3376 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3377 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3378 return gen_rtx_REG (DFmode, regno);
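/* E.g. with a TFmode value living in %f4-%f7 on this big-endian port,
   gen_df_reg (reg, 0) returns the high word %f4 while
   gen_df_reg (reg, 1) returns %f6.  */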
3381 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3382 Unlike normal calls, TFmode operands are passed by reference. It is
3383 assumed that no more than 3 operands are required. */
3385 static void
3386 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3388 rtx ret_slot = NULL, arg[3], func_sym;
3389 int i;
3391 /* We only expect to be called for conversions, unary, and binary ops. */
3392 gcc_assert (nargs == 2 || nargs == 3);
3394 for (i = 0; i < nargs; ++i)
3396 rtx this_arg = operands[i];
3397 rtx this_slot;
3399 /* TFmode arguments and return values are passed by reference. */
3400 if (GET_MODE (this_arg) == TFmode)
3402 int force_stack_temp;
3404 force_stack_temp = 0;
3405 if (TARGET_BUGGY_QP_LIB && i == 0)
3406 force_stack_temp = 1;
3408 if (GET_CODE (this_arg) == MEM
3409 && ! force_stack_temp)
3411 tree expr = MEM_EXPR (this_arg);
3412 if (expr)
3413 mark_addressable (expr);
3414 this_arg = XEXP (this_arg, 0);
3416 else if (CONSTANT_P (this_arg)
3417 && ! force_stack_temp)
3419 this_slot = force_const_mem (TFmode, this_arg);
3420 this_arg = XEXP (this_slot, 0);
3422 else
3424 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3426 /* Operand 0 is the return value. We'll copy it out later. */
3427 if (i > 0)
3428 emit_move_insn (this_slot, this_arg);
3429 else
3430 ret_slot = this_slot;
3432 this_arg = XEXP (this_slot, 0);
3436 arg[i] = this_arg;
3439 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3441 if (GET_MODE (operands[0]) == TFmode)
3443 if (nargs == 2)
3444 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3445 arg[0], GET_MODE (arg[0]),
3446 arg[1], GET_MODE (arg[1]));
3447 else
3448 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3449 arg[0], GET_MODE (arg[0]),
3450 arg[1], GET_MODE (arg[1]),
3451 arg[2], GET_MODE (arg[2]));
3453 if (ret_slot)
3454 emit_move_insn (operands[0], ret_slot);
3456 else
3458 rtx ret;
3460 gcc_assert (nargs == 2);
3462 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3463 GET_MODE (operands[0]),
3464 arg[1], GET_MODE (arg[1]));
3466 if (ret != operands[0])
3467 emit_move_insn (operands[0], ret);
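/* The resulting call has the shape the SPARC quad-float ABI expects,
   e.g. _Qp_add (&result, &a, &b) for the binary operations below, with
   every TFmode value passed and returned by reference.  */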
3471 /* Expand soft-float TFmode calls to sparc abi routines. */
3473 static void
3474 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3476 const char *func;
3478 switch (code)
3480 case PLUS:
3481 func = "_Qp_add";
3482 break;
3483 case MINUS:
3484 func = "_Qp_sub";
3485 break;
3486 case MULT:
3487 func = "_Qp_mul";
3488 break;
3489 case DIV:
3490 func = "_Qp_div";
3491 break;
3492 default:
3493 gcc_unreachable ();
3496 emit_soft_tfmode_libcall (func, 3, operands);
3499 static void
3500 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3502 const char *func;
3504 gcc_assert (code == SQRT);
3505 func = "_Qp_sqrt";
3507 emit_soft_tfmode_libcall (func, 2, operands);
3510 static void
3511 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3513 const char *func;
3515 switch (code)
3517 case FLOAT_EXTEND:
3518 switch (GET_MODE (operands[1]))
3520 case E_SFmode:
3521 func = "_Qp_stoq";
3522 break;
3523 case E_DFmode:
3524 func = "_Qp_dtoq";
3525 break;
3526 default:
3527 gcc_unreachable ();
3529 break;
3531 case FLOAT_TRUNCATE:
3532 switch (GET_MODE (operands[0]))
3534 case E_SFmode:
3535 func = "_Qp_qtos";
3536 break;
3537 case E_DFmode:
3538 func = "_Qp_qtod";
3539 break;
3540 default:
3541 gcc_unreachable ();
3543 break;
3545 case FLOAT:
3546 switch (GET_MODE (operands[1]))
3548 case E_SImode:
3549 func = "_Qp_itoq";
3550 if (TARGET_ARCH64)
3551 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3552 break;
3553 case E_DImode:
3554 func = "_Qp_xtoq";
3555 break;
3556 default:
3557 gcc_unreachable ();
3559 break;
3561 case UNSIGNED_FLOAT:
3562 switch (GET_MODE (operands[1]))
3564 case E_SImode:
3565 func = "_Qp_uitoq";
3566 if (TARGET_ARCH64)
3567 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3568 break;
3569 case E_DImode:
3570 func = "_Qp_uxtoq";
3571 break;
3572 default:
3573 gcc_unreachable ();
3575 break;
3577 case FIX:
3578 switch (GET_MODE (operands[0]))
3580 case E_SImode:
3581 func = "_Qp_qtoi";
3582 break;
3583 case E_DImode:
3584 func = "_Qp_qtox";
3585 break;
3586 default:
3587 gcc_unreachable ();
3589 break;
3591 case UNSIGNED_FIX:
3592 switch (GET_MODE (operands[0]))
3594 case E_SImode:
3595 func = "_Qp_qtoui";
3596 break;
3597 case E_DImode:
3598 func = "_Qp_qtoux";
3599 break;
3600 default:
3601 gcc_unreachable ();
3603 break;
3605 default:
3606 gcc_unreachable ();
3609 emit_soft_tfmode_libcall (func, 2, operands);
3612 /* Expand a hard-float tfmode operation. All arguments must be in
3613 registers. */
3615 static void
3616 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3618 rtx op, dest;
3620 if (GET_RTX_CLASS (code) == RTX_UNARY)
3622 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3623 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3625 else
3627 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3628 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3629 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3630 operands[1], operands[2]);
3633 if (register_operand (operands[0], VOIDmode))
3634 dest = operands[0];
3635 else
3636 dest = gen_reg_rtx (GET_MODE (operands[0]));
3638 emit_insn (gen_rtx_SET (dest, op));
3640 if (dest != operands[0])
3641 emit_move_insn (operands[0], dest);
3644 void
3645 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3647 if (TARGET_HARD_QUAD)
3648 emit_hard_tfmode_operation (code, operands);
3649 else
3650 emit_soft_tfmode_binop (code, operands);
3653 void
3654 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3656 if (TARGET_HARD_QUAD)
3657 emit_hard_tfmode_operation (code, operands);
3658 else
3659 emit_soft_tfmode_unop (code, operands);
3662 void
3663 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3665 if (TARGET_HARD_QUAD)
3666 emit_hard_tfmode_operation (code, operands);
3667 else
3668 emit_soft_tfmode_cvt (code, operands);
3671 /* Return nonzero if a branch/jump/call instruction will be emitting
3672 a nop into its delay slot. */
3674 int
3675 empty_delay_slot (rtx_insn *insn)
3677 rtx seq;
3679 /* If no previous instruction (should not happen), return true. */
3680 if (PREV_INSN (insn) == NULL)
3681 return 1;
3683 seq = NEXT_INSN (PREV_INSN (insn));
3684 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3685 return 0;
3687 return 1;
3690 /* Return nonzero if we should emit a nop after a cbcond instruction.
3691 The cbcond instruction does not have a delay slot; however, there is
3692 a severe performance penalty if a control transfer appears right
3693 after a cbcond. Therefore we emit a nop when we detect this
3694 situation. */
3696 int
3697 emit_cbcond_nop (rtx_insn *insn)
3699 rtx next = next_active_insn (insn);
3701 if (!next)
3702 return 1;
3704 if (NONJUMP_INSN_P (next)
3705 && GET_CODE (PATTERN (next)) == SEQUENCE)
3706 next = XVECEXP (PATTERN (next), 0, 0);
3707 else if (CALL_P (next)
3708 && GET_CODE (PATTERN (next)) == PARALLEL)
3710 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3712 if (GET_CODE (delay) == RETURN)
3714 /* It's a sibling call. Do not emit the nop if we're going
3715 to emit something other than the jump itself as the first
3716 instruction of the sibcall sequence. */
3717 if (sparc_leaf_function_p || TARGET_FLAT)
3718 return 0;
3722 if (NONJUMP_INSN_P (next))
3723 return 0;
3725 return 1;
3728 /* Return nonzero if TRIAL can go into the call delay slot. */
3730 int
3731 eligible_for_call_delay (rtx_insn *trial)
3733 rtx pat;
3735 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3736 return 0;
3738 /* Binutils allows
3739 call __tls_get_addr, %tgd_call (foo)
3740 add %l7, %o0, %o0, %tgd_add (foo)
3741 while Sun as/ld does not. */
3742 if (TARGET_GNU_TLS || !TARGET_TLS)
3743 return 1;
3745 pat = PATTERN (trial);
3747 /* We must reject tgd_add{32|64}, i.e.
3748 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3749 and tldm_add{32|64}, i.e.
3750 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3751 for Sun as/ld. */
3752 if (GET_CODE (pat) == SET
3753 && GET_CODE (SET_SRC (pat)) == PLUS)
3755 rtx unspec = XEXP (SET_SRC (pat), 1);
3757 if (GET_CODE (unspec) == UNSPEC
3758 && (XINT (unspec, 1) == UNSPEC_TLSGD
3759 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3760 return 0;
3763 return 1;
3766 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3767 instruction. RETURN_P is true if the v9 variant 'return' is to be
3768 considered in the test too.
3770 TRIAL must be a SET whose destination is a REG appropriate for the
3771 'restore' instruction or, if RETURN_P is true, for the 'return'
3772 instruction. */
3774 static int
3775 eligible_for_restore_insn (rtx trial, bool return_p)
3777 rtx pat = PATTERN (trial);
3778 rtx src = SET_SRC (pat);
3779 bool src_is_freg = false;
3780 rtx src_reg;
3782 /* Since we now can do moves between float and integer registers when
3783 VIS3 is enabled, we have to catch this case. We can allow such
3784 moves when doing a 'return' however. */
3785 src_reg = src;
3786 if (GET_CODE (src_reg) == SUBREG)
3787 src_reg = SUBREG_REG (src_reg);
3788 if (GET_CODE (src_reg) == REG
3789 && SPARC_FP_REG_P (REGNO (src_reg)))
3790 src_is_freg = true;
3792 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3793 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3794 && arith_operand (src, GET_MODE (src))
3795 && ! src_is_freg)
3797 if (TARGET_ARCH64)
3798 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3799 else
3800 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3803 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3804 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3805 && arith_double_operand (src, GET_MODE (src))
3806 && ! src_is_freg)
3807 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3809 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3810 else if (! TARGET_FPU && register_operand (src, SFmode))
3811 return 1;
3813 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3814 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3815 return 1;
3817 /* If we have the 'return' instruction, anything that does not use
3818 local or output registers and can go into a delay slot wins. */
3819 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3820 return 1;
3822 /* The 'restore src1,src2,dest' pattern for SImode. */
3823 else if (GET_CODE (src) == PLUS
3824 && register_operand (XEXP (src, 0), SImode)
3825 && arith_operand (XEXP (src, 1), SImode))
3826 return 1;
3828 /* The 'restore src1,src2,dest' pattern for DImode. */
3829 else if (GET_CODE (src) == PLUS
3830 && register_operand (XEXP (src, 0), DImode)
3831 && arith_double_operand (XEXP (src, 1), DImode))
3832 return 1;
3834 /* The 'restore src1,%lo(src2),dest' pattern. */
3835 else if (GET_CODE (src) == LO_SUM
3836 && ! TARGET_CM_MEDMID
3837 && ((register_operand (XEXP (src, 0), SImode)
3838 && immediate_operand (XEXP (src, 1), SImode))
3839 || (TARGET_ARCH64
3840 && register_operand (XEXP (src, 0), DImode)
3841 && immediate_operand (XEXP (src, 1), DImode))))
3842 return 1;
3844 /* The 'restore src,src,dest' pattern, i.e. src << 1 computed as src + src. */
3845 else if (GET_CODE (src) == ASHIFT
3846 && (register_operand (XEXP (src, 0), SImode)
3847 || register_operand (XEXP (src, 0), DImode))
3848 && XEXP (src, 1) == const1_rtx)
3849 return 1;
3851 return 0;
3854 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3857 eligible_for_return_delay (rtx_insn *trial)
3859 int regno;
3860 rtx pat;
3862 /* If the function uses __builtin_eh_return, the eh_return machinery
3863 occupies the delay slot. */
3864 if (crtl->calls_eh_return)
3865 return 0;
3867 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3868 return 0;
3870 /* In the case of a leaf or flat function, anything can go into the slot. */
3871 if (sparc_leaf_function_p || TARGET_FLAT)
3872 return 1;
3874 if (!NONJUMP_INSN_P (trial))
3875 return 0;
3877 pat = PATTERN (trial);
3878 if (GET_CODE (pat) == PARALLEL)
3880 int i;
3882 if (! TARGET_V9)
3883 return 0;
3884 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3886 rtx expr = XVECEXP (pat, 0, i);
3887 if (GET_CODE (expr) != SET)
3888 return 0;
3889 if (GET_CODE (SET_DEST (expr)) != REG)
3890 return 0;
3891 regno = REGNO (SET_DEST (expr));
3892 if (regno >= 8 && regno < 24)
3893 return 0;
3895 return !epilogue_renumber (&pat, 1);
3898 if (GET_CODE (pat) != SET)
3899 return 0;
3901 if (GET_CODE (SET_DEST (pat)) != REG)
3902 return 0;
3904 regno = REGNO (SET_DEST (pat));
3906 /* Otherwise, only operations which can be done in tandem with
3907 a `restore' or `return' insn can go into the delay slot. */
3908 if (regno >= 8 && regno < 24)
3909 return 0;
3911 /* If this instruction sets up a floating-point register and we have a return
3912 instruction, it can probably go in. But restore will not work
3913 with FP_REGS. */
3914 if (! SPARC_INT_REG_P (regno))
3915 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3917 return eligible_for_restore_insn (trial, true);
3920 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3923 eligible_for_sibcall_delay (rtx_insn *trial)
3925 rtx pat;
3927 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3928 return 0;
3930 if (!NONJUMP_INSN_P (trial))
3931 return 0;
3933 pat = PATTERN (trial);
3935 if (sparc_leaf_function_p || TARGET_FLAT)
3937 /* If the tail call is done using the call instruction,
3938 we have to restore %o7 in the delay slot. */
3939 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3940 return 0;
3942 /* %g1 is used to build the function address */
3943 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3944 return 0;
3946 return 1;
3949 if (GET_CODE (pat) != SET)
3950 return 0;
3952 /* Otherwise, only operations which can be done in tandem with
3953 a `restore' insn can go into the delay slot. */
3954 if (GET_CODE (SET_DEST (pat)) != REG
3955 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3956 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3957 return 0;
3959 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3960 in most cases. */
3961 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3962 return 0;
3964 return eligible_for_restore_insn (trial, false);
3967 /* Determine if it's legal to put X into the constant pool. This
3968 is not possible if X contains the address of a symbol that is
3969 not constant (TLS) or not known at final link time (PIC). */
3971 static bool
3972 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3974 switch (GET_CODE (x))
3976 case CONST_INT:
3977 case CONST_WIDE_INT:
3978 case CONST_DOUBLE:
3979 case CONST_VECTOR:
3980 /* Accept all non-symbolic constants. */
3981 return false;
3983 case LABEL_REF:
3984 /* Labels are OK iff we are non-PIC. */
3985 return flag_pic != 0;
3987 case SYMBOL_REF:
3988 /* 'Naked' TLS symbol references are never OK,
3989 non-TLS symbols are OK iff we are non-PIC. */
3990 if (SYMBOL_REF_TLS_MODEL (x))
3991 return true;
3992 else
3993 return flag_pic != 0;
3995 case CONST:
3996 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3997 case PLUS:
3998 case MINUS:
3999 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4000 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4001 case UNSPEC:
4002 return true;
4003 default:
4004 gcc_unreachable ();
4008 /* Global Offset Table support. */
4009 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4010 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4012 /* Return the SYMBOL_REF for the Global Offset Table. */
4014 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4016 static rtx
4017 sparc_got (void)
4019 if (!sparc_got_symbol)
4020 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4022 return sparc_got_symbol;
4025 /* Ensure that we are not using patterns that are not OK with PIC. */
4028 check_pic (int i)
4030 rtx op;
4032 switch (flag_pic)
4034 case 1:
4035 op = recog_data.operand[i];
4036 gcc_assert (GET_CODE (op) != SYMBOL_REF
4037 && (GET_CODE (op) != CONST
4038 || (GET_CODE (XEXP (op, 0)) == MINUS
4039 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4040 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4041 /* fallthrough */
4042 case 2:
4043 default:
4044 return 1;
4048 /* Return true if X is an address which needs a temporary register when
4049 reloaded while generating PIC code. */
4052 pic_address_needs_scratch (rtx x)
4054 /* An address which is a symbolic operand plus a non-SMALL_INT needs a temp reg. */
4055 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4056 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4058 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4059 return 1;
4061 return 0;
4064 /* Determine if a given RTX is a valid constant. We already know this
4065 satisfies CONSTANT_P. */
4067 static bool
4068 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4070 switch (GET_CODE (x))
4072 case CONST:
4073 case SYMBOL_REF:
4074 if (sparc_tls_referenced_p (x))
4075 return false;
4076 break;
4078 case CONST_DOUBLE:
4079 /* Floating point constants are generally not ok.
4080 The only exception is 0.0 and all-ones in VIS. */
4081 if (TARGET_VIS
4082 && SCALAR_FLOAT_MODE_P (mode)
4083 && (const_zero_operand (x, mode)
4084 || const_all_ones_operand (x, mode)))
4085 return true;
4087 return false;
4089 case CONST_VECTOR:
4090 /* Vector constants are generally not ok.
4091 The only exception is 0 or -1 in VIS. */
4092 if (TARGET_VIS
4093 && (const_zero_operand (x, mode)
4094 || const_all_ones_operand (x, mode)))
4095 return true;
4097 return false;
4099 default:
4100 break;
4103 return true;
4106 /* Determine if a given RTX is a valid constant address. */
4108 bool
4109 constant_address_p (rtx x)
4111 switch (GET_CODE (x))
4113 case LABEL_REF:
4114 case CONST_INT:
4115 case HIGH:
4116 return true;
4118 case CONST:
4119 if (flag_pic && pic_address_needs_scratch (x))
4120 return false;
4121 return sparc_legitimate_constant_p (Pmode, x);
4123 case SYMBOL_REF:
4124 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4126 default:
4127 return false;
4131 /* Nonzero if the constant value X is a legitimate general operand
4132 when generating PIC code. It is given that flag_pic is on and
4133 that X satisfies CONSTANT_P. */
4135 bool
4136 legitimate_pic_operand_p (rtx x)
4138 if (pic_address_needs_scratch (x))
4139 return false;
4140 if (sparc_tls_referenced_p (x))
4141 return false;
4142 return true;
4145 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4146 (CONST_INT_P (X) \
4147 && INTVAL (X) >= -0x1000 \
4148 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4150 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4151 (CONST_INT_P (X) \
4152 && INTVAL (X) >= -0x1000 \
4153 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
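/* A worked reading of the bounds above (our inference, not a comment from
   the authors): the simm13 field covers [-0x1000, 0xfff] and the whole
   access must fit, so for an 8-byte DFmode access RTX_OK_FOR_OFFSET_P
   tops out at 0x1000 - 8 = 4088.  In the OLO10 variant the immediate is
   later added to a %lo() relocation, which can contribute up to 0x3ff,
   hence the tighter bound 0x1000 - 0x400 - size = 0xc00 - size.  */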
4155 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4157 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4158 ordinarily. This changes a bit when generating PIC. */
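/* Concretely, the two ordinary forms correspond to instructions such as
   (an illustrative sketch)

	ld	[%o0 + %o1], %o2	! REG+REG
	ld	[%o0 + 100], %o2	! REG+SMALLINT, simm13 offset

   whereas something like [%o0 + 0x12345] must first be legitimized.  */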
4160 static bool
4161 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4163 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4165 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4166 rs1 = addr;
4167 else if (GET_CODE (addr) == PLUS)
4169 rs1 = XEXP (addr, 0);
4170 rs2 = XEXP (addr, 1);
4172 /* Canonicalize. REG comes first, if there are no regs,
4173 LO_SUM comes first. */
4174 if (!REG_P (rs1)
4175 && GET_CODE (rs1) != SUBREG
4176 && (REG_P (rs2)
4177 || GET_CODE (rs2) == SUBREG
4178 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4180 rs1 = XEXP (addr, 1);
4181 rs2 = XEXP (addr, 0);
4184 if ((flag_pic == 1
4185 && rs1 == pic_offset_table_rtx
4186 && !REG_P (rs2)
4187 && GET_CODE (rs2) != SUBREG
4188 && GET_CODE (rs2) != LO_SUM
4189 && GET_CODE (rs2) != MEM
4190 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4191 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4192 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4193 || ((REG_P (rs1)
4194 || GET_CODE (rs1) == SUBREG)
4195 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4197 imm1 = rs2;
4198 rs2 = NULL;
4200 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4201 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4203 /* We prohibit REG + REG for TFmode when there are no quad move insns
4204 and we consequently need to split. We do this because REG+REG
4205 is not an offsettable address. If we get the situation in reload
4206 where source and destination of a movtf pattern are both MEMs with
4207 REG+REG address, then only one of them gets converted to an
4208 offsettable address. */
4209 if (mode == TFmode
4210 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4211 return 0;
4213 /* Likewise for TImode, but in all cases. */
4214 if (mode == TImode)
4215 return 0;
4217 /* We prohibit REG + REG on ARCH32 if not optimizing for
4218 DFmode/DImode because then mem_min_alignment is likely to be zero
4219 after reload and the forced split would lack a matching splitter
4220 pattern. */
4221 if (TARGET_ARCH32 && !optimize
4222 && (mode == DFmode || mode == DImode))
4223 return 0;
4225 else if (USE_AS_OFFSETABLE_LO10
4226 && GET_CODE (rs1) == LO_SUM
4227 && TARGET_ARCH64
4228 && ! TARGET_CM_MEDMID
4229 && RTX_OK_FOR_OLO10_P (rs2, mode))
4231 rs2 = NULL;
4232 imm1 = XEXP (rs1, 1);
4233 rs1 = XEXP (rs1, 0);
4234 if (!CONSTANT_P (imm1)
4235 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4236 return 0;
4239 else if (GET_CODE (addr) == LO_SUM)
4241 rs1 = XEXP (addr, 0);
4242 imm1 = XEXP (addr, 1);
4244 if (!CONSTANT_P (imm1)
4245 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4246 return 0;
4248 /* We can't allow TFmode in 32-bit mode, because an offset greater
4249 than the alignment (8) may cause the LO_SUM to overflow. */
4250 if (mode == TFmode && TARGET_ARCH32)
4251 return 0;
4253 /* During reload, accept the HIGH+LO_SUM construct generated by
4254 sparc_legitimize_reload_address. */
4255 if (reload_in_progress
4256 && GET_CODE (rs1) == HIGH
4257 && XEXP (rs1, 0) == imm1)
4258 return 1;
4260 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4261 return 1;
4262 else
4263 return 0;
4265 if (GET_CODE (rs1) == SUBREG)
4266 rs1 = SUBREG_REG (rs1);
4267 if (!REG_P (rs1))
4268 return 0;
4270 if (rs2)
4272 if (GET_CODE (rs2) == SUBREG)
4273 rs2 = SUBREG_REG (rs2);
4274 if (!REG_P (rs2))
4275 return 0;
4278 if (strict)
4280 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4281 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4282 return 0;
4284 else
4286 if ((! SPARC_INT_REG_P (REGNO (rs1))
4287 && REGNO (rs1) != FRAME_POINTER_REGNUM
4288 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4289 || (rs2
4290 && (! SPARC_INT_REG_P (REGNO (rs2))
4291 && REGNO (rs2) != FRAME_POINTER_REGNUM
4292 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4293 return 0;
4295 return 1;
4298 /* Return the SYMBOL_REF for the tls_get_addr function. */
4300 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4302 static rtx
4303 sparc_tls_get_addr (void)
4305 if (!sparc_tls_symbol)
4306 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4308 return sparc_tls_symbol;
4311 /* Return the Global Offset Table to be used in TLS mode. */
4313 static rtx
4314 sparc_tls_got (void)
4316 /* In PIC mode, this is just the PIC offset table. */
4317 if (flag_pic)
4319 crtl->uses_pic_offset_table = 1;
4320 return pic_offset_table_rtx;
4323 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4324 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4325 if (TARGET_SUN_TLS && TARGET_ARCH32)
4327 load_got_register ();
4328 return global_offset_table_rtx;
4331 /* In all other cases, we load a new pseudo with the GOT symbol. */
4332 return copy_to_reg (sparc_got ());
4335 /* Return true if X contains a thread-local symbol. */
4337 static bool
4338 sparc_tls_referenced_p (rtx x)
4340 if (!TARGET_HAVE_TLS)
4341 return false;
4343 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4344 x = XEXP (XEXP (x, 0), 0);
4346 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4347 return true;
4349 /* That's all we handle in sparc_legitimize_tls_address for now. */
4350 return false;
4353 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4354 this (thread-local) address. */
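/* For reference, the global-dynamic case below materializes the sequence
   documented in the SPARC TLS ABI (shown here as a sketch, assuming the
   32-bit flavor and %l7 as the GOT register):

	sethi	%tgd_hi22(sym), %t1
	add	%t1, %tgd_lo10(sym), %t2
	add	%l7, %t2, %o0, %tgd_add(sym)
	call	__tls_get_addr, %tgd_call(sym)
	 nop

   with the address of the thread-local object returned in %o0.  */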
4356 static rtx
4357 sparc_legitimize_tls_address (rtx addr)
4359 rtx temp1, temp2, temp3, ret, o0, got;
4360 rtx_insn *insn;
4362 gcc_assert (can_create_pseudo_p ());
4364 if (GET_CODE (addr) == SYMBOL_REF)
4365 switch (SYMBOL_REF_TLS_MODEL (addr))
4367 case TLS_MODEL_GLOBAL_DYNAMIC:
4368 start_sequence ();
4369 temp1 = gen_reg_rtx (SImode);
4370 temp2 = gen_reg_rtx (SImode);
4371 ret = gen_reg_rtx (Pmode);
4372 o0 = gen_rtx_REG (Pmode, 8);
4373 got = sparc_tls_got ();
4374 emit_insn (gen_tgd_hi22 (temp1, addr));
4375 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4376 if (TARGET_ARCH32)
4378 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4379 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4380 addr, const1_rtx));
4382 else
4384 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4385 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4386 addr, const1_rtx));
4388 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4389 insn = get_insns ();
4390 end_sequence ();
4391 emit_libcall_block (insn, ret, o0, addr);
4392 break;
4394 case TLS_MODEL_LOCAL_DYNAMIC:
4395 start_sequence ();
4396 temp1 = gen_reg_rtx (SImode);
4397 temp2 = gen_reg_rtx (SImode);
4398 temp3 = gen_reg_rtx (Pmode);
4399 ret = gen_reg_rtx (Pmode);
4400 o0 = gen_rtx_REG (Pmode, 8);
4401 got = sparc_tls_got ();
4402 emit_insn (gen_tldm_hi22 (temp1));
4403 emit_insn (gen_tldm_lo10 (temp2, temp1));
4404 if (TARGET_ARCH32)
4406 emit_insn (gen_tldm_add32 (o0, got, temp2));
4407 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4408 const1_rtx));
4410 else
4412 emit_insn (gen_tldm_add64 (o0, got, temp2));
4413 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4414 const1_rtx));
4416 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4417 insn = get_insns ();
4418 end_sequence ();
4419 emit_libcall_block (insn, temp3, o0,
4420 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4421 UNSPEC_TLSLD_BASE));
4422 temp1 = gen_reg_rtx (SImode);
4423 temp2 = gen_reg_rtx (SImode);
4424 emit_insn (gen_tldo_hix22 (temp1, addr));
4425 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4426 if (TARGET_ARCH32)
4427 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4428 else
4429 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4430 break;
4432 case TLS_MODEL_INITIAL_EXEC:
4433 temp1 = gen_reg_rtx (SImode);
4434 temp2 = gen_reg_rtx (SImode);
4435 temp3 = gen_reg_rtx (Pmode);
4436 got = sparc_tls_got ();
4437 emit_insn (gen_tie_hi22 (temp1, addr));
4438 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4439 if (TARGET_ARCH32)
4440 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4441 else
4442 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4443 if (TARGET_SUN_TLS)
4445 ret = gen_reg_rtx (Pmode);
4446 if (TARGET_ARCH32)
4447 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4448 temp3, addr));
4449 else
4450 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4451 temp3, addr));
4453 else
4454 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4455 break;
4457 case TLS_MODEL_LOCAL_EXEC:
4458 temp1 = gen_reg_rtx (Pmode);
4459 temp2 = gen_reg_rtx (Pmode);
4460 if (TARGET_ARCH32)
4462 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4463 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4465 else
4467 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4468 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4470 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4471 break;
4473 default:
4474 gcc_unreachable ();
4477 else if (GET_CODE (addr) == CONST)
4479 rtx base, offset;
4481 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4483 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4484 offset = XEXP (XEXP (addr, 0), 1);
4486 base = force_operand (base, NULL_RTX);
4487 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4488 offset = force_reg (Pmode, offset);
4489 ret = gen_rtx_PLUS (Pmode, base, offset);
4492 else
4493 gcc_unreachable (); /* for now ... */
4495 return ret;
4498 /* Legitimize PIC addresses. If the address is already position-independent,
4499 we return ORIG. Newly generated position-independent addresses go into a
4500 reg. This is REG if nonzero, otherwise we allocate register(s) as
4501 necessary. */
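/* A sketch of the flag_pic == 2 path below (our reconstruction from the
   gotdata patterns, with %l7 as the PIC register):

	sethi	%gdop_hix22(sym), %t
	xor	%t, %gdop_lox10(sym), %t
	ld	[%l7 + %t], %reg, %gdop(sym)

   i.e. a HIGH/LO_SUM-style pair builds the GOT slot offset and the load
   is then done relative to the GOT pointer.  */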
4503 static rtx
4504 sparc_legitimize_pic_address (rtx orig, rtx reg)
4506 bool gotdata_op = false;
4508 if (GET_CODE (orig) == SYMBOL_REF
4509 /* See the comment in sparc_expand_move. */
4510 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4512 rtx pic_ref, address;
4513 rtx_insn *insn;
4515 if (reg == 0)
4517 gcc_assert (can_create_pseudo_p ());
4518 reg = gen_reg_rtx (Pmode);
4521 if (flag_pic == 2)
4523 /* If not during reload, allocate another temp reg here for loading
4524 the address, so that these instructions can be optimized
4525 properly. */
4526 rtx temp_reg = (! can_create_pseudo_p ()
4527 ? reg : gen_reg_rtx (Pmode));
4529 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4530 won't get confused into thinking that these two instructions
4531 are loading in the true address of the symbol. If in the
4532 future a PIC rtx exists, that should be used instead. */
4533 if (TARGET_ARCH64)
4535 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4536 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4538 else
4540 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4541 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4543 address = temp_reg;
4544 gotdata_op = true;
4546 else
4547 address = orig;
4549 crtl->uses_pic_offset_table = 1;
4550 if (gotdata_op)
4552 if (TARGET_ARCH64)
4553 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4554 pic_offset_table_rtx,
4555 address, orig));
4556 else
4557 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4558 pic_offset_table_rtx,
4559 address, orig));
4561 else
4563 pic_ref
4564 = gen_const_mem (Pmode,
4565 gen_rtx_PLUS (Pmode,
4566 pic_offset_table_rtx, address));
4567 insn = emit_move_insn (reg, pic_ref);
4570 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4571 by loop. */
4572 set_unique_reg_note (insn, REG_EQUAL, orig);
4573 return reg;
4575 else if (GET_CODE (orig) == CONST)
4577 rtx base, offset;
4579 if (GET_CODE (XEXP (orig, 0)) == PLUS
4580 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4581 return orig;
4583 if (reg == 0)
4585 gcc_assert (can_create_pseudo_p ());
4586 reg = gen_reg_rtx (Pmode);
4589 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4590 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4591 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4592 base == reg ? NULL_RTX : reg);
4594 if (GET_CODE (offset) == CONST_INT)
4596 if (SMALL_INT (offset))
4597 return plus_constant (Pmode, base, INTVAL (offset));
4598 else if (can_create_pseudo_p ())
4599 offset = force_reg (Pmode, offset);
4600 else
4601 /* If we reach here, then something is seriously wrong. */
4602 gcc_unreachable ();
4604 return gen_rtx_PLUS (Pmode, base, offset);
4606 else if (GET_CODE (orig) == LABEL_REF)
4607 /* ??? We ought to be checking that the register is live instead, in case
4608 it is eliminated. */
4609 crtl->uses_pic_offset_table = 1;
4611 return orig;
4614 /* Try machine-dependent ways of modifying an illegitimate address X
4615 to be legitimate. If we find one, return the new, valid address.
4617 OLDX is the address as it was before break_out_memory_refs was called.
4618 In some cases it is useful to look at this to decide what needs to be done.
4620 MODE is the mode of the operand pointed to by X.
4622 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
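/* E.g. (a sketch) the out-of-range address %o0 + 0x12345 becomes

	sethi	%hi(0x12345), %g1
	or	%g1, %lo(0x12345), %g1	! constant forced into a register
	ld	[%o0 + %g1], %o1	! now a legal REG+REG address

   while %o0 + (%o1 * 4) has the multiply forced into a register first.  */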
4624 static rtx
4625 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4626 machine_mode mode)
4628 rtx orig_x = x;
4630 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4631 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4632 force_operand (XEXP (x, 0), NULL_RTX));
4633 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4634 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4635 force_operand (XEXP (x, 1), NULL_RTX));
4636 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4637 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4638 XEXP (x, 1));
4639 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4640 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4641 force_operand (XEXP (x, 1), NULL_RTX));
4643 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4644 return x;
4646 if (sparc_tls_referenced_p (x))
4647 x = sparc_legitimize_tls_address (x);
4648 else if (flag_pic)
4649 x = sparc_legitimize_pic_address (x, NULL_RTX);
4650 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4651 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4652 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4653 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4654 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4655 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4656 else if (GET_CODE (x) == SYMBOL_REF
4657 || GET_CODE (x) == CONST
4658 || GET_CODE (x) == LABEL_REF)
4659 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4661 return x;
4664 /* Delegitimize an address that was legitimized by the above function. */
4666 static rtx
4667 sparc_delegitimize_address (rtx x)
4669 x = delegitimize_mem_from_attrs (x);
4671 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4672 switch (XINT (XEXP (x, 1), 1))
4674 case UNSPEC_MOVE_PIC:
4675 case UNSPEC_TLSLE:
4676 x = XVECEXP (XEXP (x, 1), 0, 0);
4677 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4678 break;
4679 default:
4680 break;
4683 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4684 if (GET_CODE (x) == MINUS
4685 && REG_P (XEXP (x, 0))
4686 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4687 && GET_CODE (XEXP (x, 1)) == LO_SUM
4688 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4689 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4691 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4692 gcc_assert (GET_CODE (x) == LABEL_REF);
4695 return x;
4698 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4699 replace the input X, or the original X if no replacement is called for.
4700 The output parameter *WIN is 1 if the calling macro should goto WIN,
4701 0 if it should not.
4703 For SPARC, we wish to handle addresses by splitting them into
4704 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4705 This cuts the number of extra insns by one.
4707 Do nothing when generating PIC code and the address is a symbolic
4708 operand or requires a scratch register. */
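/* A sketch of the transformation: reloading the constant address 'sym'
   for a load becomes

	sethi	%hi(sym), %tmp		! reloaded HIGH part
	ld	[%tmp + %lo(sym)], %reg	! LO_SUM kept in the memory ref

   so only the HIGH part needs a scratch register.  */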
4711 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4712 int opnum, int type,
4713 int ind_levels ATTRIBUTE_UNUSED, int *win)
4715 /* Decompose SImode constants into HIGH+LO_SUM. */
4716 if (CONSTANT_P (x)
4717 && (mode != TFmode || TARGET_ARCH64)
4718 && GET_MODE (x) == SImode
4719 && GET_CODE (x) != LO_SUM
4720 && GET_CODE (x) != HIGH
4721 && sparc_cmodel <= CM_MEDLOW
4722 && !(flag_pic
4723 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4725 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4726 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4727 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4728 opnum, (enum reload_type)type);
4729 *win = 1;
4730 return x;
4733 /* We have to recognize what we have already generated above. */
4734 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4736 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4737 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4738 opnum, (enum reload_type)type);
4739 *win = 1;
4740 return x;
4743 *win = 0;
4744 return x;
4747 /* Return true if ADDR (a legitimate address expression)
4748 has an effect that depends on the machine mode it is used for.
4750 In PIC mode,
4752 (mem:HI [%l7+a])
4754 is not equivalent to
4756 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4758 because [%l7+a+1] is interpreted as the address of (a+1). */
4761 static bool
4762 sparc_mode_dependent_address_p (const_rtx addr,
4763 addr_space_t as ATTRIBUTE_UNUSED)
4765 if (flag_pic && GET_CODE (addr) == PLUS)
4767 rtx op0 = XEXP (addr, 0);
4768 rtx op1 = XEXP (addr, 1);
4769 if (op0 == pic_offset_table_rtx
4770 && symbolic_operand (op1, VOIDmode))
4771 return true;
4774 return false;
4777 #ifdef HAVE_GAS_HIDDEN
4778 # define USE_HIDDEN_LINKONCE 1
4779 #else
4780 # define USE_HIDDEN_LINKONCE 0
4781 #endif
4783 static void
4784 get_pc_thunk_name (char name[32], unsigned int regno)
4786 const char *reg_name = reg_names[regno];
4788 /* Skip the leading '%' as that cannot be used in a
4789 symbol name. */
4790 reg_name += 1;
4792 if (USE_HIDDEN_LINKONCE)
4793 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4794 else
4795 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4798 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4800 static rtx
4801 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4803 int orig_flag_pic = flag_pic;
4804 rtx insn;
4806 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4807 flag_pic = 0;
4808 if (TARGET_ARCH64)
4809 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4810 else
4811 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4812 flag_pic = orig_flag_pic;
4814 return insn;
4817 /* Emit code to load the GOT register. */
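/* On ELF targets this typically expands to the classic PIC prologue
   (an illustrative sketch; the thunk name matches get_pc_thunk_name):

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk does "retl; add %o7, %l7, %l7", adding the PC of the
   call to the PC-relative GOT offset.  */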
4819 void
4820 load_got_register (void)
4822 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4823 if (!global_offset_table_rtx)
4824 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4826 if (TARGET_VXWORKS_RTP)
4827 emit_insn (gen_vxworks_load_got ());
4828 else
4830 /* The GOT symbol is subject to a PC-relative relocation so we need a
4831 helper function to add the PC value and thus get the final value. */
4832 if (!got_helper_rtx)
4834 char name[32];
4835 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4836 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4839 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4840 got_helper_rtx,
4841 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4844 /* Need to emit this whether or not we obey regdecls,
4845 since setjmp/longjmp can cause life info to screw up.
4846 ??? In the case where we don't obey regdecls, this is not sufficient
4847 since we may not fall out the bottom. */
4848 emit_use (global_offset_table_rtx);
4851 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4852 address of the call target. */
4854 void
4855 sparc_emit_call_insn (rtx pat, rtx addr)
4857 rtx_insn *insn;
4859 insn = emit_call_insn (pat);
4861 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4862 if (TARGET_VXWORKS_RTP
4863 && flag_pic
4864 && GET_CODE (addr) == SYMBOL_REF
4865 && (SYMBOL_REF_DECL (addr)
4866 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4867 : !SYMBOL_REF_LOCAL_P (addr)))
4869 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4870 crtl->uses_pic_offset_table = 1;
4874 /* Return 1 if RTX is a MEM which is known to be aligned to at
4875 least a DESIRED byte boundary. */
4878 mem_min_alignment (rtx mem, int desired)
4880 rtx addr, base, offset;
4882 /* If it's not a MEM we can't accept it. */
4883 if (GET_CODE (mem) != MEM)
4884 return 0;
4886 /* Obviously... */
4887 if (!TARGET_UNALIGNED_DOUBLES
4888 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4889 return 1;
4891 /* ??? The rest of the function predates MEM_ALIGN so
4892 there is probably a bit of redundancy. */
4893 addr = XEXP (mem, 0);
4894 base = offset = NULL_RTX;
4895 if (GET_CODE (addr) == PLUS)
4897 if (GET_CODE (XEXP (addr, 0)) == REG)
4899 base = XEXP (addr, 0);
4901 /* What we are saying here is that if the base
4902 REG is aligned properly, the compiler will make
4903 sure any REG based index upon it will be so
4904 as well. */
4905 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4906 offset = XEXP (addr, 1);
4907 else
4908 offset = const0_rtx;
4911 else if (GET_CODE (addr) == REG)
4913 base = addr;
4914 offset = const0_rtx;
4917 if (base != NULL_RTX)
4919 int regno = REGNO (base);
4921 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4923 /* Check if the compiler has recorded some information
4924 about the alignment of the base REG. If reload has
4925 completed, we already matched with proper alignments.
4926 If not running global_alloc, reload might give us
4927 an unaligned pointer to the local stack, though. */
4928 if (((cfun != 0
4929 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4930 || (optimize && reload_completed))
4931 && (INTVAL (offset) & (desired - 1)) == 0)
4932 return 1;
4934 else
4936 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4937 return 1;
4940 else if (! TARGET_UNALIGNED_DOUBLES
4941 || CONSTANT_P (addr)
4942 || GET_CODE (addr) == LO_SUM)
4944 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4945 is true, in which case we can only assume that an access is aligned if
4946 it is to a constant address, or the address involves a LO_SUM. */
4947 return 1;
4950 /* An obviously unaligned address. */
4951 return 0;
4955 /* Vectors to keep interesting information about registers where it can easily
4956 be found. We used to use the actual mode value as the bit number, but there
4957 are more than 32 modes now. Instead we use two tables: one indexed by
4958 hard register number, and one indexed by mode. */
4960 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4961 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4962 mapped into one sparc_mode_class mode. */
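/* For instance, under this classification SImode maps to S_MODE, DImode
   to D_MODE, SFmode to SF_MODE, TFmode to TF_MODE and CCFPmode to
   CCFP_MODE, so each hard register's capabilities fit in one 32-bit mask
   of these class bits.  */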
4964 enum sparc_mode_class {
4965 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4966 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4967 CC_MODE, CCFP_MODE
4970 /* Modes for single-word and smaller quantities. */
4971 #define S_MODES \
4972 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4974 /* Modes for double-word and smaller quantities. */
4975 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4977 /* Modes for quad-word and smaller quantities. */
4978 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4980 /* Modes for 8-word and smaller quantities. */
4981 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4983 /* Modes for single-float quantities. */
4984 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4986 /* Modes for double-float and smaller quantities. */
4987 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4989 /* Modes for quad-float and smaller quantities. */
4990 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4992 /* Modes for quad-float pairs and smaller quantities. */
4993 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4995 /* Modes for double-float only quantities. */
4996 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4998 /* Modes for quad-float and double-float only quantities. */
4999 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5001 /* Modes for quad-float pairs and double-float only quantities. */
5002 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5004 /* Modes for condition codes. */
5005 #define CC_MODES (1 << (int) CC_MODE)
5006 #define CCFP_MODES (1 << (int) CCFP_MODE)
5008 /* Value is 1 if register/mode pair is acceptable on sparc.
5010 The funny mixture of D and T modes is because integer operations
5011 do not specially operate on tetra quantities, so non-quad-aligned
5012 registers can hold quadword quantities (except %o4 and %i4 because
5013 they cross fixed registers).
5015 ??? Note that, despite the settings, non-double-aligned parameter
5016 registers can hold double-word quantities in 32-bit mode. */
5018 /* This points to either the 32-bit or the 64-bit version. */
5019 static const int *hard_regno_mode_classes;
5021 static const int hard_32bit_mode_classes[] = {
5022 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5023 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5024 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5025 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5027 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5028 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5029 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5030 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5032 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5033 and none can hold SFmode/SImode values. */
5034 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5035 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5036 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5037 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5039 /* %fcc[0123] */
5040 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5042 /* %icc, %sfp, %gsr */
5043 CC_MODES, 0, D_MODES
5046 static const int hard_64bit_mode_classes[] = {
5047 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5048 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5049 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5050 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5052 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5053 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5054 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5055 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5057 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5058 and none can hold SFmode/SImode values. */
5059 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5060 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5061 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5062 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5064 /* %fcc[0123] */
5065 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5067 /* %icc, %sfp, %gsr */
5068 CC_MODES, 0, D_MODES
5071 static int sparc_mode_class [NUM_MACHINE_MODES];
5073 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5075 static void
5076 sparc_init_modes (void)
5078 int i;
5080 for (i = 0; i < NUM_MACHINE_MODES; i++)
5082 machine_mode m = (machine_mode) i;
5083 unsigned int size = GET_MODE_SIZE (m);
5085 switch (GET_MODE_CLASS (m))
5087 case MODE_INT:
5088 case MODE_PARTIAL_INT:
5089 case MODE_COMPLEX_INT:
5090 if (size < 4)
5091 sparc_mode_class[i] = 1 << (int) H_MODE;
5092 else if (size == 4)
5093 sparc_mode_class[i] = 1 << (int) S_MODE;
5094 else if (size == 8)
5095 sparc_mode_class[i] = 1 << (int) D_MODE;
5096 else if (size == 16)
5097 sparc_mode_class[i] = 1 << (int) T_MODE;
5098 else if (size == 32)
5099 sparc_mode_class[i] = 1 << (int) O_MODE;
5100 else
5101 sparc_mode_class[i] = 0;
5102 break;
5103 case MODE_VECTOR_INT:
5104 if (size == 4)
5105 sparc_mode_class[i] = 1 << (int) SF_MODE;
5106 else if (size == 8)
5107 sparc_mode_class[i] = 1 << (int) DF_MODE;
5108 else
5109 sparc_mode_class[i] = 0;
5110 break;
5111 case MODE_FLOAT:
5112 case MODE_COMPLEX_FLOAT:
5113 if (size == 4)
5114 sparc_mode_class[i] = 1 << (int) SF_MODE;
5115 else if (size == 8)
5116 sparc_mode_class[i] = 1 << (int) DF_MODE;
5117 else if (size == 16)
5118 sparc_mode_class[i] = 1 << (int) TF_MODE;
5119 else if (size == 32)
5120 sparc_mode_class[i] = 1 << (int) OF_MODE;
5121 else
5122 sparc_mode_class[i] = 0;
5123 break;
5124 case MODE_CC:
5125 if (m == CCFPmode || m == CCFPEmode)
5126 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5127 else
5128 sparc_mode_class[i] = 1 << (int) CC_MODE;
5129 break;
5130 default:
5131 sparc_mode_class[i] = 0;
5132 break;
5136 if (TARGET_ARCH64)
5137 hard_regno_mode_classes = hard_64bit_mode_classes;
5138 else
5139 hard_regno_mode_classes = hard_32bit_mode_classes;
5141 /* Initialize the array used by REGNO_REG_CLASS. */
5142 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5144 if (i < 16 && TARGET_V8PLUS)
5145 sparc_regno_reg_class[i] = I64_REGS;
5146 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5147 sparc_regno_reg_class[i] = GENERAL_REGS;
5148 else if (i < 64)
5149 sparc_regno_reg_class[i] = FP_REGS;
5150 else if (i < 96)
5151 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5152 else if (i < 100)
5153 sparc_regno_reg_class[i] = FPCC_REGS;
5154 else
5155 sparc_regno_reg_class[i] = NO_REGS;
5159 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5161 static inline bool
5162 save_global_or_fp_reg_p (unsigned int regno,
5163 int leaf_function ATTRIBUTE_UNUSED)
5165 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5168 /* Return whether the return address register (%i7) is needed. */
5170 static inline bool
5171 return_addr_reg_needed_p (int leaf_function)
5173 /* If it is live, for example because of __builtin_return_address (0). */
5174 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5175 return true;
5177 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5178 if (!leaf_function
5179 /* Loading the GOT register clobbers %o7. */
5180 || crtl->uses_pic_offset_table
5181 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5182 return true;
5184 return false;
5187 /* Return whether REGNO, a local or in register, must be saved/restored. */
5189 static bool
5190 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5192 /* General case: call-saved registers live at some point. */
5193 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5194 return true;
5196 /* Frame pointer register (%fp) if needed. */
5197 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5198 return true;
5200 /* Return address register (%i7) if needed. */
5201 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5202 return true;
5204 /* GOT register (%l7) if needed. */
5205 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5206 return true;
5208 /* If the function accesses prior frames, the frame pointer and the return
5209 address of the previous frame must be saved on the stack. */
5210 if (crtl->accesses_prior_frames
5211 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5212 return true;
5214 return false;
5217 /* Compute the frame size required by the function. This function is called
5218 during the reload pass and also by sparc_expand_prologue. */
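/* A worked sketch of the arithmetic below, with made-up numbers and
   STARTING_FRAME_OFFSET ignored (the real constants come from the target
   macros): 37 bytes of locals round up to apparent_frame_size = 40;
   saving four global/FP register words adds 4 * 4 = 16; with 8 bytes of
   outgoing arguments and, say, a 64-byte register window save area,
   frame_size = 56 + 8 + 64 = 128, which SPARC_STACK_ALIGN then bumps to
   the required stack alignment.  */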
5220 HOST_WIDE_INT
5221 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5223 HOST_WIDE_INT frame_size, apparent_frame_size;
5224 int args_size, n_global_fp_regs = 0;
5225 bool save_local_in_regs_p = false;
5226 unsigned int i;
5228 /* If the function allocates dynamic stack space, the dynamic offset is
5229 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5230 if (leaf_function && !cfun->calls_alloca)
5231 args_size = 0;
5232 else
5233 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5235 /* Calculate space needed for global registers. */
5236 if (TARGET_ARCH64)
5238 for (i = 0; i < 8; i++)
5239 if (save_global_or_fp_reg_p (i, 0))
5240 n_global_fp_regs += 2;
5242 else
5244 for (i = 0; i < 8; i += 2)
5245 if (save_global_or_fp_reg_p (i, 0)
5246 || save_global_or_fp_reg_p (i + 1, 0))
5247 n_global_fp_regs += 2;
5250 /* In the flat window model, find out which local and in registers need to
5251 be saved. We don't reserve space in the current frame for them as they
5252 will be spilled into the register window save area of the caller's frame.
5253 However, as soon as we use this register window save area, we must create
5254 that of the current frame to make it the live one. */
5255 if (TARGET_FLAT)
5256 for (i = 16; i < 32; i++)
5257 if (save_local_or_in_reg_p (i, leaf_function))
5259 save_local_in_regs_p = true;
5260 break;
5263 /* Calculate space needed for FP registers. */
5264 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5265 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5266 n_global_fp_regs += 2;
5268 if (size == 0
5269 && n_global_fp_regs == 0
5270 && args_size == 0
5271 && !save_local_in_regs_p)
5272 frame_size = apparent_frame_size = 0;
5273 else
5275 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5276 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5277 apparent_frame_size += n_global_fp_regs * 4;
5279 /* We need to add the size of the outgoing argument area. */
5280 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5282 /* And that of the register window save area. */
5283 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5285 /* Finally, bump to the appropriate alignment. */
5286 frame_size = SPARC_STACK_ALIGN (frame_size);
5289 /* Set up values for use in prologue and epilogue. */
5290 sparc_frame_size = frame_size;
5291 sparc_apparent_frame_size = apparent_frame_size;
5292 sparc_n_global_fp_regs = n_global_fp_regs;
5293 sparc_save_local_in_regs_p = save_local_in_regs_p;
5295 return frame_size;
5298 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5301 sparc_initial_elimination_offset (int to)
5303 int offset;
5305 if (to == STACK_POINTER_REGNUM)
5306 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5307 else
5308 offset = 0;
5310 offset += SPARC_STACK_BIAS;
5311 return offset;
5314 /* Output any necessary .register pseudo-ops. */
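/* For example, a 64-bit function that uses %g2 and %g7 would cause this
   to print (illustrative output)

	.register	%g2, #scratch
	.register	%g7, #ignore

   so the assembler knows how these application registers are used.  */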
5316 void
5317 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5319 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5320 int i;
5322 if (TARGET_ARCH32)
5323 return;
5325 /* Check if %g[2367] were used without
5326 .register being printed for them already. */
5327 for (i = 2; i < 8; i++)
5329 if (df_regs_ever_live_p (i)
5330 && ! sparc_hard_reg_printed [i])
5332 sparc_hard_reg_printed [i] = 1;
5333 /* %g7 is used as TLS base register, use #ignore
5334 for it instead of #scratch. */
5335 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5336 i == 7 ? "ignore" : "scratch");
5338 if (i == 3) i = 5;
5340 #endif
5343 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5345 #if PROBE_INTERVAL > 4096
5346 #error Cannot use indexed addressing mode for stack probing
5347 #endif
5349 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5350 inclusive. These are offsets from the current stack pointer.
5352 Note that we don't use the REG+REG addressing mode for the probes because
5353 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5354 so the advantage of having a single code path wins here. */
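/* A worked example, assuming the default PROBE_INTERVAL of 4096: for
   FIRST = 4096 and SIZE = 12288 the unrolled case below emits probes at
   sp - 8192, sp - 12288 and sp - 16384, i.e. one per page plus the final
   probe at FIRST + SIZE.  (On 64-bit the stack bias is also added to the
   concrete offsets.)  */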
5356 static void
5357 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5359 rtx g1 = gen_rtx_REG (Pmode, 1);
5361 /* See if we have a constant small number of probes to generate. If so,
5362 that's the easy case. */
5363 if (size <= PROBE_INTERVAL)
5365 emit_move_insn (g1, GEN_INT (first));
5366 emit_insn (gen_rtx_SET (g1,
5367 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5368 emit_stack_probe (plus_constant (Pmode, g1, -size));
5371 /* The run-time loop is made up of 9 insns in the generic case while the
5372 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5373 else if (size <= 4 * PROBE_INTERVAL)
5375 HOST_WIDE_INT i;
5377 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5378 emit_insn (gen_rtx_SET (g1,
5379 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5380 emit_stack_probe (g1);
5382 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5383 it exceeds SIZE. If only two probes are needed, this will not
5384 generate any code. Then probe at FIRST + SIZE. */
5385 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5387 emit_insn (gen_rtx_SET (g1,
5388 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5389 emit_stack_probe (g1);
5392 emit_stack_probe (plus_constant (Pmode, g1,
5393 (i - PROBE_INTERVAL) - size));
5396 /* Otherwise, do the same as above, but in a loop. Note that we must be
5397 extra careful with variables wrapping around because we might be at
5398 the very top (or the very bottom) of the address space and we have
5399 to be able to handle this case properly; in particular, we use an
5400 equality test for the loop condition. */
5401 else
5403 HOST_WIDE_INT rounded_size;
5404 rtx g4 = gen_rtx_REG (Pmode, 4);
5406 emit_move_insn (g1, GEN_INT (first));
5409 /* Step 1: round SIZE to the previous multiple of the interval. */
5411 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5412 emit_move_insn (g4, GEN_INT (rounded_size));
5415 /* Step 2: compute initial and final value of the loop counter. */
5417 /* TEST_ADDR = SP + FIRST. */
5418 emit_insn (gen_rtx_SET (g1,
5419 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5421 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5422 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5425 /* Step 3: the loop
5427 while (TEST_ADDR != LAST_ADDR)
5429 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5430 probe at TEST_ADDR
5433 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5434 until it is equal to ROUNDED_SIZE. */
5436 if (TARGET_ARCH64)
5437 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5438 else
5439 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5442 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5443 that SIZE is equal to ROUNDED_SIZE. */
5445 if (size != rounded_size)
5446 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5449 /* Make sure nothing is scheduled before we are done. */
5450 emit_insn (gen_blockage ());
5453 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5454 absolute addresses. */
5456 const char *
5457 output_probe_stack_range (rtx reg1, rtx reg2)
5459 static int labelno = 0;
5460 char loop_lab[32];
5461 rtx xops[2];
5463 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5465 /* Loop. */
5466 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5468 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5469 xops[0] = reg1;
5470 xops[1] = GEN_INT (-PROBE_INTERVAL);
5471 output_asm_insn ("add\t%0, %1, %0", xops);
5473 /* Test if TEST_ADDR == LAST_ADDR. */
5474 xops[1] = reg2;
5475 output_asm_insn ("cmp\t%0, %1", xops);
5477 /* Probe at TEST_ADDR and branch. */
5478 if (TARGET_ARCH64)
5479 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5480 else
5481 fputs ("\tbne\t", asm_out_file);
5482 assemble_name_raw (asm_out_file, loop_lab);
5483 fputc ('\n', asm_out_file);
5484 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5485 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5487 return "";
5490 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5491 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5492 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5493 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5494 the action to be performed if it returns false. Return the new offset. */
5496 typedef bool (*sorr_pred_t) (unsigned int, int);
5497 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5499 static int
5500 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5501 int offset, int leaf_function, sorr_pred_t save_p,
5502 sorr_act_t action_true, sorr_act_t action_false)
5504 unsigned int i;
5505 rtx mem;
5506 rtx_insn *insn;
5508 if (TARGET_ARCH64 && high <= 32)
5510 int fp_offset = -1;
5512 for (i = low; i < high; i++)
5514 if (save_p (i, leaf_function))
5516 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5517 base, offset));
5518 if (action_true == SORR_SAVE)
5520 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5521 RTX_FRAME_RELATED_P (insn) = 1;
5523 else /* action_true == SORR_RESTORE */
5525 /* The frame pointer must be restored last since its old
5526 value may be used as base address for the frame. This
5527 is problematic in 64-bit mode only because of the lack
5528 of a double-word load instruction. */
5529 if (i == HARD_FRAME_POINTER_REGNUM)
5530 fp_offset = offset;
5531 else
5532 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5534 offset += 8;
5536 else if (action_false == SORR_ADVANCE)
5537 offset += 8;
5540 if (fp_offset >= 0)
5542 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5543 emit_move_insn (hard_frame_pointer_rtx, mem);
5546 else
5548 for (i = low; i < high; i += 2)
5550 bool reg0 = save_p (i, leaf_function);
5551 bool reg1 = save_p (i + 1, leaf_function);
5552 machine_mode mode;
5553 int regno;
5555 if (reg0 && reg1)
5557 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5558 regno = i;
5560 else if (reg0)
5562 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5563 regno = i;
5565 else if (reg1)
5567 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5568 regno = i + 1;
5569 offset += 4;
5571 else
5573 if (action_false == SORR_ADVANCE)
5574 offset += 8;
5575 continue;
5578 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5579 if (action_true == SORR_SAVE)
5581 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5582 RTX_FRAME_RELATED_P (insn) = 1;
5583 if (mode == DImode)
5585 rtx set1, set2;
5586 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5587 offset));
5588 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5589 RTX_FRAME_RELATED_P (set1) = 1;
5591 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5592 offset + 4));
5593 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5594 RTX_FRAME_RELATED_P (set2) = 1;
5595 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5596 gen_rtx_PARALLEL (VOIDmode,
5597 gen_rtvec (2, set1, set2)));
5600 else /* action_true == SORR_RESTORE */
5601 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5603 /* Bump and round down to double word
5604 in case we already bumped by 4. */
5605 offset = ROUND_DOWN (offset + 8, 8);
5609 return offset;
5612 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5614 static rtx
5615 emit_adjust_base_to_offset (rtx base, int offset)
5617 /* ??? This might be optimized a little as %g1 might already have a
5618 value close enough that a single add insn will do. */
5619 /* ??? Although, all of this is probably only a temporary fix because
5620 if %g1 can hold a function result, then sparc_expand_epilogue will
5621 lose (the result will be clobbered). */
5622 rtx new_base = gen_rtx_REG (Pmode, 1);
5623 emit_move_insn (new_base, GEN_INT (offset));
5624 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5625 return new_base;
5628 /* Emit code to save/restore call-saved global and FP registers. */
5630 static void
5631 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5633 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5635 base = emit_adjust_base_to_offset (base, offset);
5636 offset = 0;
5639 offset
5640 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5641 save_global_or_fp_reg_p, action, SORR_NONE);
5642 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5643 save_global_or_fp_reg_p, action, SORR_NONE);
5646 /* Emit code to save/restore call-saved local and in registers. */
5648 static void
5649 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5651 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5653 base = emit_adjust_base_to_offset (base, offset);
5654 offset = 0;
5657 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5658 save_local_or_in_reg_p, action, SORR_ADVANCE);
5661 /* Emit a window_save insn. */
5663 static rtx_insn *
5664 emit_window_save (rtx increment)
5666 rtx_insn *insn = emit_insn (gen_window_save (increment));
5667 RTX_FRAME_RELATED_P (insn) = 1;
5669 /* The incoming return address (%o7) is saved in %i7. */
5670 add_reg_note (insn, REG_CFA_REGISTER,
5671 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5672 gen_rtx_REG (Pmode,
5673 INCOMING_RETURN_ADDR_REGNUM)));
5675 /* The window save event. */
5676 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5678 /* The CFA is %fp, the hard frame pointer. */
5679 add_reg_note (insn, REG_CFA_DEF_CFA,
5680 plus_constant (Pmode, hard_frame_pointer_rtx,
5681 INCOMING_FRAME_SP_OFFSET));
5683 return insn;
5686 /* Generate an increment for the stack pointer. */
5688 static rtx
5689 gen_stack_pointer_inc (rtx increment)
5691 return gen_rtx_SET (stack_pointer_rtx,
5692 gen_rtx_PLUS (Pmode,
5693 stack_pointer_rtx,
5694 increment));
5697 /* Expand the function prologue. The prologue is responsible for reserving
5698 storage for the frame, saving the call-saved registers and loading the
5699 GOT register if needed. */
5701 void
5702 sparc_expand_prologue (void)
5704 HOST_WIDE_INT size;
5705 rtx_insn *insn;
5707 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5708 on the final value of the flag means deferring the prologue/epilogue
5709 expansion until just before the second scheduling pass, which is too
5710 late to emit multiple epilogues or return insns.
5712 Of course we are making the assumption that the value of the flag
5713 will not change between now and its final value. Of the three parts
5714 of the formula, only the last one can reasonably vary. Let's take a
5715 closer look, after assuming that the first two ones are set to true
5716 (otherwise the last value is effectively silenced).
5718 If only_leaf_regs_used returns false, the global predicate will also
5719 be false so the actual frame size calculated below will be positive.
5720 As a consequence, the save_register_window insn will be emitted in
5721 the instruction stream; now this insn explicitly references %fp
5722 which is not a leaf register so only_leaf_regs_used will always
5723 return false subsequently.
5725 If only_leaf_regs_used returns true, we hope that the subsequent
5726 optimization passes won't cause non-leaf registers to pop up. For
5727 example, the regrename pass has special provisions to not rename to
5728 non-leaf registers in a leaf function. */
5729 sparc_leaf_function_p
5730 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5732 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5734 if (flag_stack_usage_info)
5735 current_function_static_stack_size = size;
5737 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5739 if (crtl->is_leaf && !cfun->calls_alloca)
5741 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5742 sparc_emit_probe_stack_range (get_stack_check_protect (),
5743 size - get_stack_check_protect ());
5745 else if (size > 0)
5746 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5749 if (size == 0)
5750 ; /* do nothing. */
5751 else if (sparc_leaf_function_p)
5753 rtx size_int_rtx = GEN_INT (-size);
5755 if (size <= 4096)
5756 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5757 else if (size <= 8192)
5759 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5760 RTX_FRAME_RELATED_P (insn) = 1;
5762 /* %sp is still the CFA register. */
5763 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5765 else
5767 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5768 emit_move_insn (size_rtx, size_int_rtx);
5769 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5770 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5771 gen_stack_pointer_inc (size_int_rtx));
5774 RTX_FRAME_RELATED_P (insn) = 1;
5776 else
5778 rtx size_int_rtx = GEN_INT (-size);
5780 if (size <= 4096)
5781 emit_window_save (size_int_rtx);
5782 else if (size <= 8192)
5784 emit_window_save (GEN_INT (-4096));
5786 /* %sp is not the CFA register anymore. */
5787 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5789 /* Make sure no %fp-based store is issued until after the frame is
5790 established. The offset between the frame pointer and the stack
5791 pointer is calculated relative to the value of the stack pointer
5792 at the end of the function prologue, and moving instructions that
5793 access the stack via the frame pointer between the instructions
5794 that decrement the stack pointer could result in accessing the
5795 register window save area, which is volatile. */
5796 emit_insn (gen_frame_blockage ());
5798 else
5800 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5801 emit_move_insn (size_rtx, size_int_rtx);
5802 emit_window_save (size_rtx);
5806 if (sparc_leaf_function_p)
5808 sparc_frame_base_reg = stack_pointer_rtx;
5809 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5811 else
5813 sparc_frame_base_reg = hard_frame_pointer_rtx;
5814 sparc_frame_base_offset = SPARC_STACK_BIAS;
5817 if (sparc_n_global_fp_regs > 0)
5818 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5819 sparc_frame_base_offset
5820 - sparc_apparent_frame_size,
5821 SORR_SAVE);
5823 /* Load the GOT register if needed. */
5824 if (crtl->uses_pic_offset_table)
5825 load_got_register ();
5827 /* Advertise that the data calculated just above are now valid. */
5828 sparc_prologue_data_valid_p = true;
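/* Condensed view of the frame allocation strategy implemented above, as
   a hypothetical standalone helper: a decrement of up to 4096 bytes fits
   in one add with a 13-bit immediate, up to 8192 bytes fits in two, and
   anything larger must go through the %g1 scratch register.  */

enum frame_alloc_kind { ALLOC_ONE_ADD, ALLOC_TWO_ADDS, ALLOC_VIA_G1 };

static enum frame_alloc_kind
classify_frame_alloc (long size)
{
  if (size <= 4096)
    return ALLOC_ONE_ADD;   /* add %sp, -size, %sp */
  else if (size <= 8192)
    return ALLOC_TWO_ADDS;  /* add %sp, -4096, %sp; add %sp, 4096-size, %sp */
  else
    return ALLOC_VIA_G1;    /* set -size, %g1; add %sp, %g1, %sp */
}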
5831 /* Expand the function prologue for the flat window model. The prologue
5832 is responsible for reserving storage for the frame, saving the
5833 call-saved registers and loading the GOT register if needed. */
5835 void
5836 sparc_flat_expand_prologue (void)
5838 HOST_WIDE_INT size;
5839 rtx_insn *insn;
5841 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5843 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5845 if (flag_stack_usage_info)
5846 current_function_static_stack_size = size;
5848 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5850 if (crtl->is_leaf && !cfun->calls_alloca)
5852 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5853 sparc_emit_probe_stack_range (get_stack_check_protect (),
5854 size - get_stack_check_protect ());
5856 else if (size > 0)
5857 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5860 if (sparc_save_local_in_regs_p)
5861 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5862 SORR_SAVE);
5864 if (size == 0)
5865 ; /* do nothing. */
5866 else
5868 rtx size_int_rtx, size_rtx;
5870 size_rtx = size_int_rtx = GEN_INT (-size);
5872 /* We establish the frame (i.e. decrement the stack pointer) first, even
5873 if we use a frame pointer, because we cannot clobber any call-saved
5874 registers, including the frame pointer, if we haven't created a new
5875 register save area, for the sake of compatibility with the ABI. */
5876 if (size <= 4096)
5877 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5878 else if (size <= 8192 && !frame_pointer_needed)
5880 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5881 RTX_FRAME_RELATED_P (insn) = 1;
5882 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5884 else
5886 size_rtx = gen_rtx_REG (Pmode, 1);
5887 emit_move_insn (size_rtx, size_int_rtx);
5888 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5889 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5890 gen_stack_pointer_inc (size_int_rtx));
5892 RTX_FRAME_RELATED_P (insn) = 1;
5894 /* Ensure nothing is scheduled until after the frame is established. */
5895 emit_insn (gen_blockage ());
5897 if (frame_pointer_needed)
5899 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5900 gen_rtx_MINUS (Pmode,
5901 stack_pointer_rtx,
5902 size_rtx)));
5903 RTX_FRAME_RELATED_P (insn) = 1;
5905 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5906 gen_rtx_SET (hard_frame_pointer_rtx,
5907 plus_constant (Pmode, stack_pointer_rtx,
5908 size)));
5911 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5913 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5914 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5916 insn = emit_move_insn (i7, o7);
5917 RTX_FRAME_RELATED_P (insn) = 1;
5919 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5921 /* Prevent this instruction from ever being considered dead,
5922 even if this function has no epilogue. */
5923 emit_use (i7);
5927 if (frame_pointer_needed)
5929 sparc_frame_base_reg = hard_frame_pointer_rtx;
5930 sparc_frame_base_offset = SPARC_STACK_BIAS;
5932 else
5934 sparc_frame_base_reg = stack_pointer_rtx;
5935 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5938 if (sparc_n_global_fp_regs > 0)
5939 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5940 sparc_frame_base_offset
5941 - sparc_apparent_frame_size,
5942 SORR_SAVE);
5944 /* Load the GOT register if needed. */
5945 if (crtl->uses_pic_offset_table)
5946 load_got_register ();
5948 /* Advertise that the data calculated just above are now valid. */
5949 sparc_prologue_data_valid_p = true;
5952 /* This function generates the assembly code for function entry, which boils
5953 down to emitting the necessary .register directives. */
5955 static void
5956 sparc_asm_function_prologue (FILE *file)
5958 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5959 if (!TARGET_FLAT)
5960 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5962 sparc_output_scratch_registers (file);
5965 /* Expand the function epilogue, either normal or part of a sibcall.
5966 We emit all the instructions except the return or the call. */
5968 void
5969 sparc_expand_epilogue (bool for_eh)
5971 HOST_WIDE_INT size = sparc_frame_size;
5973 if (cfun->calls_alloca)
5974 emit_insn (gen_frame_blockage ());
5976 if (sparc_n_global_fp_regs > 0)
5977 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5978 sparc_frame_base_offset
5979 - sparc_apparent_frame_size,
5980 SORR_RESTORE);
5982 if (size == 0 || for_eh)
5983 ; /* do nothing. */
5984 else if (sparc_leaf_function_p)
5986 if (size <= 4096)
5987 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5988 else if (size <= 8192)
5990 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5991 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5993 else
5995 rtx reg = gen_rtx_REG (Pmode, 1);
5996 emit_move_insn (reg, GEN_INT (size));
5997 emit_insn (gen_stack_pointer_inc (reg));
6002 /* Expand the function epilogue for the flat window model, either normal
6003 or part of a sibcall. We emit all the instructions except the return or the call. */
6005 void
6006 sparc_flat_expand_epilogue (bool for_eh)
6008 HOST_WIDE_INT size = sparc_frame_size;
6010 if (sparc_n_global_fp_regs > 0)
6011 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6012 sparc_frame_base_offset
6013 - sparc_apparent_frame_size,
6014 SORR_RESTORE);
6016 /* If we have a frame pointer, we need both to restore it before the
6017 frame is destroyed and to use its current value in destroying the
6018 frame. Since there is no atomic way to do that in the flat window
6019 model, we save the current value into a temporary register (%g1). */
6020 if (frame_pointer_needed && !for_eh)
6021 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6023 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6024 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6025 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6027 if (sparc_save_local_in_regs_p)
6028 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6029 sparc_frame_base_offset,
6030 SORR_RESTORE);
6032 if (size == 0 || for_eh)
6033 ; /* do nothing. */
6034 else if (frame_pointer_needed)
6036 /* Make sure the frame is destroyed after everything else is done. */
6037 emit_insn (gen_blockage ());
6039 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6041 else
6043 /* Likewise. */
6044 emit_insn (gen_blockage ());
6046 if (size <= 4096)
6047 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6048 else if (size <= 8192)
6050 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6051 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6053 else
6055 rtx reg = gen_rtx_REG (Pmode, 1);
6056 emit_move_insn (reg, GEN_INT (size));
6057 emit_insn (gen_stack_pointer_inc (reg));
6062 /* Return true if it is appropriate to emit `return' instructions in the
6063 body of a function. */
6065 bool
6066 sparc_can_use_return_insn_p (void)
6068 return sparc_prologue_data_valid_p
6069 && sparc_n_global_fp_regs == 0
6070 && (TARGET_FLAT
6071 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6072 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
6075 /* This function generates the assembly code for function exit. */
6077 static void
6078 sparc_asm_function_epilogue (FILE *file)
6080 /* If the last two instructions of a function are "call foo; dslot;"
6081 the return address might point to the first instruction in the next
6082 function and we have to output a dummy nop for the sake of sane
6083 backtraces in such cases. This is pointless for sibling calls since
6084 the return address is explicitly adjusted. */
6086 rtx_insn *insn = get_last_insn ();
6088 rtx last_real_insn = prev_real_insn (insn);
6089 if (last_real_insn
6090 && NONJUMP_INSN_P (last_real_insn)
6091 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6092 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6094 if (last_real_insn
6095 && CALL_P (last_real_insn)
6096 && !SIBLING_CALL_P (last_real_insn))
6097 fputs ("\tnop\n", file);
6099 sparc_output_deferred_case_vectors ();
6102 /* Output a 'restore' instruction. */
6104 static void
6105 output_restore (rtx pat)
6107 rtx operands[3];
6109 if (! pat)
6111 fputs ("\t restore\n", asm_out_file);
6112 return;
6115 gcc_assert (GET_CODE (pat) == SET);
6117 operands[0] = SET_DEST (pat);
6118 pat = SET_SRC (pat);
6120 switch (GET_CODE (pat))
6122 case PLUS:
6123 operands[1] = XEXP (pat, 0);
6124 operands[2] = XEXP (pat, 1);
6125 output_asm_insn (" restore %r1, %2, %Y0", operands);
6126 break;
6127 case LO_SUM:
6128 operands[1] = XEXP (pat, 0);
6129 operands[2] = XEXP (pat, 1);
6130 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6131 break;
6132 case ASHIFT:
6133 operands[1] = XEXP (pat, 0);
6134 gcc_assert (XEXP (pat, 1) == const1_rtx);
6135 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6136 break;
6137 default:
6138 operands[1] = pat;
6139 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6140 break;
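/* For example, when output_return below combines an epilogue delay-slot
   insn such as "%i0 = %i1 + 1" with the window restore, the PLUS case
   above emits the single instruction

       restore %i1, 1, %o0

   relying on the fact that SPARC `restore' performs an addition as a
   side effect; the %Y modifier prints the destination register as seen
   after the window shift, which renames the callee's %i registers to
   the caller's %o registers.  */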
6144 /* Output a return. */
6146 const char *
6147 output_return (rtx_insn *insn)
6149 if (crtl->calls_eh_return)
6151 /* If the function uses __builtin_eh_return, the eh_return
6152 machinery occupies the delay slot. */
6153 gcc_assert (!final_sequence);
6155 if (flag_delayed_branch)
6157 if (!TARGET_FLAT && TARGET_V9)
6158 fputs ("\treturn\t%i7+8\n", asm_out_file);
6159 else
6161 if (!TARGET_FLAT)
6162 fputs ("\trestore\n", asm_out_file);
6164 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6167 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6169 else
6171 if (!TARGET_FLAT)
6172 fputs ("\trestore\n", asm_out_file);
6174 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6175 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6178 else if (sparc_leaf_function_p || TARGET_FLAT)
6180 /* This is a leaf or flat function so we don't have to bother restoring
6181 the register window, which frees us from dealing with the convoluted
6182 semantics of restore/return. We simply output the jump to the
6183 return address and the insn in the delay slot (if any). */
6185 return "jmp\t%%o7+%)%#";
6187 else
6189 /* This is a regular function so we have to restore the register window.
6190 We may have a pending insn for the delay slot, which will be either
6191 combined with the 'restore' instruction or put in the delay slot of
6192 the 'return' instruction. */
6194 if (final_sequence)
6196 rtx_insn *delay;
6197 rtx pat;
6198 int seen;
6200 delay = NEXT_INSN (insn);
6201 gcc_assert (delay);
6203 pat = PATTERN (delay);
6205 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6207 epilogue_renumber (&pat, 0);
6208 return "return\t%%i7+%)%#";
6210 else
6212 output_asm_insn ("jmp\t%%i7+%)", NULL);
6214 /* We're going to output the insn in the delay slot manually.
6215 Make sure to output its source location first. */
6216 PATTERN (delay) = gen_blockage ();
6217 INSN_CODE (delay) = -1;
6218 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6219 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6221 output_restore (pat);
6224 else
6226 /* The delay slot is empty. */
6227 if (TARGET_V9)
6228 return "return\t%%i7+%)\n\t nop";
6229 else if (flag_delayed_branch)
6230 return "jmp\t%%i7+%)\n\t restore";
6231 else
6232 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6236 return "";
6239 /* Output a sibling call. */
6241 const char *
6242 output_sibcall (rtx_insn *insn, rtx call_operand)
6244 rtx operands[1];
6246 gcc_assert (flag_delayed_branch);
6248 operands[0] = call_operand;
6250 if (sparc_leaf_function_p || TARGET_FLAT)
6252 /* This is a leaf or flat function so we don't have to bother restoring
6253 the register window. We simply output the jump to the function and
6254 the insn in the delay slot (if any). */
6256 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6258 if (final_sequence)
6259 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6260 operands);
6261 else
6262 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6263 it into a branch if possible. */
6264 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6265 operands);
6267 else
6269 /* This is a regular function so we have to restore the register window.
6270 We may have a pending insn for the delay slot, which will be combined
6271 with the 'restore' instruction. */
6273 output_asm_insn ("call\t%a0, 0", operands);
6275 if (final_sequence)
6277 rtx_insn *delay;
6278 rtx pat;
6279 int seen;
6281 delay = NEXT_INSN (insn);
6282 gcc_assert (delay);
6284 pat = PATTERN (delay);
6286 /* We're going to output the insn in the delay slot manually.
6287 Make sure to output its source location first. */
6288 PATTERN (delay) = gen_blockage ();
6289 INSN_CODE (delay) = -1;
6290 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6291 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6293 output_restore (pat);
6295 else
6296 output_restore (NULL_RTX);
6299 return "";
6302 /* Functions for handling argument passing.
6304 For 32-bit, the first 6 args are normally in registers and the rest are
6305 pushed. Any arg that starts within the first 6 words is at least
6306 partially passed in a register unless its data type forbids.
6308 For 64-bit, the argument registers are laid out as an array of 16 elements
6309 and arguments are added sequentially. The first 6 int args and up to the
6310 first 16 fp args (depending on size) are passed in regs.
6312    Slot    Stack     Integral   Float    Float in structure   Double   Long Double
6313    ----    -----     --------   -----    ------------------   ------   -----------
6314     15   [SP+248]               %f31        %f30,%f31          %d30
6315     14   [SP+240]               %f29        %f28,%f29          %d28        %q28
6316     13   [SP+232]               %f27        %f26,%f27          %d26
6317     12   [SP+224]               %f25        %f24,%f25          %d24        %q24
6318     11   [SP+216]               %f23        %f22,%f23          %d22
6319     10   [SP+208]               %f21        %f20,%f21          %d20        %q20
6320      9   [SP+200]               %f19        %f18,%f19          %d18
6321      8   [SP+192]               %f17        %f16,%f17          %d16        %q16
6322      7   [SP+184]               %f15        %f14,%f15          %d14
6323      6   [SP+176]               %f13        %f12,%f13          %d12        %q12
6324      5   [SP+168]      %o5      %f11        %f10,%f11          %d10
6325      4   [SP+160]      %o4       %f9         %f8,%f9           %d8         %q8
6326      3   [SP+152]      %o3       %f7         %f6,%f7           %d6
6327      2   [SP+144]      %o2       %f5         %f4,%f5           %d4         %q4
6328      1   [SP+136]      %o1       %f3         %f2,%f3           %d2
6329      0   [SP+128]      %o0       %f1         %f0,%f1           %d0         %q0
6331 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6333 Integral arguments are always passed as 64-bit quantities appropriately
6334 extended.
6336 Passing of floating point values is handled as follows.
6337 If a prototype is in scope:
6338 If the value is a named argument (i.e. the function is not stdarg, or
6339 the value is not part of the `...') then the value is passed in the
6340 appropriate fp reg.
6341 If the value is part of the `...' and is passed in one of the first 6
6342 slots then the value is passed in the appropriate int reg.
6343 If the value is part of the `...' and is not passed in one of the first 6
6344 slots then the value is passed in memory.
6345 If a prototype is not in scope:
6346 If the value is one of the first 6 arguments the value is passed in the
6347 appropriate integer reg and the appropriate fp reg.
6348 If the value is not one of the first 6 arguments the value is passed in
6349 the appropriate fp reg and in memory.
6352 Summary of the calling conventions implemented by GCC on the SPARC:
6354 32-bit ABI:
6355                             size      argument     return value
6357    small integer             <4       int. reg.    int. reg.
6358    word                       4       int. reg.    int. reg.
6359    double word                8       int. reg.    int. reg.
6361    _Complex small integer    <8       int. reg.    int. reg.
6362    _Complex word              8       int. reg.    int. reg.
6363    _Complex double word      16       memory       int. reg.
6365    vector integer           <=8       int. reg.    FP reg.
6366    vector integer            >8       memory       memory
6368    float                      4       int. reg.    FP reg.
6369    double                     8       int. reg.    FP reg.
6370    long double               16       memory       memory
6372    _Complex float             8       memory       FP reg.
6373    _Complex double           16       memory       FP reg.
6374    _Complex long double      32       memory       FP reg.
6376    vector float             any       memory       memory
6378    aggregate                any       memory       memory
6382 64-bit ABI:
6383                             size      argument     return value
6385    small integer             <8       int. reg.    int. reg.
6386    word                       8       int. reg.    int. reg.
6387    double word               16       int. reg.    int. reg.
6389    _Complex small integer   <16       int. reg.    int. reg.
6390    _Complex word             16       int. reg.    int. reg.
6391    _Complex double word      32       memory       int. reg.
6393    vector integer          <=16       FP reg.      FP reg.
6394    vector integer       16<s<=32      memory       FP reg.
6395    vector integer           >32       memory       memory
6397    float                      4       FP reg.      FP reg.
6398    double                     8       FP reg.      FP reg.
6399    long double               16       FP reg.      FP reg.
6401    _Complex float             8       FP reg.      FP reg.
6402    _Complex double           16       FP reg.      FP reg.
6403    _Complex long double      32       memory       FP reg.
6405    vector float            <=16       FP reg.      FP reg.
6406    vector float         16<s<=32      memory       FP reg.
6407    vector float             >32       memory       memory
6409    aggregate               <=16       reg.         reg.
6410    aggregate            16<s<=32      memory       reg.
6411    aggregate                >32       memory       memory
6415 Note #1: complex floating-point types follow the extended SPARC ABIs as
6416 implemented by the Sun compiler.
6418 Note #2: integral vector types follow the scalar floating-point types
6419 conventions to match what is implemented by the Sun VIS SDK.
6421 Note #3: floating-point vector types follow the aggregate types
6422 conventions. */
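/* Worked example of the 64-bit slot table above, for a hypothetical
   prototyped call

       void f (int a, double b, float c);

   a occupies slot 0 and is passed in %o0; b occupies slot 1 and is
   passed in %d2; c occupies slot 2 and is passed in %f5, a single
   float being right-justified in the odd half of its slot's FP
   register pair.  */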
6425 /* Maximum number of int regs for args. */
6426 #define SPARC_INT_ARG_MAX 6
6427 /* Maximum number of fp regs for args. */
6428 #define SPARC_FP_ARG_MAX 16
6429 /* Number of words (partially) occupied for a given size in units. */
6430 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
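/* Example: under the 64-bit ABI (UNITS_PER_WORD == 8), a 20-byte
   aggregate spans CEIL_NWORDS (20) == 3 argument slots, of which at
   most SPARC_INT_ARG_MAX == 6 are backed by integer registers.  */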
6432 /* Handle the INIT_CUMULATIVE_ARGS macro.
6433 Initialize a variable CUM of type CUMULATIVE_ARGS
6434 for a call to a function whose data type is FNTYPE.
6435 For a library call, FNTYPE is 0. */
6437 void
6438 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6440 cum->words = 0;
6441 cum->prototype_p = fntype && prototype_p (fntype);
6442 cum->libcall_p = !fntype;
6445 /* Handle promotion of pointer and integer arguments. */
6447 static machine_mode
6448 sparc_promote_function_mode (const_tree type, machine_mode mode,
6449 int *punsignedp, const_tree, int)
6451 if (type && POINTER_TYPE_P (type))
6453 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6454 return Pmode;
6457 /* Integral arguments are passed as full words, as per the ABI. */
6458 if (GET_MODE_CLASS (mode) == MODE_INT
6459 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6460 return word_mode;
6462 return mode;
6465 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6467 static bool
6468 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6470 return TARGET_ARCH64;
6473 /* Traverse the record TYPE recursively and call FUNC on its fields.
6474 NAMED is true if this is for a named parameter. DATA is passed
6475 to FUNC for each field. OFFSET is the starting position and
6476 PACKED is true if we are inside a packed record. */
6478 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6479 static void
6480 traverse_record_type (const_tree type, bool named, T *data,
6481 HOST_WIDE_INT offset = 0, bool packed = false)
6483 /* The ABI obviously doesn't specify how packed structures are passed.
6484 These are passed in integer regs if possible, otherwise memory. */
6485 if (!packed)
6486 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6487 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6489 packed = true;
6490 break;
6493 /* Walk the real fields, but skip those with no size or a zero size.
6494 ??? Fields with variable offset are handled as having zero offset. */
6495 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6496 if (TREE_CODE (field) == FIELD_DECL)
6498 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6499 continue;
6501 HOST_WIDE_INT bitpos = offset;
6502 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6503 bitpos += int_bit_position (field);
6505 tree field_type = TREE_TYPE (field);
6506 if (TREE_CODE (field_type) == RECORD_TYPE)
6507 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6508 packed);
6509 else
6511 const bool fp_type
6512 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6513 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6514 data);
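/* For example, the classification code below walks a record type with

       classify_data_t data = { false, false };
       traverse_record_type<classify_data_t, classify_registers>
         (type, named, &data);

   which invokes classify_registers once per non-empty leaf field with
   its bit position and FP eligibility.  */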
6519 /* Handle recursive register classifying for structure layout. */
6521 typedef struct
6523 bool fp_regs; /* true if field eligible for FP registers. */
6524 bool fp_regs_in_first_word; /* true if such a field is in the first word. */
6525 } classify_data_t;
6527 /* A subroutine of function_arg_slotno. Classify the field. */
6529 inline void
6530 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6531 classify_data_t *data)
6533 if (fp)
6535 data->fp_regs = true;
6536 if (bitpos < BITS_PER_WORD)
6537 data->fp_regs_in_first_word = true;
6541 /* Compute the slot number to pass an argument in.
6542 Return the slot number or -1 if passing on the stack.
6544 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6545 the preceding args and about the function being called.
6546 MODE is the argument's machine mode.
6547 TYPE is the data type of the argument (as a tree).
6548 This is null for libcalls where that information may
6549 not be available.
6550 NAMED is nonzero if this argument is a named parameter
6551 (otherwise it is an extra parameter matching an ellipsis).
6552 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6553 *PREGNO records the register number to use if scalar type.
6554 *PPADDING records the amount of padding needed in words. */
6556 static int
6557 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6558 const_tree type, bool named, bool incoming,
6559 int *pregno, int *ppadding)
6561 int regbase = (incoming
6562 ? SPARC_INCOMING_INT_ARG_FIRST
6563 : SPARC_OUTGOING_INT_ARG_FIRST);
6564 int slotno = cum->words;
6565 enum mode_class mclass;
6566 int regno;
6568 *ppadding = 0;
6570 if (type && TREE_ADDRESSABLE (type))
6571 return -1;
6573 if (TARGET_ARCH32
6574 && mode == BLKmode
6575 && type
6576 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6577 return -1;
6579 /* For SPARC64, objects requiring 16-byte alignment get it. */
6580 if (TARGET_ARCH64
6581 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6582 && (slotno & 1) != 0)
6583 slotno++, *ppadding = 1;
6585 mclass = GET_MODE_CLASS (mode);
6586 if (type && TREE_CODE (type) == VECTOR_TYPE)
6588 /* Vector types deserve special treatment because they are
6589 polymorphic wrt their mode, depending upon whether VIS
6590 instructions are enabled. */
6591 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6593 /* The SPARC port defines no floating-point vector modes. */
6594 gcc_assert (mode == BLKmode);
6596 else
6598 /* Integral vector types should either have a vector
6599 mode or an integral mode, because we are guaranteed
6600 by pass_by_reference that their size is not greater
6601 than 16 bytes and TImode is 16-byte wide. */
6602 gcc_assert (mode != BLKmode);
6604 /* Vector integers are handled like floats according to
6605 the Sun VIS SDK. */
6606 mclass = MODE_FLOAT;
6610 switch (mclass)
6612 case MODE_FLOAT:
6613 case MODE_COMPLEX_FLOAT:
6614 case MODE_VECTOR_INT:
6615 if (TARGET_ARCH64 && TARGET_FPU && named)
6617 /* If all arg slots are filled, then must pass on stack. */
6618 if (slotno >= SPARC_FP_ARG_MAX)
6619 return -1;
6621 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6622 /* Arguments filling only a single FP register are
6623 right-justified in the outer double FP register. */
6624 if (GET_MODE_SIZE (mode) <= 4)
6625 regno++;
6626 break;
6628 /* fallthrough */
6630 case MODE_INT:
6631 case MODE_COMPLEX_INT:
6632 /* If all arg slots are filled, then must pass on stack. */
6633 if (slotno >= SPARC_INT_ARG_MAX)
6634 return -1;
6636 regno = regbase + slotno;
6637 break;
6639 case MODE_RANDOM:
6640 if (mode == VOIDmode)
6641 /* MODE is VOIDmode when generating the actual call. */
6642 return -1;
6644 gcc_assert (mode == BLKmode);
6646 if (TARGET_ARCH32
6647 || !type
6648 || (TREE_CODE (type) != RECORD_TYPE
6649 && TREE_CODE (type) != VECTOR_TYPE))
6651 /* If all arg slots are filled, then must pass on stack. */
6652 if (slotno >= SPARC_INT_ARG_MAX)
6653 return -1;
6655 regno = regbase + slotno;
6657 else /* TARGET_ARCH64 && type */
6659 /* If all arg slots are filled, then must pass on stack. */
6660 if (slotno >= SPARC_FP_ARG_MAX)
6661 return -1;
6663 if (TREE_CODE (type) == RECORD_TYPE)
6665 classify_data_t data = { false, false };
6666 traverse_record_type<classify_data_t, classify_registers>
6667 (type, named, &data);
6669 if (data.fp_regs)
6671 /* If all FP slots are filled except for the last one and
6672 there is no FP field in the first word, then must pass
6673 on stack. */
6674 if (slotno >= SPARC_FP_ARG_MAX - 1
6675 && !data.fp_regs_in_first_word)
6676 return -1;
6678 else
6680 /* If all int slots are filled, then must pass on stack. */
6681 if (slotno >= SPARC_INT_ARG_MAX)
6682 return -1;
6686 /* PREGNO isn't set since both int and FP regs can be used. */
6687 return slotno;
6689 break;
6691 default:
6692 gcc_unreachable ();
6695 *pregno = regno;
6696 return slotno;
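/* Minimal sketch of the 16-byte alignment bump performed above, as a
   hypothetical standalone helper: a 128-bit-aligned argument may not
   start in an odd slot, and skipping to the next even slot costs one
   word of padding.  */

static int
align_to_even_slot (int slotno, int *ppadding)
{
  if (slotno & 1)
    {
      slotno++;
      *ppadding = 1;
    }
  return slotno;
}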
6699 /* Handle recursive register counting/assigning for structure layout. */
6701 typedef struct
6703 int slotno; /* slot number of the argument. */
6704 int regbase; /* regno of the base register. */
6705 int intoffset; /* offset of the first pending integer field. */
6706 int nregs; /* number of words passed in registers. */
6707 bool stack; /* true if part of the argument is on the stack. */
6708 rtx ret; /* return expression being built. */
6709 } assign_data_t;
6711 /* A subroutine of function_arg_record_value. Compute the number of integer
6712 registers to be assigned between DATA->intoffset and BITPOS. Return
6713 true if at least one integer register is assigned or false otherwise. */
6715 static bool
6716 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6718 if (data->intoffset < 0)
6719 return false;
6721 const int intoffset = data->intoffset;
6722 data->intoffset = -1;
6724 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6725 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6726 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6727 int nregs = (endbit - startbit) / BITS_PER_WORD;
6729 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6731 nregs = SPARC_INT_ARG_MAX - this_slotno;
6733 /* We need to pass this field (partly) on the stack. */
6734 data->stack = 1;
6737 if (nregs <= 0)
6738 return false;
6740 *pnregs = nregs;
6741 return true;
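/* Worked example of the arithmetic above, assuming 64-bit words: with
   integer fields pending from intoffset = 8 up to bitpos = 72, we get
   startbit = ROUND_DOWN (8, 64) = 0 and endbit = ROUND_UP (72, 64)
   = 128, hence (128 - 0) / 64 = 2 integer registers.  */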
6744 /* A subroutine of function_arg_record_value. Compute the number and the mode
6745 of the FP registers to be assigned for FIELD. Return true if at least one
6746 FP register is assigned or false otherwise. */
6748 static bool
6749 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6750 assign_data_t *data,
6751 int *pnregs, machine_mode *pmode)
6753 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6754 machine_mode mode = DECL_MODE (field);
6755 int nregs, nslots;
6757 /* Slots are counted as words while regs are counted as having the size of
6758 the (inner) mode. */
6759 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6761 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6762 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6764 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6766 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6767 nregs = 2;
6769 else
6770 nregs = 1;
6772 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6774 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6776 nslots = SPARC_FP_ARG_MAX - this_slotno;
6777 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6779 /* We need to pass this field (partly) on the stack. */
6780 data->stack = 1;
6782 if (nregs <= 0)
6783 return false;
6786 *pnregs = nregs;
6787 *pmode = mode;
6788 return true;
6791 /* A subroutine of function_arg_record_value. Count the number of registers
6792 to be assigned for FIELD and between DATA->intoffset and BITPOS. */
6794 inline void
6795 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6796 assign_data_t *data)
6798 if (fp)
6800 int nregs;
6801 machine_mode mode;
6803 if (compute_int_layout (bitpos, data, &nregs))
6804 data->nregs += nregs;
6806 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6807 data->nregs += nregs;
6809 else
6811 if (data->intoffset < 0)
6812 data->intoffset = bitpos;
6816 /* A subroutine of function_arg_record_value. Assign the bits of the
6817 structure between DATA->intoffset and BITPOS to integer registers. */
6819 static void
6820 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6822 int intoffset = data->intoffset;
6823 machine_mode mode;
6824 int nregs;
6826 if (!compute_int_layout (bitpos, data, &nregs))
6827 return;
6829 /* If this is the trailing part of a word, only load that much into
6830 the register. Otherwise load the whole register. Note that in
6831 the latter case we may pick up unwanted bits. It's not a problem
6832 at the moment but we may wish to revisit this. */
6833 if (intoffset % BITS_PER_WORD != 0)
6834 mode = smallest_int_mode_for_size (BITS_PER_WORD
6835 - intoffset % BITS_PER_WORD);
6836 else
6837 mode = word_mode;
6839 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6840 unsigned int regno = data->regbase + this_slotno;
6841 intoffset /= BITS_PER_UNIT;
6845 rtx reg = gen_rtx_REG (mode, regno);
6846 XVECEXP (data->ret, 0, data->stack + data->nregs)
6847 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6848 data->nregs += 1;
6849 mode = word_mode;
6850 regno += 1;
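/* Advance to the next word boundary; this is equivalent to
   ROUND_DOWN (intoffset, UNITS_PER_WORD) + UNITS_PER_WORD.  */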
6851 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6853 while (--nregs > 0);
6856 /* A subroutine of function_arg_record_value. Assign FIELD at position
6857 BITPOS to FP registers. */
6859 static void
6860 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6861 assign_data_t *data)
6863 int nregs;
6864 machine_mode mode;
6866 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6867 return;
6869 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6870 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6871 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6872 regno++;
6873 int pos = bitpos / BITS_PER_UNIT;
6877 rtx reg = gen_rtx_REG (mode, regno);
6878 XVECEXP (data->ret, 0, data->stack + data->nregs)
6879 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6880 data->nregs += 1;
6881 regno += GET_MODE_SIZE (mode) / 4;
6882 pos += GET_MODE_SIZE (mode);
6884 while (--nregs > 0);
6887 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6888 the structure between DATA->intoffset and BITPOS to registers. */
6890 inline void
6891 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6892 assign_data_t *data)
6894 if (fp)
6896 assign_int_registers (bitpos, data);
6898 assign_fp_registers (field, bitpos, data);
6900 else
6902 if (data->intoffset < 0)
6903 data->intoffset = bitpos;
6907 /* Used by function_arg and sparc_function_value_1 to implement the complex
6908 conventions of the 64-bit ABI for passing and returning structures.
6909 Return an expression valid as a return value for the FUNCTION_ARG
6910 and TARGET_FUNCTION_VALUE.
6912 TYPE is the data type of the argument (as a tree).
6913 This is null for libcalls where that information may
6914 not be available.
6915 MODE is the argument's machine mode.
6916 SLOTNO is the index number of the argument's slot in the parameter array.
6917 NAMED is true if this argument is a named parameter
6918 (otherwise it is an extra parameter matching an ellipsis).
6919 REGBASE is the regno of the base register for the parameter array. */
6921 static rtx
6922 function_arg_record_value (const_tree type, machine_mode mode,
6923 int slotno, bool named, int regbase)
6925 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6926 assign_data_t data;
6927 int nregs;
6929 data.slotno = slotno;
6930 data.regbase = regbase;
6932 /* Count how many registers we need. */
6933 data.nregs = 0;
6934 data.intoffset = 0;
6935 data.stack = false;
6936 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6938 /* Take into account pending integer fields. */
6939 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6940 data.nregs += nregs;
6942 /* Allocate the vector and handle some annoying special cases. */
6943 nregs = data.nregs;
6945 if (nregs == 0)
6947 /* ??? Empty structure has no value? Duh? */
6948 if (typesize <= 0)
6950 /* Though there's nothing really to store, return a word register
6951 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6952 leads to breakage due to the fact that there are zero bytes to
6953 load. */
6954 return gen_rtx_REG (mode, regbase);
6957 /* ??? C++ has structures with no fields, and yet a size. Give up
6958 for now and pass everything back in integer registers. */
6959 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6960 if (nregs + slotno > SPARC_INT_ARG_MAX)
6961 nregs = SPARC_INT_ARG_MAX - slotno;
6964 gcc_assert (nregs > 0);
6966 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6968 /* If at least one field must be passed on the stack, generate
6969 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6970 also be passed on the stack. We can't do much better because the
6971 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6972 of structures for which the fields passed exclusively in registers
6973 are not at the beginning of the structure. */
6974 if (data.stack)
6975 XVECEXP (data.ret, 0, 0)
6976 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6978 /* Assign the registers. */
6979 data.nregs = 0;
6980 data.intoffset = 0;
6981 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6983 /* Assign pending integer fields. */
6984 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6986 gcc_assert (data.nregs == nregs);
6988 return data.ret;
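/* Worked example for a hypothetical 64-bit call passing

       struct { int i; float f; } x;

   The struct is 8 bytes, i.e. one slot.  The integer part is assigned
   a full word register at offset 0 (the word-granular layout picks up
   the float's bytes too, the "unwanted bits" noted above), while the
   float field is separately assigned %f1 at offset 4, so the result
   is a PARALLEL of two expr_list entries.  */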
6991 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6992 of the 64-bit ABI for passing and returning unions.
6993 Return an expression valid as a return value for the FUNCTION_ARG
6994 and TARGET_FUNCTION_VALUE.
6996 SIZE is the size in bytes of the union. MODE is the argument's
6997 machine mode. SLOTNO is the index number of the argument's slot in
6998 the parameter array. REGNO is the hard register the union will be passed in. */
7000 static rtx
7001 function_arg_union_value (int size, machine_mode mode, int slotno,
7002 int regno)
7004 int nwords = CEIL_NWORDS (size), i;
7005 rtx regs;
7007 /* See comment in previous function for empty structures. */
7008 if (nwords == 0)
7009 return gen_rtx_REG (mode, regno);
7011 if (slotno == SPARC_INT_ARG_MAX - 1)
7012 nwords = 1;
7014 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7016 for (i = 0; i < nwords; i++)
7018 /* Unions are passed left-justified. */
7019 XVECEXP (regs, 0, i)
7020 = gen_rtx_EXPR_LIST (VOIDmode,
7021 gen_rtx_REG (word_mode, regno),
7022 GEN_INT (UNITS_PER_WORD * i));
7023 regno++;
7026 return regs;
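/* Example: a hypothetical 12-byte union starting at slot 0 on 64-bit
   occupies CEIL_NWORDS (12) == 2 slots and yields

       (parallel [(expr_list (reg:DI %o0) (const_int 0))
                  (expr_list (reg:DI %o1) (const_int 8))])

   i.e. it is passed left-justified in consecutive integer registers.  */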
7029 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7030 for passing and returning BLKmode vectors.
7031 Return an expression valid as a return value for the FUNCTION_ARG
7032 and TARGET_FUNCTION_VALUE.
7034 SIZE is the size in bytes of the vector.
7035 REGNO is the FP hard register the vector will be passed in. */
7037 static rtx
7038 function_arg_vector_value (int size, int regno)
7040 const int nregs = MAX (1, size / 8);
7041 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7043 if (size < 8)
7044 XVECEXP (regs, 0, 0)
7045 = gen_rtx_EXPR_LIST (VOIDmode,
7046 gen_rtx_REG (SImode, regno),
7047 const0_rtx);
7048 else
7049 for (int i = 0; i < nregs; i++)
7050 XVECEXP (regs, 0, i)
7051 = gen_rtx_EXPR_LIST (VOIDmode,
7052 gen_rtx_REG (DImode, regno + 2*i),
7053 GEN_INT (i*8));
7055 return regs;
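/* Example: a 16-byte BLKmode vector passed starting at %f0 is split
   into two DImode pieces:

       (parallel [(expr_list (reg:DI %d0) (const_int 0))
                  (expr_list (reg:DI %d2) (const_int 8))])

   where %d0 and %d2 are the double FP registers %f0-%f1 and %f2-%f3.  */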
7058 /* Determine where to put an argument to a function.
7059 Value is zero to push the argument on the stack,
7060 or a hard register in which to store the argument.
7062 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7063 the preceding args and about the function being called.
7064 MODE is the argument's machine mode.
7065 TYPE is the data type of the argument (as a tree).
7066 This is null for libcalls where that information may
7067 not be available.
7068 NAMED is true if this argument is a named parameter
7069 (otherwise it is an extra parameter matching an ellipsis).
7070 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7071 TARGET_FUNCTION_INCOMING_ARG. */
7073 static rtx
7074 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7075 const_tree type, bool named, bool incoming)
7077 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7079 int regbase = (incoming
7080 ? SPARC_INCOMING_INT_ARG_FIRST
7081 : SPARC_OUTGOING_INT_ARG_FIRST);
7082 int slotno, regno, padding;
7083 enum mode_class mclass = GET_MODE_CLASS (mode);
7085 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7086 &regno, &padding);
7087 if (slotno == -1)
7088 return 0;
7090 /* Vector types deserve special treatment because they are polymorphic wrt
7091 their mode, depending upon whether VIS instructions are enabled. */
7092 if (type && TREE_CODE (type) == VECTOR_TYPE)
7094 HOST_WIDE_INT size = int_size_in_bytes (type);
7095 gcc_assert ((TARGET_ARCH32 && size <= 8)
7096 || (TARGET_ARCH64 && size <= 16));
7098 if (mode == BLKmode)
7099 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7101 mclass = MODE_FLOAT;
7104 if (TARGET_ARCH32)
7105 return gen_rtx_REG (mode, regno);
7107 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7108 and are promoted to registers if possible. */
7109 if (type && TREE_CODE (type) == RECORD_TYPE)
7111 HOST_WIDE_INT size = int_size_in_bytes (type);
7112 gcc_assert (size <= 16);
7114 return function_arg_record_value (type, mode, slotno, named, regbase);
7117 /* Unions up to 16 bytes in size are passed in integer registers. */
7118 else if (type && TREE_CODE (type) == UNION_TYPE)
7120 HOST_WIDE_INT size = int_size_in_bytes (type);
7121 gcc_assert (size <= 16);
7123 return function_arg_union_value (size, mode, slotno, regno);
7126 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7127 but also have the slot allocated for them.
7128 If no prototype is in scope fp values in register slots get passed
7129 in two places, either fp regs and int regs or fp regs and memory. */
7130 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7131 && SPARC_FP_REG_P (regno))
7133 rtx reg = gen_rtx_REG (mode, regno);
7134 if (cum->prototype_p || cum->libcall_p)
7135 return reg;
7136 else
7138 rtx v0, v1;
7140 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7142 int intreg;
7144 /* On incoming, we don't need to know that the value
7145 is passed in %f0 and %i0, and it confuses other parts,
7146 causing needless spillage even in the simplest cases. */
7147 if (incoming)
7148 return reg;
7150 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7151 + (regno - SPARC_FP_ARG_FIRST) / 2);
7153 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7154 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7155 const0_rtx);
7156 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7158 else
7160 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7161 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7162 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7167 /* All other aggregate types are passed in an integer register in a mode
7168 corresponding to the size of the type. */
7169 else if (type && AGGREGATE_TYPE_P (type))
7171 HOST_WIDE_INT size = int_size_in_bytes (type);
7172 gcc_assert (size <= 16);
7174 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7177 return gen_rtx_REG (mode, regno);
7180 /* Handle the TARGET_FUNCTION_ARG target hook. */
7182 static rtx
7183 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7184 const_tree type, bool named)
7186 return sparc_function_arg_1 (cum, mode, type, named, false);
7189 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7191 static rtx
7192 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7193 const_tree type, bool named)
7195 return sparc_function_arg_1 (cum, mode, type, named, true);
7198 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
7200 static unsigned int
7201 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7203 return ((TARGET_ARCH64
7204 && (GET_MODE_ALIGNMENT (mode) == 128
7205 || (type && TYPE_ALIGN (type) == 128)))
7206 ? 128
7207 : PARM_BOUNDARY);
7210 /* For an arg passed partly in registers and partly in memory,
7211 this is the number of bytes of registers used.
7212 For args passed entirely in registers or entirely in memory, zero.
7214 Any arg that starts in the first 6 regs but won't entirely fit in them
7215 needs partial registers on v8. On v9, structures with integer
7216 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7217 values that begin in the last fp reg [where "last fp reg" varies with the
7218 mode] will be split between that reg and memory. */
7220 static int
7221 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7222 tree type, bool named)
7224 int slotno, regno, padding;
7226 /* We pass false for incoming here; it doesn't matter. */
7227 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7228 false, &regno, &padding);
7230 if (slotno == -1)
7231 return 0;
7233 if (TARGET_ARCH32)
7235 if ((slotno + (mode == BLKmode
7236 ? CEIL_NWORDS (int_size_in_bytes (type))
7237 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7238 > SPARC_INT_ARG_MAX)
7239 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7241 else
7243 /* We are guaranteed by pass_by_reference that the size of the
7244 argument is not greater than 16 bytes, so we only need to return
7245 one word if the argument is partially passed in registers. */
7247 if (type && AGGREGATE_TYPE_P (type))
7249 int size = int_size_in_bytes (type);
7251 if (size > UNITS_PER_WORD
7252 && (slotno == SPARC_INT_ARG_MAX - 1
7253 || slotno == SPARC_FP_ARG_MAX - 1))
7254 return UNITS_PER_WORD;
7256 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7257 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7258 && ! (TARGET_FPU && named)))
7260 /* The complex types are passed as packed types. */
7261 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7262 && slotno == SPARC_INT_ARG_MAX - 1)
7263 return UNITS_PER_WORD;
7265 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7267 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7268 > SPARC_FP_ARG_MAX)
7269 return UNITS_PER_WORD;
7273 return 0;
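/* Example: on 64-bit, a hypothetical 16-byte aggregate landing in slot
   SPARC_INT_ARG_MAX - 1 == 5 gets its first word in the last integer
   argument register and its second word on the stack, so the hook
   returns UNITS_PER_WORD.  */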
7276 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7277 Specify whether to pass the argument by reference. */
7279 static bool
7280 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7281 machine_mode mode, const_tree type,
7282 bool named ATTRIBUTE_UNUSED)
7284 if (TARGET_ARCH32)
7285 /* Original SPARC 32-bit ABI says that structures and unions,
7286 and quad-precision floats are passed by reference. For Pascal,
7287 also pass arrays by reference. All other base types are passed
7288 in registers.
7290 Extended ABI (as implemented by the Sun compiler) says that all
7291 complex floats are passed by reference. Pass complex integers
7292 in registers up to 8 bytes. More generally, enforce the 2-word
7293 cap for passing arguments in registers.
7295 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7296 integers are passed like floats of the same size, that is in
7297 registers up to 8 bytes. Pass all vector floats by reference
7298 like structure and unions. */
7299 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7300 || mode == SCmode
7301 /* Catch CDImode, TFmode, DCmode and TCmode. */
7302 || GET_MODE_SIZE (mode) > 8
7303 || (type
7304 && TREE_CODE (type) == VECTOR_TYPE
7305 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7306 else
7307 /* Original SPARC 64-bit ABI says that structures and unions
7308 smaller than 16 bytes are passed in registers, as well as
7309 all other base types.
7311 Extended ABI (as implemented by the Sun compiler) says that
7312 complex floats are passed in registers up to 16 bytes. Pass
7313 all complex integers in registers up to 16 bytes. More generally,
7314 enforce the 2-word cap for passing arguments in registers.
7316 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7317 integers are passed like floats of the same size, that is in
7318 registers (up to 16 bytes). Pass all vector floats like structure
7319 and unions. */
7320 return ((type
7321 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7322 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7323 /* Catch CTImode and TCmode. */
7324 || GET_MODE_SIZE (mode) > 16);
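/* Consequences of the rules above for a few common cases:

       type (size in bytes)     32-bit ABI      64-bit ABI
       long double (16)         by reference    by value
       _Complex float (8)       by reference    by value
       aggregate of 16          by reference    by value
       aggregate of 24          by reference    by reference  */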
7327 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7328 Update the data in CUM to advance over an argument
7329 of mode MODE and data type TYPE.
7330 TYPE is null for libcalls where that information may not be available. */
7332 static void
7333 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7334 const_tree type, bool named)
7336 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7337 int regno, padding;
7339 /* We pass false for incoming here; it doesn't matter. */
7340 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7342 /* If argument requires leading padding, add it. */
7343 cum->words += padding;
7345 if (TARGET_ARCH32)
7346 cum->words += (mode == BLKmode
7347 ? CEIL_NWORDS (int_size_in_bytes (type))
7348 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7349 else
7351 if (type && AGGREGATE_TYPE_P (type))
7353 int size = int_size_in_bytes (type);
7355 if (size <= 8)
7356 ++cum->words;
7357 else if (size <= 16)
7358 cum->words += 2;
7359 else /* passed by reference */
7360 ++cum->words;
7362 else
7363 cum->words += (mode == BLKmode
7364 ? CEIL_NWORDS (int_size_in_bytes (type))
7365 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7369 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7370 are always stored left shifted in their argument slot. */
7372 static pad_direction
7373 sparc_function_arg_padding (machine_mode mode, const_tree type)
7375 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7376 return PAD_UPWARD;
7378 /* Fall back to the default. */
7379 return default_function_arg_padding (mode, type);
7382 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7383 Specify whether to return the return value in memory. */
7385 static bool
7386 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7388 if (TARGET_ARCH32)
7389 /* Original SPARC 32-bit ABI says that structures and unions,
7390 and quad-precision floats are returned in memory. All other
7391 base types are returned in registers.
7393 Extended ABI (as implemented by the Sun compiler) says that
7394 all complex floats are returned in registers (8 FP registers
7395 at most for '_Complex long double'). Return all complex integers
7396 in registers (4 at most for '_Complex long long').
7398 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7399 integers are returned like floats of the same size, that is in
7400 registers up to 8 bytes and in memory otherwise. Return all
7401 vector floats in memory like structure and unions; note that
7402 they always have BLKmode like the latter. */
7403 return (TYPE_MODE (type) == BLKmode
7404 || TYPE_MODE (type) == TFmode
7405 || (TREE_CODE (type) == VECTOR_TYPE
7406 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7407 else
7408 /* Original SPARC 64-bit ABI says that structures and unions
7409 smaller than 32 bytes are returned in registers, as well as
7410 all other base types.
7412 Extended ABI (as implemented by the Sun compiler) says that all
7413 complex floats are returned in registers (8 FP registers at most
7414 for '_Complex long double'). Return all complex integers in
7415 registers (4 at most for '_Complex TItype').
7417 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7418 integers are returned like floats of the same size, that is in
7419 registers. Return all vector floats like structure and unions;
7420 note that they always have BLKmode like the latter. */
7421 return (TYPE_MODE (type) == BLKmode
7422 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7425 /* Handle the TARGET_STRUCT_VALUE target hook.
7426 Return where to find the structure return value address. */
7428 static rtx
7429 sparc_struct_value_rtx (tree fndecl, int incoming)
7431 if (TARGET_ARCH64)
7432 return 0;
7433 else
7435 rtx mem;
7437 if (incoming)
7438 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7439 STRUCT_VALUE_OFFSET));
7440 else
7441 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7442 STRUCT_VALUE_OFFSET));
7444 /* Only follow the SPARC ABI for fixed-size structure returns.
7445 Variable-size structure returns are handled per the normal
7446 procedures in GCC. This is enabled by -mstd-struct-return. */
7447 if (incoming == 2
7448 && sparc_std_struct_return
7449 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7450 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7452 /* We must check and adjust the return address, since the caller
7453 may or may not have really provided the return object. */
7454 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7455 rtx scratch = gen_reg_rtx (SImode);
7456 rtx_code_label *endlab = gen_label_rtx ();
7458 /* Calculate the return object size. */
7459 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7460 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7461 /* Construct a temporary return value. */
7462 rtx temp_val
7463 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7465 /* Implement SPARC 32-bit psABI callee return struct checking:
7467 Fetch the instruction where we will return to and see if
7468 it's an unimp instruction (the most significant 10 bits
7469 will be zero). */
7470 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7471 plus_constant (Pmode,
7472 ret_reg, 8)));
7473 /* Assume the size is valid and pre-adjust. */
7474 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7475 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7476 0, endlab);
7477 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7478 /* Write the address of the memory pointed to by temp_val into
7479 the memory pointed to by mem. */
7480 emit_move_insn (mem, XEXP (temp_val, 0));
7481 emit_label (endlab);
7484 return mem;
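/* Caller-side picture of the convention checked above (32-bit psABI):

       call   foo
        mov   %l1, %o0         ! delay slot
       unimp  12               ! low bits hold the size of the struct
       ...                     ! a callee that stored the result
                               ! returns to %i7+12, skipping the unimp

   If the word fetched at the return address does not match the size,
   the +4 adjustment is undone and the struct pointer is redirected to
   a local temporary so the function can still store its result.  */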
7488 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7489 For v9, function return values are subject to the same rules as arguments,
7490 except that up to 32 bytes may be returned in registers. */
7492 static rtx
7493 sparc_function_value_1 (const_tree type, machine_mode mode,
7494 bool outgoing)
7496 /* Beware that the two values are swapped here wrt function_arg. */
7497 int regbase = (outgoing
7498 ? SPARC_INCOMING_INT_ARG_FIRST
7499 : SPARC_OUTGOING_INT_ARG_FIRST);
7500 enum mode_class mclass = GET_MODE_CLASS (mode);
7501 int regno;
7503 /* Vector types deserve special treatment because they are polymorphic wrt
7504 their mode, depending upon whether VIS instructions are enabled. */
7505 if (type && TREE_CODE (type) == VECTOR_TYPE)
7507 HOST_WIDE_INT size = int_size_in_bytes (type);
7508 gcc_assert ((TARGET_ARCH32 && size <= 8)
7509 || (TARGET_ARCH64 && size <= 32));
7511 if (mode == BLKmode)
7512 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7514 mclass = MODE_FLOAT;
7517 if (TARGET_ARCH64 && type)
7519 /* Structures up to 32 bytes in size are returned in registers. */
7520 if (TREE_CODE (type) == RECORD_TYPE)
7522 HOST_WIDE_INT size = int_size_in_bytes (type);
7523 gcc_assert (size <= 32);
7525 return function_arg_record_value (type, mode, 0, 1, regbase);
7528 /* Unions up to 32 bytes in size are returned in integer registers. */
7529 else if (TREE_CODE (type) == UNION_TYPE)
7531 HOST_WIDE_INT size = int_size_in_bytes (type);
7532 gcc_assert (size <= 32);
7534 return function_arg_union_value (size, mode, 0, regbase);
7537 /* Objects that require it are returned in FP registers. */
7538 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7541 /* All other aggregate types are returned in an integer register in a
7542 mode corresponding to the size of the type. */
7543 else if (AGGREGATE_TYPE_P (type))
7547 HOST_WIDE_INT size = int_size_in_bytes (type);
7548 gcc_assert (size <= 32);
7550 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7552 /* ??? We probably should have made the same ABI change in
7553 3.4.0 as the one we made for unions. The latter was
7554 required by the SCD though, while the former is not
7555 specified, so we favored compatibility and efficiency.
7557 Now we're stuck for aggregates larger than 16 bytes,
7558 because OImode vanished in the meantime. Let's not
7559 try to be unduly clever, and simply follow the ABI
7560 for unions in that case. */
7561 if (mode == BLKmode)
7562 return function_arg_union_value (size, mode, 0, regbase);
7563 else
7564 mclass = MODE_INT;
7567 /* We should only have pointer and integer types at this point. This
7568 must match sparc_promote_function_mode. */
7569 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7570 mode = word_mode;
7573 /* We should only have pointer and integer types at this point, except with
7574 -freg-struct-return. This must match sparc_promote_function_mode. */
7575 else if (TARGET_ARCH32
7576 && !(type && AGGREGATE_TYPE_P (type))
7577 && mclass == MODE_INT
7578 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7579 mode = word_mode;
7581 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7582 regno = SPARC_FP_ARG_FIRST;
7583 else
7584 regno = regbase;
7586 return gen_rtx_REG (mode, regno);
7589 /* Handle TARGET_FUNCTION_VALUE.
7590 On the SPARC, the value is found in the first "output" register, but the
7591 called function leaves it in the first "input" register. */
7593 static rtx
7594 sparc_function_value (const_tree valtype,
7595 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7596 bool outgoing)
7598 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7601 /* Handle TARGET_LIBCALL_VALUE. */
7603 static rtx
7604 sparc_libcall_value (machine_mode mode,
7605 const_rtx fun ATTRIBUTE_UNUSED)
7607 return sparc_function_value_1 (NULL_TREE, mode, false);
7610 /* Handle FUNCTION_VALUE_REGNO_P.
7611 On the SPARC, the first "output" reg is used for integer values, and the
7612 first floating point register is used for floating point values. */
7614 static bool
7615 sparc_function_value_regno_p (const unsigned int regno)
7617 return (regno == 8 || (TARGET_FPU && regno == 32));
7620 /* Do what is necessary for `va_start'. We look at the current function
7621 to determine if stdarg or varargs is used and return the address of
7622 the first unnamed parameter. */
7624 static rtx
7625 sparc_builtin_saveregs (void)
7627 int first_reg = crtl->args.info.words;
7628 rtx address;
7629 int regno;
7631 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7632 emit_move_insn (gen_rtx_MEM (word_mode,
7633 gen_rtx_PLUS (Pmode,
7634 frame_pointer_rtx,
7635 GEN_INT (FIRST_PARM_OFFSET (0)
7636 + (UNITS_PER_WORD
7637 * regno)))),
7638 gen_rtx_REG (word_mode,
7639 SPARC_INCOMING_INT_ARG_FIRST + regno));
7641 address = gen_rtx_PLUS (Pmode,
7642 frame_pointer_rtx,
7643 GEN_INT (FIRST_PARM_OFFSET (0)
7644 + UNITS_PER_WORD * first_reg));
7646 return address;
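/* Illustrative sketch (not part of GCC): for a 32-bit SPARC function

     int f (int a, ...);

   the named argument consumes one register, so first_reg is 1 and the
   loop above effectively stores the remaining incoming registers

     st %i1, [%fp+72]
     st %i2, [%fp+76]
     ...
     st %i5, [%fp+88]

   and returns %fp+72, the address of the first unnamed parameter.
   The offsets assume FIRST_PARM_OFFSET (0) is 68 under the 32-bit
   ABI; sparc.h has the authoritative definition.  */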
7649 /* Implement `va_start' for stdarg. */
7651 static void
7652 sparc_va_start (tree valist, rtx nextarg)
7654 nextarg = expand_builtin_saveregs ();
7655 std_expand_builtin_va_start (valist, nextarg);
7658 /* Implement `va_arg' for stdarg. */
7660 static tree
7661 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7662 gimple_seq *post_p)
7664 HOST_WIDE_INT size, rsize, align;
7665 tree addr, incr;
7666 bool indirect;
7667 tree ptrtype = build_pointer_type (type);
7669 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7671 indirect = true;
7672 size = rsize = UNITS_PER_WORD;
7673 align = 0;
7675 else
7677 indirect = false;
7678 size = int_size_in_bytes (type);
7679 rsize = ROUND_UP (size, UNITS_PER_WORD);
7680 align = 0;
7682 if (TARGET_ARCH64)
7684 /* For SPARC64, objects requiring 16-byte alignment get it. */
7685 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7686 align = 2 * UNITS_PER_WORD;
7688 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7689 are left-justified in their slots. */
7690 if (AGGREGATE_TYPE_P (type))
7692 if (size == 0)
7693 size = rsize = UNITS_PER_WORD;
7694 else
7695 size = rsize;
7700 incr = valist;
7701 if (align)
7703 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7704 incr = fold_convert (sizetype, incr);
7705 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7706 size_int (-align));
7707 incr = fold_convert (ptr_type_node, incr);
7710 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7711 addr = incr;
7713 if (BYTES_BIG_ENDIAN && size < rsize)
7714 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7716 if (indirect)
7718 addr = fold_convert (build_pointer_type (ptrtype), addr);
7719 addr = build_va_arg_indirect_ref (addr);
7722 /* If the address isn't aligned properly for the type, we need a temporary.
7723 FIXME: This is inefficient, usually we can do this in registers. */
7724 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7726 tree tmp = create_tmp_var (type, "va_arg_tmp");
7727 tree dest_addr = build_fold_addr_expr (tmp);
7728 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7729 3, dest_addr, addr, size_int (rsize));
7730 TREE_ADDRESSABLE (tmp) = 1;
7731 gimplify_and_add (copy, pre_p);
7732 addr = dest_addr;
7735 else
7736 addr = fold_convert (ptrtype, addr);
7738 incr = fold_build_pointer_plus_hwi (incr, rsize);
7739 gimplify_assign (valist, incr, post_p);
7741 return build_va_arg_indirect_ref (addr);
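/* For illustration (not part of GCC): the BIT_AND_EXPR sequence above
   is the classic align-up idiom. A minimal standalone C sketch,
   assuming ALIGN is a power of two:

     #include <stdint.h>

     static uintptr_t
     align_up (uintptr_t p, uintptr_t align)
     {
       return (p + align - 1) & -align;
     }

   e.g. align_up (0x1004, 16) yields 0x1010. The later
   fold_build_pointer_plus_hwi (incr, rsize - size) is the matching
   big-endian fixup: a value smaller than its slot lives in the slot's
   most significant bytes, so the address is bumped past the padding.  */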
7744 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7745 Specify whether the vector mode is supported by the hardware. */
7747 static bool
7748 sparc_vector_mode_supported_p (machine_mode mode)
7750 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7753 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7755 static machine_mode
7756 sparc_preferred_simd_mode (scalar_mode mode)
7758 if (TARGET_VIS)
7759 switch (mode)
7761 case E_SImode:
7762 return V2SImode;
7763 case E_HImode:
7764 return V4HImode;
7765 case E_QImode:
7766 return V8QImode;
7768 default:;
7771 return word_mode;
7774 /* Return the string to output an unconditional branch to LABEL, which is
7775 the operand number of the label.
7777 DEST is the destination insn (i.e. the label), INSN is the source. */
7779 const char *
7780 output_ubranch (rtx dest, rtx_insn *insn)
7782 static char string[64];
7783 bool v9_form = false;
7784 int delta;
7785 char *p;
7787 /* Even if we are trying to use cbcond for this, evaluate
7788 whether we can use V9 branches as our backup plan. */
7790 delta = 5000000;
7791 if (INSN_ADDRESSES_SET_P ())
7792 delta = (INSN_ADDRESSES (INSN_UID (dest))
7793 - INSN_ADDRESSES (INSN_UID (insn)));
7795 /* Leave some instructions for "slop". */
7796 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7797 v9_form = true;
7799 if (TARGET_CBCOND)
7801 bool emit_nop = emit_cbcond_nop (insn);
7802 bool far = false;
7803 const char *rval;
7805 if (delta < -500 || delta > 500)
7806 far = true;
7808 if (far)
7810 if (v9_form)
7811 rval = "ba,a,pt\t%%xcc, %l0";
7812 else
7813 rval = "b,a\t%l0";
7815 else
7817 if (emit_nop)
7818 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7819 else
7820 rval = "cwbe\t%%g0, %%g0, %l0";
7822 return rval;
7825 if (v9_form)
7826 strcpy (string, "ba%*,pt\t%%xcc, ");
7827 else
7828 strcpy (string, "b%*\t");
7830 p = strchr (string, '\0');
7831 *p++ = '%';
7832 *p++ = 'l';
7833 *p++ = '0';
7834 *p++ = '%';
7835 *p++ = '(';
7836 *p = '\0';
7838 return string;
7841 /* Return the string to output a conditional branch to LABEL, which is
7842 the operand number of the label. OP is the conditional expression.
7843 XEXP (OP, 0) is assumed to be a condition code register (integer or
7844 floating point) and its mode specifies what kind of comparison we made.
7846 DEST is the destination insn (i.e. the label), INSN is the source.
7848 REVERSED is nonzero if we should reverse the sense of the comparison.
7850 ANNUL is nonzero if we should generate an annulling branch. */
7852 const char *
7853 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7854 rtx_insn *insn)
7856 static char string[64];
7857 enum rtx_code code = GET_CODE (op);
7858 rtx cc_reg = XEXP (op, 0);
7859 machine_mode mode = GET_MODE (cc_reg);
7860 const char *labelno, *branch;
7861 int spaces = 8, far;
7862 char *p;
7864 /* v9 branches are limited to +-1MB. If it is too far away,
7865 change
7867 bne,pt %xcc, .LC30
7869 to
7871 be,pn %xcc, .+12
7872 nop
7873 ba .LC30
7875 and
7877 fbne,a,pn %fcc2, .LC29
7879 to
7881 fbe,pt %fcc2, .+16
7882 nop
7883 ba .LC29 */
7885 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7886 if (reversed ^ far)
7888 /* Reversal of FP compares takes care -- an ordered compare
7889 becomes an unordered compare and vice versa. */
7890 if (mode == CCFPmode || mode == CCFPEmode)
7891 code = reverse_condition_maybe_unordered (code);
7892 else
7893 code = reverse_condition (code);
7896 /* Start by writing the branch condition. */
7897 if (mode == CCFPmode || mode == CCFPEmode)
7899 switch (code)
7901 case NE:
7902 branch = "fbne";
7903 break;
7904 case EQ:
7905 branch = "fbe";
7906 break;
7907 case GE:
7908 branch = "fbge";
7909 break;
7910 case GT:
7911 branch = "fbg";
7912 break;
7913 case LE:
7914 branch = "fble";
7915 break;
7916 case LT:
7917 branch = "fbl";
7918 break;
7919 case UNORDERED:
7920 branch = "fbu";
7921 break;
7922 case ORDERED:
7923 branch = "fbo";
7924 break;
7925 case UNGT:
7926 branch = "fbug";
7927 break;
7928 case UNLT:
7929 branch = "fbul";
7930 break;
7931 case UNEQ:
7932 branch = "fbue";
7933 break;
7934 case UNGE:
7935 branch = "fbuge";
7936 break;
7937 case UNLE:
7938 branch = "fbule";
7939 break;
7940 case LTGT:
7941 branch = "fblg";
7942 break;
7943 default:
7944 gcc_unreachable ();
7947 /* ??? !v9: FP branches cannot be preceded by another floating point
7948 insn. Because there is currently no concept of pre-delay slots,
7949 we can fix this only by always emitting a nop before a floating
7950 point branch. */
7952 string[0] = '\0';
7953 if (! TARGET_V9)
7954 strcpy (string, "nop\n\t");
7955 strcat (string, branch);
7957 else
7959 switch (code)
7961 case NE:
7962 if (mode == CCVmode || mode == CCXVmode)
7963 branch = "bvs";
7964 else
7965 branch = "bne";
7966 break;
7967 case EQ:
7968 if (mode == CCVmode || mode == CCXVmode)
7969 branch = "bvc";
7970 else
7971 branch = "be";
7972 break;
7973 case GE:
7974 if (mode == CCNZmode || mode == CCXNZmode)
7975 branch = "bpos";
7976 else
7977 branch = "bge";
7978 break;
7979 case GT:
7980 branch = "bg";
7981 break;
7982 case LE:
7983 branch = "ble";
7984 break;
7985 case LT:
7986 if (mode == CCNZmode || mode == CCXNZmode)
7987 branch = "bneg";
7988 else
7989 branch = "bl";
7990 break;
7991 case GEU:
7992 branch = "bgeu";
7993 break;
7994 case GTU:
7995 branch = "bgu";
7996 break;
7997 case LEU:
7998 branch = "bleu";
7999 break;
8000 case LTU:
8001 branch = "blu";
8002 break;
8003 default:
8004 gcc_unreachable ();
8006 strcpy (string, branch);
8008 spaces -= strlen (branch);
8009 p = strchr (string, '\0');
8011 /* Now add the annulling, the label, and a possible noop. */
8012 if (annul && ! far)
8014 strcpy (p, ",a");
8015 p += 2;
8016 spaces -= 2;
8019 if (TARGET_V9)
8021 rtx note;
8022 int v8 = 0;
8024 if (! far && insn && INSN_ADDRESSES_SET_P ())
8026 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8027 - INSN_ADDRESSES (INSN_UID (insn)));
8028 /* Leave some instructions for "slop". */
8029 if (delta < -260000 || delta >= 260000)
8030 v8 = 1;
8033 switch (mode)
8035 case E_CCmode:
8036 case E_CCNZmode:
8037 case E_CCCmode:
8038 case E_CCVmode:
8039 labelno = "%%icc, ";
8040 if (v8)
8041 labelno = "";
8042 break;
8043 case E_CCXmode:
8044 case E_CCXNZmode:
8045 case E_CCXCmode:
8046 case E_CCXVmode:
8047 labelno = "%%xcc, ";
8048 gcc_assert (!v8);
8049 break;
8050 case E_CCFPmode:
8051 case E_CCFPEmode:
8053 static char v9_fcc_labelno[] = "%%fccX, ";
8054 /* Set the char indicating the number of the fcc reg to use. */
8055 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8056 labelno = v9_fcc_labelno;
8057 if (v8)
8059 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8060 labelno = "";
8063 break;
8064 default:
8065 gcc_unreachable ();
8068 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8070 strcpy (p,
8071 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8072 >= profile_probability::even ()) ^ far)
8073 ? ",pt" : ",pn");
8074 p += 3;
8075 spaces -= 3;
8078 else
8079 labelno = "";
8081 if (spaces > 0)
8082 *p++ = '\t';
8083 else
8084 *p++ = ' ';
8085 strcpy (p, labelno);
8086 p = strchr (p, '\0');
8087 if (far)
8089 strcpy (p, ".+12\n\t nop\n\tb\t");
8090 /* Skip the next insn if requested or
8091 if we know that it will be a nop. */
8092 if (annul || ! final_sequence)
8093 p[3] = '6';
8094 p += 14;
8096 *p++ = '%';
8097 *p++ = 'l';
8098 *p++ = label + '0';
8099 *p++ = '%';
8100 *p++ = '#';
8101 *p = '\0';
8103 return string;
8106 /* Emit a library call comparison between floating point X and Y.
8107 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8108 Return the new operator to be used in the comparison sequence.
8110 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8111 values as arguments instead of the TFmode registers themselves,
8112 that's why we cannot call emit_float_lib_cmp. */
8114 rtx
8115 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8117 const char *qpfunc;
8118 rtx slot0, slot1, result, tem, tem2, libfunc;
8119 machine_mode mode;
8120 enum rtx_code new_comparison;
8122 switch (comparison)
8124 case EQ:
8125 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8126 break;
8128 case NE:
8129 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8130 break;
8132 case GT:
8133 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8134 break;
8136 case GE:
8137 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8138 break;
8140 case LT:
8141 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8142 break;
8144 case LE:
8145 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8146 break;
8148 case ORDERED:
8149 case UNORDERED:
8150 case UNGT:
8151 case UNLT:
8152 case UNEQ:
8153 case UNGE:
8154 case UNLE:
8155 case LTGT:
8156 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8157 break;
8159 default:
8160 gcc_unreachable ();
8163 if (TARGET_ARCH64)
8165 if (MEM_P (x))
8167 tree expr = MEM_EXPR (x);
8168 if (expr)
8169 mark_addressable (expr);
8170 slot0 = x;
8172 else
8174 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8175 emit_move_insn (slot0, x);
8178 if (MEM_P (y))
8180 tree expr = MEM_EXPR (y);
8181 if (expr)
8182 mark_addressable (expr);
8183 slot1 = y;
8185 else
8187 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8188 emit_move_insn (slot1, y);
8191 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8192 emit_library_call (libfunc, LCT_NORMAL,
8193 DImode,
8194 XEXP (slot0, 0), Pmode,
8195 XEXP (slot1, 0), Pmode);
8196 mode = DImode;
8198 else
8200 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8201 emit_library_call (libfunc, LCT_NORMAL,
8202 SImode,
8203 x, TFmode, y, TFmode);
8204 mode = SImode;
8208 /* Immediately move the result of the libcall into a pseudo
8209 register so reload doesn't clobber the value if it needs
8210 the return register for a spill reg. */
8211 result = gen_reg_rtx (mode);
8212 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8214 switch (comparison)
8216 default:
8217 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8218 case ORDERED:
8219 case UNORDERED:
8220 new_comparison = (comparison == UNORDERED ? EQ : NE);
8221 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8222 case UNGT:
8223 case UNGE:
8224 new_comparison = (comparison == UNGT ? GT : NE);
8225 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8226 case UNLE:
8227 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8228 case UNLT:
8229 tem = gen_reg_rtx (mode);
8230 if (TARGET_ARCH32)
8231 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8232 else
8233 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8234 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8235 case UNEQ:
8236 case LTGT:
8237 tem = gen_reg_rtx (mode);
8238 if (TARGET_ARCH32)
8239 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8240 else
8241 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8242 tem2 = gen_reg_rtx (mode);
8243 if (TARGET_ARCH32)
8244 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8245 else
8246 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8247 new_comparison = (comparison == UNEQ ? EQ : NE);
8248 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8251 gcc_unreachable ();
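/* For illustration (not part of GCC): _Q_cmp/_Qp_cmp return 0 (equal),
   1 (less), 2 (greater) or 3 (unordered), and the decoding above
   relies on that encoding. The same tests as a standalone C sketch,
   assuming that convention:

     int test_unordered (int r) { return r == 3; }
     int test_ungt (int r) { return r > 1; }              // greater or unordered
     int test_unge (int r) { return r != 1; }             // anything but less
     int test_unle (int r) { return r != 2; }             // anything but greater
     int test_unlt (int r) { return (r & 1) != 0; }       // less or unordered
     int test_uneq (int r) { return ((r + 1) & 2) == 0; } // equal or unordered

   LTGT is the complement of UNEQ, hence the NE test on the same
   masked value above.  */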
8254 /* Generate an unsigned DImode to FP conversion. This is the same code
8255 optabs would emit if we didn't have TFmode patterns. */
8257 void
8258 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8260 rtx i0, i1, f0, in, out;
8262 out = operands[0];
8263 in = force_reg (DImode, operands[1]);
8264 rtx_code_label *neglab = gen_label_rtx ();
8265 rtx_code_label *donelab = gen_label_rtx ();
8266 i0 = gen_reg_rtx (DImode);
8267 i1 = gen_reg_rtx (DImode);
8268 f0 = gen_reg_rtx (mode);
8270 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8272 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8273 emit_jump_insn (gen_jump (donelab));
8274 emit_barrier ();
8276 emit_label (neglab);
8278 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8279 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8280 emit_insn (gen_iordi3 (i0, i0, i1));
8281 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8282 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8284 emit_label (donelab);
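/* For illustration (not part of GCC): a minimal C sketch of the same
   expansion for double, assuming 64-bit long long:

     double
     u64_to_double (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) (long long) x;  // fits the signed range

       // Halve, keeping the low bit sticky so rounding is unaffected,
       // convert, then double: this matches the lshrdi3/anddi3/iordi3
       // triple and the final PLUS above.
       unsigned long long half = (x >> 1) | (x & 1);
       double d = (double) (long long) half;
       return d + d;
     }
*/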
8287 /* Generate an FP to unsigned DImode conversion. This is the same code
8288 optabs would emit if we didn't have TFmode patterns. */
8290 void
8291 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8293 rtx i0, i1, f0, in, out, limit;
8295 out = operands[0];
8296 in = force_reg (mode, operands[1]);
8297 rtx_code_label *neglab = gen_label_rtx ();
8298 rtx_code_label *donelab = gen_label_rtx ();
8299 i0 = gen_reg_rtx (DImode);
8300 i1 = gen_reg_rtx (DImode);
8301 limit = gen_reg_rtx (mode);
8302 f0 = gen_reg_rtx (mode);
8304 emit_move_insn (limit,
8305 const_double_from_real_value (
8306 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8307 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8309 emit_insn (gen_rtx_SET (out,
8310 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8311 emit_jump_insn (gen_jump (donelab));
8312 emit_barrier ();
8314 emit_label (neglab);
8316 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8317 emit_insn (gen_rtx_SET (i0,
8318 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8319 emit_insn (gen_movdi (i1, const1_rtx));
8320 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8321 emit_insn (gen_xordi3 (out, i0, i1));
8323 emit_label (donelab);
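/* For illustration (not part of GCC): the reverse trick, as a minimal
   C sketch assuming truncating (round-toward-zero) conversions:

     unsigned long long
     double_to_u64 (double x)
     {
       const double limit = 9223372036854775808.0;  // 2^63
       if (x < limit)
         return (unsigned long long) (long long) x;

       // Too big for a signed conversion: subtract 2^63, convert,
       // then restore the top bit -- the movdi/ashldi3/xordi3
       // sequence above.
       long long i = (long long) (x - limit);
       return (unsigned long long) i ^ (1ULL << 63);
     }
*/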
8326 /* Return the string to output a compare and branch instruction to DEST.
8327 DEST is the destination insn (i.e. the label), INSN is the source,
8328 and OP is the conditional expression. */
8330 const char *
8331 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8333 machine_mode mode = GET_MODE (XEXP (op, 0));
8334 enum rtx_code code = GET_CODE (op);
8335 const char *cond_str, *tmpl;
8336 int far, emit_nop, len;
8337 static char string[64];
8338 char size_char;
8340 /* Compare and Branch is limited to +-2KB. If it is too far away,
8341 change
8343 cxbne X, Y, .LC30
8345 to
8347 cxbe X, Y, .+16
8348 nop
8349 ba,pt xcc, .LC30
8350 nop */
8352 len = get_attr_length (insn);
8354 far = len == 4;
8355 emit_nop = len == 2;
8357 if (far)
8358 code = reverse_condition (code);
8360 size_char = ((mode == SImode) ? 'w' : 'x');
8362 switch (code)
8364 case NE:
8365 cond_str = "ne";
8366 break;
8368 case EQ:
8369 cond_str = "e";
8370 break;
8372 case GE:
8373 cond_str = "ge";
8374 break;
8376 case GT:
8377 cond_str = "g";
8378 break;
8380 case LE:
8381 cond_str = "le";
8382 break;
8384 case LT:
8385 cond_str = "l";
8386 break;
8388 case GEU:
8389 cond_str = "cc";
8390 break;
8392 case GTU:
8393 cond_str = "gu";
8394 break;
8396 case LEU:
8397 cond_str = "leu";
8398 break;
8400 case LTU:
8401 cond_str = "cs";
8402 break;
8404 default:
8405 gcc_unreachable ();
8408 if (far)
8410 int veryfar = 1, delta;
8412 if (INSN_ADDRESSES_SET_P ())
8414 delta = (INSN_ADDRESSES (INSN_UID (dest))
8415 - INSN_ADDRESSES (INSN_UID (insn)));
8416 /* Leave some instructions for "slop". */
8417 if (delta >= -260000 && delta < 260000)
8418 veryfar = 0;
8421 if (veryfar)
8422 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8423 else
8424 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8426 else
8428 if (emit_nop)
8429 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8430 else
8431 tmpl = "c%cb%s\t%%1, %%2, %%3";
8434 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8436 return string;
8439 /* Return the string to output a conditional branch to LABEL, testing
8440 register REG. LABEL is the operand number of the label; REG is the
8441 operand number of the reg. OP is the conditional expression. The mode
8442 of REG says what kind of comparison we made.
8444 DEST is the destination insn (i.e. the label), INSN is the source.
8446 REVERSED is nonzero if we should reverse the sense of the comparison.
8448 ANNUL is nonzero if we should generate an annulling branch. */
8450 const char *
8451 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8452 int annul, rtx_insn *insn)
8454 static char string[64];
8455 enum rtx_code code = GET_CODE (op);
8456 machine_mode mode = GET_MODE (XEXP (op, 0));
8457 rtx note;
8458 int far;
8459 char *p;
8461 /* Branches on a register are limited to +-128KB. If it is too far away,
8462 change
8464 brnz,pt %g1, .LC30
8466 to
8468 brz,pn %g1, .+12
8469 nop
8470 ba,pt %xcc, .LC30
8472 and
8474 brgez,a,pn %o1, .LC29
8476 to
8478 brlz,pt %o1, .+16
8479 nop
8480 ba,pt %xcc, .LC29 */
8482 far = get_attr_length (insn) >= 3;
8484 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8485 if (reversed ^ far)
8486 code = reverse_condition (code);
8488 /* Only 64-bit versions of these instructions exist. */
8489 gcc_assert (mode == DImode);
8491 /* Start by writing the branch condition. */
8493 switch (code)
8495 case NE:
8496 strcpy (string, "brnz");
8497 break;
8499 case EQ:
8500 strcpy (string, "brz");
8501 break;
8503 case GE:
8504 strcpy (string, "brgez");
8505 break;
8507 case LT:
8508 strcpy (string, "brlz");
8509 break;
8511 case LE:
8512 strcpy (string, "brlez");
8513 break;
8515 case GT:
8516 strcpy (string, "brgz");
8517 break;
8519 default:
8520 gcc_unreachable ();
8523 p = strchr (string, '\0');
8525 /* Now add the annulling, reg, label, and nop. */
8526 if (annul && ! far)
8528 strcpy (p, ",a");
8529 p += 2;
8532 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8534 strcpy (p,
8535 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8536 >= profile_probability::even ()) ^ far)
8537 ? ",pt" : ",pn");
8538 p += 3;
8541 *p = p < string + 8 ? '\t' : ' ';
8542 p++;
8543 *p++ = '%';
8544 *p++ = '0' + reg;
8545 *p++ = ',';
8546 *p++ = ' ';
8547 if (far)
8549 int veryfar = 1, delta;
8551 if (INSN_ADDRESSES_SET_P ())
8553 delta = (INSN_ADDRESSES (INSN_UID (dest))
8554 - INSN_ADDRESSES (INSN_UID (insn)));
8555 /* Leave some instructions for "slop". */
8556 if (delta >= -260000 && delta < 260000)
8557 veryfar = 0;
8560 strcpy (p, ".+12\n\t nop\n\t");
8561 /* Skip the next insn if requested or
8562 if we know that it will be a nop. */
8563 if (annul || ! final_sequence)
8564 p[3] = '6';
8565 p += 12;
8566 if (veryfar)
8568 strcpy (p, "b\t");
8569 p += 2;
8571 else
8573 strcpy (p, "ba,pt\t%%xcc, ");
8574 p += 13;
8577 *p++ = '%';
8578 *p++ = 'l';
8579 *p++ = '0' + label;
8580 *p++ = '%';
8581 *p++ = '#';
8582 *p = '\0';
8584 return string;
8587 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8588 Such instructions cannot be used in the delay slot of return insn on v9.
8589 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8590 */
8592 static int
8593 epilogue_renumber (register rtx *where, int test)
8595 register const char *fmt;
8596 register int i;
8597 register enum rtx_code code;
8599 if (*where == 0)
8600 return 0;
8602 code = GET_CODE (*where);
8604 switch (code)
8606 case REG:
8607 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8608 return 1;
8609 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8610 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8611 /* fallthrough */
8612 case SCRATCH:
8613 case CC0:
8614 case PC:
8615 case CONST_INT:
8616 case CONST_WIDE_INT:
8617 case CONST_DOUBLE:
8618 return 0;
8620 /* Do not replace the frame pointer with the stack pointer because
8621 it can cause the delayed instruction to load below the stack.
8622 This occurs when instructions like:
8624 (set (reg/i:SI 24 %i0)
8625 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8626 (const_int -20 [0xffffffec])) 0))
8628 are in the return delayed slot. */
8629 case PLUS:
8630 if (GET_CODE (XEXP (*where, 0)) == REG
8631 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8632 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8633 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8634 return 1;
8635 break;
8637 case MEM:
8638 if (SPARC_STACK_BIAS
8639 && GET_CODE (XEXP (*where, 0)) == REG
8640 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8641 return 1;
8642 break;
8644 default:
8645 break;
8648 fmt = GET_RTX_FORMAT (code);
8650 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8652 if (fmt[i] == 'E')
8654 register int j;
8655 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8656 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8657 return 1;
8659 else if (fmt[i] == 'e'
8660 && epilogue_renumber (&(XEXP (*where, i)), test))
8661 return 1;
8663 return 0;
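/* For illustration (not part of GCC): with TEST == 0 the walk above
   rewrites, e.g.,

     (set (reg:SI 24 %i0) (reg:SI 25 %i1))

   into

     (set (reg:SI 8 %o0) (reg:SI 9 %o1))

   so the instruction stays valid in the delay slot of a v9 return,
   which executes after the register window has shifted.  */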
8666 /* Leaf functions and non-leaf functions have different needs. */
8668 static const int
8669 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8671 static const int
8672 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8674 static const int *const reg_alloc_orders[] = {
8675 reg_leaf_alloc_order,
8676 reg_nonleaf_alloc_order};
8678 void
8679 order_regs_for_local_alloc (void)
8681 static int last_order_nonleaf = 1;
8683 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8685 last_order_nonleaf = !last_order_nonleaf;
8686 memcpy ((char *) reg_alloc_order,
8687 (const char *) reg_alloc_orders[last_order_nonleaf],
8688 FIRST_PSEUDO_REGISTER * sizeof (int));
8692 /* Return 1 if REG and MEM are legitimate enough to allow the various
8693 MEM<-->REG splits to be run. */
8695 int
8696 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8698 /* Punt if we are here by mistake. */
8699 gcc_assert (reload_completed);
8701 /* We must have an offsettable memory reference. */
8702 if (!offsettable_memref_p (mem))
8703 return 0;
8705 /* If we have legitimate args for ldd/std, we do not want
8706 the split to happen. */
8707 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8708 return 0;
8710 /* Success. */
8711 return 1;
8714 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8716 void
8717 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8719 rtx high_part = gen_highpart (mode, dest);
8720 rtx low_part = gen_lowpart (mode, dest);
8721 rtx word0 = adjust_address (src, mode, 0);
8722 rtx word1 = adjust_address (src, mode, 4);
8724 if (reg_overlap_mentioned_p (high_part, word1))
8726 emit_move_insn_1 (low_part, word1);
8727 emit_move_insn_1 (high_part, word0);
8729 else
8731 emit_move_insn_1 (high_part, word0);
8732 emit_move_insn_1 (low_part, word1);
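/* For illustration (not part of GCC): the overlap check above is what
   keeps the split safe when the halves alias. Splitting

     %o0:%o1 <- mem[%o0 + 8]

   must load the low word first: loading the high word would overwrite
   %o0 while its old value is still needed to address the second load.
   reg_overlap_mentioned_p (high_part, word1) detects exactly this and
   swaps the order; sparc_split_reg_reg below applies the same rule to
   a register-to-register pair.  */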
8736 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8738 void
8739 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8741 rtx word0 = adjust_address (dest, mode, 0);
8742 rtx word1 = adjust_address (dest, mode, 4);
8743 rtx high_part = gen_highpart (mode, src);
8744 rtx low_part = gen_lowpart (mode, src);
8746 emit_move_insn_1 (word0, high_part);
8747 emit_move_insn_1 (word1, low_part);
8750 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8752 int
8753 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8755 /* Punt if we are here by mistake. */
8756 gcc_assert (reload_completed);
8758 if (GET_CODE (reg1) == SUBREG)
8759 reg1 = SUBREG_REG (reg1);
8760 if (GET_CODE (reg1) != REG)
8761 return 0;
8762 const int regno1 = REGNO (reg1);
8764 if (GET_CODE (reg2) == SUBREG)
8765 reg2 = SUBREG_REG (reg2);
8766 if (GET_CODE (reg2) != REG)
8767 return 0;
8768 const int regno2 = REGNO (reg2);
8770 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8771 return 1;
8773 if (TARGET_VIS3)
8775 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8776 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8777 return 1;
8780 return 0;
8783 /* Split a REG <--> REG move into a pair of moves in MODE. */
8785 void
8786 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8788 rtx dest1 = gen_highpart (mode, dest);
8789 rtx dest2 = gen_lowpart (mode, dest);
8790 rtx src1 = gen_highpart (mode, src);
8791 rtx src2 = gen_lowpart (mode, src);
8793 /* Now emit using the real source and destination we found, swapping
8794 the order if we detect overlap. */
8795 if (reg_overlap_mentioned_p (dest1, src2))
8797 emit_move_insn_1 (dest2, src2);
8798 emit_move_insn_1 (dest1, src1);
8800 else
8802 emit_move_insn_1 (dest1, src1);
8803 emit_move_insn_1 (dest2, src2);
8807 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8808 This makes them candidates for using ldd and std insns.
8810 Note reg1 and reg2 *must* be hard registers. */
8812 int
8813 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8815 /* We might have been passed a SUBREG. */
8816 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8817 return 0;
8819 if (REGNO (reg1) % 2 != 0)
8820 return 0;
8822 /* Integer ldd is deprecated in SPARC V9 */
8823 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8824 return 0;
8826 return (REGNO (reg1) == REGNO (reg2) - 1);
8829 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8830 an ldd or std insn.
8832 This can only happen when addr1 and addr2, the addresses in mem1
8833 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8834 addr1 must also be aligned on a 64-bit boundary.
8836 Also iff dependent_reg_rtx is not null it should not be used to
8837 compute the address for mem1, i.e. we cannot optimize a sequence
8838 like:
8839 ld [%o0], %o0
8840 ld [%o0 + 4], %o1
8842 ldd [%o0], %o0
8843 nor:
8844 ld [%g3 + 4], %g3
8845 ld [%g3], %g2
8847 ldd [%g3], %g2
8849 But, note that the transformation from:
8850 ld [%g2 + 4], %g3
8851 ld [%g2], %g2
8853 ldd [%g2], %g2
8854 is perfectly fine. Thus, the peephole2 patterns always pass us
8855 the destination register of the first load, never the second one.
8857 For stores we don't have a similar problem, so dependent_reg_rtx is
8858 NULL_RTX. */
8860 int
8861 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8863 rtx addr1, addr2;
8864 unsigned int reg1;
8865 HOST_WIDE_INT offset1;
8867 /* The mems cannot be volatile. */
8868 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8869 return 0;
8871 /* MEM1 should be aligned on a 64-bit boundary. */
8872 if (MEM_ALIGN (mem1) < 64)
8873 return 0;
8875 addr1 = XEXP (mem1, 0);
8876 addr2 = XEXP (mem2, 0);
8878 /* Extract a register number and offset (if used) from the first addr. */
8879 if (GET_CODE (addr1) == PLUS)
8881 /* If not a REG, return zero. */
8882 if (GET_CODE (XEXP (addr1, 0)) != REG)
8883 return 0;
8884 else
8886 reg1 = REGNO (XEXP (addr1, 0));
8887 /* The offset must be constant! */
8888 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8889 return 0;
8890 offset1 = INTVAL (XEXP (addr1, 1));
8893 else if (GET_CODE (addr1) != REG)
8894 return 0;
8895 else
8897 reg1 = REGNO (addr1);
8898 /* This was a simple (mem (reg)) expression. Offset is 0. */
8899 offset1 = 0;
8902 /* Make sure the second address is a (mem (plus (reg) (const_int). */
8903 if (GET_CODE (addr2) != PLUS)
8904 return 0;
8906 if (GET_CODE (XEXP (addr2, 0)) != REG
8907 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8908 return 0;
8910 if (reg1 != REGNO (XEXP (addr2, 0)))
8911 return 0;
8913 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8914 return 0;
8916 /* The first offset must be evenly divisible by 8 to ensure the
8917 address is 64-bit aligned. */
8918 if (offset1 % 8 != 0)
8919 return 0;
8921 /* The offset for the second addr must be 4 more than the first addr. */
8922 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8923 return 0;
8925 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8926 instructions. */
8927 return 1;
8930 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8932 rtx
8933 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8935 rtx x = widen_memory_access (mem1, mode, 0);
8936 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8937 return x;
8940 /* Return 1 if reg is a pseudo, or is the first register in
8941 a hard register pair. This makes it suitable for use in
8942 ldd and std insns. */
8944 int
8945 register_ok_for_ldd (rtx reg)
8947 /* We might have been passed a SUBREG. */
8948 if (!REG_P (reg))
8949 return 0;
8951 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8952 return (REGNO (reg) % 2 == 0);
8954 return 1;
8957 /* Return 1 if OP, a MEM, has an address which is known to be
8958 aligned to an 8-byte boundary. */
8960 int
8961 memory_ok_for_ldd (rtx op)
8963 /* In 64-bit mode, we assume that the address is word-aligned. */
8964 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8965 return 0;
8967 if (! can_create_pseudo_p ()
8968 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8969 return 0;
8971 return 1;
8974 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8976 static bool
8977 sparc_print_operand_punct_valid_p (unsigned char code)
8979 if (code == '#'
8980 || code == '*'
8981 || code == '('
8982 || code == ')'
8983 || code == '_'
8984 || code == '&')
8985 return true;
8987 return false;
8990 /* Implement TARGET_PRINT_OPERAND.
8991 Print operand X (an rtx) in assembler syntax to file FILE.
8992 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8993 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8995 static void
8996 sparc_print_operand (FILE *file, rtx x, int code)
8998 const char *s;
9000 switch (code)
9002 case '#':
9003 /* Output an insn in a delay slot. */
9004 if (final_sequence)
9005 sparc_indent_opcode = 1;
9006 else
9007 fputs ("\n\t nop", file);
9008 return;
9009 case '*':
9010 /* Output an annul flag if there's nothing for the delay slot and we
9011 are optimizing. This is always used with '(' below.
9012 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9013 this is a dbx bug. So, we only do this when optimizing.
9014 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9015 Always emit a nop in case the next instruction is a branch. */
9016 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9017 fputs (",a", file);
9018 return;
9019 case '(':
9020 /* Output a 'nop' if there's nothing for the delay slot and we are
9021 not optimizing. This is always used with '*' above. */
9022 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9023 fputs ("\n\t nop", file);
9024 else if (final_sequence)
9025 sparc_indent_opcode = 1;
9026 return;
9027 case ')':
9028 /* Output the right displacement from the saved PC on function return.
9029 The caller may have placed an "unimp" insn immediately after the call
9030 so we have to account for it. This insn is used in the 32-bit ABI
9031 when calling a function that returns a non zero-sized structure. The
9032 64-bit ABI doesn't have it. Be careful to have this test be the same
9033 as that for the call. The exception is when sparc_std_struct_return
9034 is enabled, the psABI is followed exactly and the adjustment is made
9035 by the code in sparc_struct_value_rtx. The call emitted is the same
9036 when sparc_std_struct_return is enabled. */
9037 if (!TARGET_ARCH64
9038 && cfun->returns_struct
9039 && !sparc_std_struct_return
9040 && DECL_SIZE (DECL_RESULT (current_function_decl))
9041 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9042 == INTEGER_CST
9043 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9044 fputs ("12", file);
9045 else
9046 fputc ('8', file);
9047 return;
9048 case '_':
9049 /* Output the Embedded Medium/Anywhere code model base register. */
9050 fputs (EMBMEDANY_BASE_REG, file);
9051 return;
9052 case '&':
9053 /* Print some local dynamic TLS name. */
9054 if (const char *name = get_some_local_dynamic_name ())
9055 assemble_name (file, name);
9056 else
9057 output_operand_lossage ("'%%&' used without any "
9058 "local dynamic TLS references");
9059 return;
9061 case 'Y':
9062 /* Adjust the operand to take into account a RESTORE operation. */
9063 if (GET_CODE (x) == CONST_INT)
9064 break;
9065 else if (GET_CODE (x) != REG)
9066 output_operand_lossage ("invalid %%Y operand");
9067 else if (REGNO (x) < 8)
9068 fputs (reg_names[REGNO (x)], file);
9069 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9070 fputs (reg_names[REGNO (x)-16], file);
9071 else
9072 output_operand_lossage ("invalid %%Y operand");
9073 return;
9074 case 'L':
9075 /* Print out the low order register name of a register pair. */
9076 if (WORDS_BIG_ENDIAN)
9077 fputs (reg_names[REGNO (x)+1], file);
9078 else
9079 fputs (reg_names[REGNO (x)], file);
9080 return;
9081 case 'H':
9082 /* Print out the high order register name of a register pair. */
9083 if (WORDS_BIG_ENDIAN)
9084 fputs (reg_names[REGNO (x)], file);
9085 else
9086 fputs (reg_names[REGNO (x)+1], file);
9087 return;
9088 case 'R':
9089 /* Print out the second register name of a register pair or quad.
9090 I.e., R (%o0) => %o1. */
9091 fputs (reg_names[REGNO (x)+1], file);
9092 return;
9093 case 'S':
9094 /* Print out the third register name of a register quad.
9095 I.e., S (%o0) => %o2. */
9096 fputs (reg_names[REGNO (x)+2], file);
9097 return;
9098 case 'T':
9099 /* Print out the fourth register name of a register quad.
9100 I.e., T (%o0) => %o3. */
9101 fputs (reg_names[REGNO (x)+3], file);
9102 return;
9103 case 'x':
9104 /* Print a condition code register. */
9105 if (REGNO (x) == SPARC_ICC_REG)
9107 switch (GET_MODE (x))
9109 case E_CCmode:
9110 case E_CCNZmode:
9111 case E_CCCmode:
9112 case E_CCVmode:
9113 s = "%icc";
9114 break;
9115 case E_CCXmode:
9116 case E_CCXNZmode:
9117 case E_CCXCmode:
9118 case E_CCXVmode:
9119 s = "%xcc";
9120 break;
9121 default:
9122 gcc_unreachable ();
9124 fputs (s, file);
9126 else
9127 /* %fccN register */
9128 fputs (reg_names[REGNO (x)], file);
9129 return;
9130 case 'm':
9131 /* Print the operand's address only. */
9132 output_address (GET_MODE (x), XEXP (x, 0));
9133 return;
9134 case 'r':
9135 /* In this case we need a register. Use %g0 if the
9136 operand is const0_rtx. */
9137 if (x == const0_rtx
9138 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9140 fputs ("%g0", file);
9141 return;
9143 else
9144 break;
9146 case 'A':
9147 switch (GET_CODE (x))
9149 case IOR:
9150 s = "or";
9151 break;
9152 case AND:
9153 s = "and";
9154 break;
9155 case XOR:
9156 s = "xor";
9157 break;
9158 default:
9159 output_operand_lossage ("invalid %%A operand");
9160 s = "";
9161 break;
9163 fputs (s, file);
9164 return;
9166 case 'B':
9167 switch (GET_CODE (x))
9169 case IOR:
9170 s = "orn";
9171 break;
9172 case AND:
9173 s = "andn";
9174 break;
9175 case XOR:
9176 s = "xnor";
9177 break;
9178 default:
9179 output_operand_lossage ("invalid %%B operand");
9180 s = "";
9181 break;
9183 fputs (s, file);
9184 return;
9186 /* This is used by the conditional move instructions. */
9187 case 'C':
9189 machine_mode mode = GET_MODE (XEXP (x, 0));
9190 switch (GET_CODE (x))
9192 case NE:
9193 if (mode == CCVmode || mode == CCXVmode)
9194 s = "vs";
9195 else
9196 s = "ne";
9197 break;
9198 case EQ:
9199 if (mode == CCVmode || mode == CCXVmode)
9200 s = "vc";
9201 else
9202 s = "e";
9203 break;
9204 case GE:
9205 if (mode == CCNZmode || mode == CCXNZmode)
9206 s = "pos";
9207 else
9208 s = "ge";
9209 break;
9210 case GT:
9211 s = "g";
9212 break;
9213 case LE:
9214 s = "le";
9215 break;
9216 case LT:
9217 if (mode == CCNZmode || mode == CCXNZmode)
9218 s = "neg";
9219 else
9220 s = "l";
9221 break;
9222 case GEU:
9223 s = "geu";
9224 break;
9225 case GTU:
9226 s = "gu";
9227 break;
9228 case LEU:
9229 s = "leu";
9230 break;
9231 case LTU:
9232 s = "lu";
9233 break;
9234 case LTGT:
9235 s = "lg";
9236 break;
9237 case UNORDERED:
9238 s = "u";
9239 break;
9240 case ORDERED:
9241 s = "o";
9242 break;
9243 case UNLT:
9244 s = "ul";
9245 break;
9246 case UNLE:
9247 s = "ule";
9248 break;
9249 case UNGT:
9250 s = "ug";
9251 break;
9252 case UNGE:
9253 s = "uge"
9254 ; break;
9255 case UNEQ:
9256 s = "ue";
9257 break;
9258 default:
9259 output_operand_lossage ("invalid %%C operand");
9260 s = "";
9261 break;
9263 fputs (s, file);
9264 return;
9267 /* These are used by the movr instruction pattern. */
9268 case 'D':
9270 switch (GET_CODE (x))
9272 case NE:
9273 s = "ne";
9274 break;
9275 case EQ:
9276 s = "e";
9277 break;
9278 case GE:
9279 s = "gez";
9280 break;
9281 case LT:
9282 s = "lz";
9283 break;
9284 case LE:
9285 s = "lez";
9286 break;
9287 case GT:
9288 s = "gz";
9289 break;
9290 default:
9291 output_operand_lossage ("invalid %%D operand");
9292 s = "";
9293 break;
9295 fputs (s, file);
9296 return;
9299 case 'b':
9301 /* Print a sign-extended character. */
9302 int i = trunc_int_for_mode (INTVAL (x), QImode);
9303 fprintf (file, "%d", i);
9304 return;
9307 case 'f':
9308 /* Operand must be a MEM; write its address. */
9309 if (GET_CODE (x) != MEM)
9310 output_operand_lossage ("invalid %%f operand");
9311 output_address (GET_MODE (x), XEXP (x, 0));
9312 return;
9314 case 's':
9316 /* Print a sign-extended 32-bit value. */
9317 HOST_WIDE_INT i;
9318 if (GET_CODE(x) == CONST_INT)
9319 i = INTVAL (x);
9320 else
9322 output_operand_lossage ("invalid %%s operand");
9323 return;
9325 i = trunc_int_for_mode (i, SImode);
9326 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9327 return;
9330 case 0:
9331 /* Do nothing special. */
9332 break;
9334 default:
9335 /* Undocumented flag. */
9336 output_operand_lossage ("invalid operand output code");
9339 if (GET_CODE (x) == REG)
9340 fputs (reg_names[REGNO (x)], file);
9341 else if (GET_CODE (x) == MEM)
9343 fputc ('[', file);
9344 /* Poor Sun assembler doesn't understand absolute addressing. */
9345 if (CONSTANT_P (XEXP (x, 0)))
9346 fputs ("%g0+", file);
9347 output_address (GET_MODE (x), XEXP (x, 0));
9348 fputc (']', file);
9350 else if (GET_CODE (x) == HIGH)
9352 fputs ("%hi(", file);
9353 output_addr_const (file, XEXP (x, 0));
9354 fputc (')', file);
9356 else if (GET_CODE (x) == LO_SUM)
9358 sparc_print_operand (file, XEXP (x, 0), 0);
9359 if (TARGET_CM_MEDMID)
9360 fputs ("+%l44(", file);
9361 else
9362 fputs ("+%lo(", file);
9363 output_addr_const (file, XEXP (x, 1));
9364 fputc (')', file);
9366 else if (GET_CODE (x) == CONST_DOUBLE)
9367 output_operand_lossage ("floating-point constant not a valid immediate operand");
9368 else
9369 output_addr_const (file, x);
9372 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9374 static void
9375 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9377 register rtx base, index = 0;
9378 int offset = 0;
9379 register rtx addr = x;
9381 if (REG_P (addr))
9382 fputs (reg_names[REGNO (addr)], file);
9383 else if (GET_CODE (addr) == PLUS)
9385 if (CONST_INT_P (XEXP (addr, 0)))
9386 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9387 else if (CONST_INT_P (XEXP (addr, 1)))
9388 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9389 else
9390 base = XEXP (addr, 0), index = XEXP (addr, 1);
9391 if (GET_CODE (base) == LO_SUM)
9393 gcc_assert (USE_AS_OFFSETABLE_LO10
9394 && TARGET_ARCH64
9395 && ! TARGET_CM_MEDMID);
9396 output_operand (XEXP (base, 0), 0);
9397 fputs ("+%lo(", file);
9398 output_address (VOIDmode, XEXP (base, 1));
9399 fprintf (file, ")+%d", offset);
9401 else
9403 fputs (reg_names[REGNO (base)], file);
9404 if (index == 0)
9405 fprintf (file, "%+d", offset);
9406 else if (REG_P (index))
9407 fprintf (file, "+%s", reg_names[REGNO (index)]);
9408 else if (GET_CODE (index) == SYMBOL_REF
9409 || GET_CODE (index) == LABEL_REF
9410 || GET_CODE (index) == CONST)
9411 fputc ('+', file), output_addr_const (file, index);
9412 else gcc_unreachable ();
9415 else if (GET_CODE (addr) == MINUS
9416 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9418 output_addr_const (file, XEXP (addr, 0));
9419 fputs ("-(", file);
9420 output_addr_const (file, XEXP (addr, 1));
9421 fputs ("-.)", file);
9423 else if (GET_CODE (addr) == LO_SUM)
9425 output_operand (XEXP (addr, 0), 0);
9426 if (TARGET_CM_MEDMID)
9427 fputs ("+%l44(", file);
9428 else
9429 fputs ("+%lo(", file);
9430 output_address (VOIDmode, XEXP (addr, 1));
9431 fputc (')', file);
9433 else if (flag_pic
9434 && GET_CODE (addr) == CONST
9435 && GET_CODE (XEXP (addr, 0)) == MINUS
9436 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9437 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9438 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9440 addr = XEXP (addr, 0);
9441 output_addr_const (file, XEXP (addr, 0));
9442 /* Group the args of the second CONST in parenthesis. */
9443 fputs ("-(", file);
9444 /* Skip past the second CONST--it does nothing for us. */
9445 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9446 /* Close the parenthesis. */
9447 fputc (')', file);
9449 else
9451 output_addr_const (file, addr);
9455 /* Target hook for assembling integer objects. The sparc version has
9456 special handling for aligned DI-mode objects. */
9458 static bool
9459 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9461 /* ??? We only output .xword's for symbols and only then in environments
9462 where the assembler can handle them. */
9463 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9465 if (TARGET_V9)
9467 assemble_integer_with_op ("\t.xword\t", x);
9468 return true;
9470 else
9472 assemble_aligned_integer (4, const0_rtx);
9473 assemble_aligned_integer (4, x);
9474 return true;
9477 return default_assemble_integer (x, size, aligned_p);
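/* For illustration (not part of GCC): for an aligned 8-byte symbolic
   value this hook emits, e.g.,

     .xword sym

   on TARGET_V9, and otherwise falls back to two 4-byte pieces with a
   zero high word,

     .word 0
     .word sym

   (directive spelling depends on the assembler), which is only correct
   because !V9 addresses fit in 32 bits; CONST_INTs take the default
   path.  */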
9480 /* Return the value of a code used in the .proc pseudo-op that says
9481 what kind of result this function returns. For non-C types, we pick
9482 the closest C type. */
9484 #ifndef SHORT_TYPE_SIZE
9485 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9486 #endif
9488 #ifndef INT_TYPE_SIZE
9489 #define INT_TYPE_SIZE BITS_PER_WORD
9490 #endif
9492 #ifndef LONG_TYPE_SIZE
9493 #define LONG_TYPE_SIZE BITS_PER_WORD
9494 #endif
9496 #ifndef LONG_LONG_TYPE_SIZE
9497 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9498 #endif
9500 #ifndef FLOAT_TYPE_SIZE
9501 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9502 #endif
9504 #ifndef DOUBLE_TYPE_SIZE
9505 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9506 #endif
9508 #ifndef LONG_DOUBLE_TYPE_SIZE
9509 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9510 #endif
9512 unsigned long
9513 sparc_type_code (register tree type)
9515 register unsigned long qualifiers = 0;
9516 register unsigned shift;
9518 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9519 setting more, since some assemblers will give an error for this. Also,
9520 we must be careful to avoid shifts of 32 bits or more to avoid getting
9521 unpredictable results. */
9523 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9525 switch (TREE_CODE (type))
9527 case ERROR_MARK:
9528 return qualifiers;
9530 case ARRAY_TYPE:
9531 qualifiers |= (3 << shift);
9532 break;
9534 case FUNCTION_TYPE:
9535 case METHOD_TYPE:
9536 qualifiers |= (2 << shift);
9537 break;
9539 case POINTER_TYPE:
9540 case REFERENCE_TYPE:
9541 case OFFSET_TYPE:
9542 qualifiers |= (1 << shift);
9543 break;
9545 case RECORD_TYPE:
9546 return (qualifiers | 8);
9548 case UNION_TYPE:
9549 case QUAL_UNION_TYPE:
9550 return (qualifiers | 9);
9552 case ENUMERAL_TYPE:
9553 return (qualifiers | 10);
9555 case VOID_TYPE:
9556 return (qualifiers | 16);
9558 case INTEGER_TYPE:
9559 /* If this is a range type, consider it to be the underlying
9560 type. */
9561 if (TREE_TYPE (type) != 0)
9562 break;
9564 /* Carefully distinguish all the standard types of C,
9565 without messing up if the language is not C. We do this by
9566 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9567 look at both the names and the above fields, but that's redundant.
9568 Any type whose size is between two C types will be considered
9569 to be the wider of the two types. Also, we do not have a
9570 special code to use for "long long", so anything wider than
9571 long is treated the same. Note that we can't distinguish
9572 between "int" and "long" in this code if they are the same
9573 size, but that's fine, since neither can the assembler. */
9575 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9576 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9578 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9579 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9581 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9582 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9584 else
9585 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9587 case REAL_TYPE:
9588 /* If this is a range type, consider it to be the underlying
9589 type. */
9590 if (TREE_TYPE (type) != 0)
9591 break;
9593 /* Carefully distinguish all the standard types of C,
9594 without messing up if the language is not C. */
9596 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9597 return (qualifiers | 6);
9599 else
9600 return (qualifiers | 7);
9602 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9603 /* ??? We need to distinguish between double and float complex types,
9604 but I don't know how yet because I can't reach this code from
9605 existing front-ends. */
9606 return (qualifiers | 7); /* Who knows? */
9608 case VECTOR_TYPE:
9609 case BOOLEAN_TYPE: /* Boolean truth value type. */
9610 case LANG_TYPE:
9611 case NULLPTR_TYPE:
9612 return qualifiers;
9614 default:
9615 gcc_unreachable (); /* Not a type! */
9619 return qualifiers;
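/* For illustration (not part of GCC): a worked example of the encoding
   above. For the C type `int **' the loop sees two POINTER_TYPEs and
   then the INTEGER_TYPE, so with SHIFT starting at 6:

     qualifiers = (1 << 6)   -- first pointer level
                | (1 << 8)   -- second pointer level
                | 4          -- signed int, OR'd into the low bits

   i.e. 0x144. The low bits hold the base-type code, each 2-bit field
   above them one level of derivation, and at most 30 bits are used.  */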
9622 /* Nested function support. */
9624 /* Emit RTL insns to initialize the variable parts of a trampoline.
9625 FNADDR is an RTX for the address of the function's pure code.
9626 CXT is an RTX for the static chain value for the function.
9628 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9629 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9630 (to store insns). This is a bit excessive. Perhaps a different
9631 mechanism would be better here.
9633 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9635 static void
9636 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9638 /* SPARC 32-bit trampoline:
9640 sethi %hi(fn), %g1
9641 sethi %hi(static), %g2
9642 jmp %g1+%lo(fn)
9643 or %g2, %lo(static), %g2
9645 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9646 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9648 */
9649 emit_move_insn
9650 (adjust_address (m_tramp, SImode, 0),
9651 expand_binop (SImode, ior_optab,
9652 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9653 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9654 NULL_RTX, 1, OPTAB_DIRECT));
9656 emit_move_insn
9657 (adjust_address (m_tramp, SImode, 4),
9658 expand_binop (SImode, ior_optab,
9659 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9660 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9661 NULL_RTX, 1, OPTAB_DIRECT));
9663 emit_move_insn
9664 (adjust_address (m_tramp, SImode, 8),
9665 expand_binop (SImode, ior_optab,
9666 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9667 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9668 NULL_RTX, 1, OPTAB_DIRECT));
9670 emit_move_insn
9671 (adjust_address (m_tramp, SImode, 12),
9672 expand_binop (SImode, ior_optab,
9673 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9674 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9675 NULL_RTX, 1, OPTAB_DIRECT));
9677 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9678 aligned on a 16 byte boundary so one flush clears it all. */
9679 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9680 if (sparc_cpu != PROCESSOR_ULTRASPARC
9681 && sparc_cpu != PROCESSOR_ULTRASPARC3
9682 && sparc_cpu != PROCESSOR_NIAGARA
9683 && sparc_cpu != PROCESSOR_NIAGARA2
9684 && sparc_cpu != PROCESSOR_NIAGARA3
9685 && sparc_cpu != PROCESSOR_NIAGARA4
9686 && sparc_cpu != PROCESSOR_NIAGARA7
9687 && sparc_cpu != PROCESSOR_M8)
9688 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9690 /* Call __enable_execute_stack after writing onto the stack to make sure
9691 the stack address is accessible. */
9692 #ifdef HAVE_ENABLE_EXECUTE_STACK
9693 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9694 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9695 #endif
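/* For illustration (not part of GCC): a standalone C sketch of the
   four instruction words assembled above, assuming 32-bit addresses:

     #include <stdint.h>

     static void
     encode_tramp32 (uint32_t tramp[4], uint32_t fn, uint32_t cxt)
     {
       tramp[0] = 0x03000000 | (fn >> 10);    // sethi %hi(fn), %g1
       tramp[1] = 0x05000000 | (cxt >> 10);   // sethi %hi(cxt), %g2
       tramp[2] = 0x81c06000 | (fn & 0x3ff);  // jmp %g1 + %lo(fn)
       tramp[3] = 0x8410a000 | (cxt & 0x3ff); // or %g2, %lo(cxt), %g2
     }

   The shifts and masks mirror the expand_shift/expand_and calls, and
   the SETHI/JMPL bit layouts are spelled out in the comment at the
   top of this function.  */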
9699 /* The 64-bit version is simpler because it makes more sense to load the
9700 values as "immediate" data out of the trampoline. It's also easier since
9701 we can read the PC without clobbering a register. */
9703 static void
9704 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9706 /* SPARC 64-bit trampoline:
9708 rd %pc, %g1
9709 ldx [%g1+24], %g5
9710 jmp %g5
9711 ldx [%g1+16], %g5
9712 +16 bytes data
9714 */
9715 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9716 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9717 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9718 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9719 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9720 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9721 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9722 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9723 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9724 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9725 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9727 if (sparc_cpu != PROCESSOR_ULTRASPARC
9728 && sparc_cpu != PROCESSOR_ULTRASPARC3
9729 && sparc_cpu != PROCESSOR_NIAGARA
9730 && sparc_cpu != PROCESSOR_NIAGARA2
9731 && sparc_cpu != PROCESSOR_NIAGARA3
9732 && sparc_cpu != PROCESSOR_NIAGARA4
9733 && sparc_cpu != PROCESSOR_NIAGARA7
9734 && sparc_cpu != PROCESSOR_M8)
9735 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9737 /* Call __enable_execute_stack after writing onto the stack to make sure
9738 the stack address is accessible. */
9739 #ifdef HAVE_ENABLE_EXECUTE_STACK
9740 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9741 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9742 #endif
9745 /* Worker for TARGET_TRAMPOLINE_INIT. */
9747 static void
9748 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9750 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9751 cxt = force_reg (Pmode, cxt);
9752 if (TARGET_ARCH64)
9753 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9754 else
9755 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9758 /* Adjust the cost of a scheduling dependency. Return the new cost of
9759 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9761 static int
9762 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9763 int cost)
9765 enum attr_type insn_type;
9767 if (recog_memoized (insn) < 0)
9768 return cost;
9770 insn_type = get_attr_type (insn);
9772 if (dep_type == 0)
9774 /* Data dependency; DEP_INSN writes a register that INSN reads some
9775 cycles later. */
9777 /* if a load, then the dependence must be on the memory address;
9778 add an extra "cycle". Note that the cost could be two cycles
9779 if the reg was written late in an instruction group; we can't tell
9780 here. */
9781 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9782 return cost + 3;
9784 /* Get the delay only if the address of the store is the dependence. */
9785 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9787 rtx pat = PATTERN(insn);
9788 rtx dep_pat = PATTERN (dep_insn);
9790 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9791 return cost; /* This should not happen! */
9793 /* The dependency between the two instructions was on the data that
9794 is being stored. Assume that this implies that the address of the
9795 store is not dependent. */
9796 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9797 return cost;
9799 return cost + 3; /* An approximation. */
9802 /* A shift instruction cannot receive its data from an instruction
9803 in the same cycle; add a one cycle penalty. */
9804 if (insn_type == TYPE_SHIFT)
9805 return cost + 3; /* Split before cascade into shift. */
9807 else
9809 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9810 INSN writes some cycles later. */
9812 /* These are only significant for the fpu unit; writing a fp reg before
9813 the fpu has finished with it stalls the processor. */
9815 /* Reusing an integer register causes no problems. */
9816 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9817 return 0;
9820 return cost;
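/* For illustration (not part of GCC): the store test above separates

     ld [%o2], %o1    -- dep_insn writes %o1
     st %o1, [%o3]    -- dependence is on the stored data: cost kept

   from

     ld [%o2], %o1
     st %o3, [%o1]    -- dependence is on the address: cost + 3

   by checking whether the register DEP_INSN sets is exactly the
   SET_SRC of the store.  */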
9823 static int
9824 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9825 int cost)
9827 enum attr_type insn_type, dep_type;
9828 rtx pat = PATTERN(insn);
9829 rtx dep_pat = PATTERN (dep_insn);
9831 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9832 return cost;
9834 insn_type = get_attr_type (insn);
9835 dep_type = get_attr_type (dep_insn);
9837 switch (dtype)
9839 case 0:
9840 /* Data dependency; DEP_INSN writes a register that INSN reads some
9841 cycles later. */
9843 switch (insn_type)
9845 case TYPE_STORE:
9846 case TYPE_FPSTORE:
9847 /* Get the delay iff the address of the store is the dependence. */
9848 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9849 return cost;
9851 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9852 return cost;
9853 return cost + 3;
9855 case TYPE_LOAD:
9856 case TYPE_SLOAD:
9857 case TYPE_FPLOAD:
9858 /* If a load, then the dependence must be on the memory address. If
9859 the addresses aren't equal, then it might be a false dependency. */
9860 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9862 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9863 || GET_CODE (SET_DEST (dep_pat)) != MEM
9864 || GET_CODE (SET_SRC (pat)) != MEM
9865 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9866 XEXP (SET_SRC (pat), 0)))
9867 return cost + 2;
9869 return cost + 8;
9871 break;
9873 case TYPE_BRANCH:
9874 /* Compare to branch latency is 0. There is no benefit from
9875 separating compare and branch. */
9876 if (dep_type == TYPE_COMPARE)
9877 return 0;
9878 /* Floating point compare to branch latency is less than
9879 compare to conditional move. */
9880 if (dep_type == TYPE_FPCMP)
9881 return cost - 1;
9882 break;
9883 default:
9884 break;
9886 break;
9888 case REG_DEP_ANTI:
9889 /* Anti-dependencies only penalize the fpu unit. */
9890 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9891 return 0;
9892 break;
9894 default:
9895 break;
9898 return cost;
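/* Illustrative sketch (not part of the original file): in the TYPE_LOAD
   case above, a load right after a store is cheap when the addresses are
   provably different and expensive when it reads back the stored data:

     st  %g1, [%o0]
     ld  [%o0], %g2    ! same address: cost + 8
     ld  [%o1], %g3    ! addresses differ: cost + 2 (may be a false dep)  */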
9901 static int
9902 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9903 unsigned int)
9905 switch (sparc_cpu)
9907 case PROCESSOR_SUPERSPARC:
9908 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9909 break;
9910 case PROCESSOR_HYPERSPARC:
9911 case PROCESSOR_SPARCLITE86X:
9912 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9913 break;
9914 default:
9915 break;
9917 return cost;
9920 static void
9921 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9922 int sched_verbose ATTRIBUTE_UNUSED,
9923 int max_ready ATTRIBUTE_UNUSED)
9926 static int
9927 sparc_use_sched_lookahead (void)
9929 if (sparc_cpu == PROCESSOR_NIAGARA
9930 || sparc_cpu == PROCESSOR_NIAGARA2
9931 || sparc_cpu == PROCESSOR_NIAGARA3)
9932 return 0;
9933 if (sparc_cpu == PROCESSOR_NIAGARA4
9934 || sparc_cpu == PROCESSOR_NIAGARA7
9935 || sparc_cpu == PROCESSOR_M8)
9936 return 2;
9937 if (sparc_cpu == PROCESSOR_ULTRASPARC
9938 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9939 return 4;
9940 if ((1 << sparc_cpu) &
9941 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9942 (1 << PROCESSOR_SPARCLITE86X)))
9943 return 3;
9944 return 0;
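/* The (1 << sparc_cpu) test above is the standard bitmask idiom for
   "is this value a member of a small set".  A minimal standalone sketch
   with hypothetical names (not part of this file):  */

enum sketch_cpu { SKETCH_CPU_A, SKETCH_CPU_B, SKETCH_CPU_C, SKETCH_CPU_D };

static int
sketch_in_set (enum sketch_cpu c)
{
  /* True iff C is SKETCH_CPU_B or SKETCH_CPU_D.  */
  return ((1 << c) & ((1 << SKETCH_CPU_B) | (1 << SKETCH_CPU_D))) != 0;
}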
9947 static int
9948 sparc_issue_rate (void)
9950 switch (sparc_cpu)
9952 case PROCESSOR_NIAGARA:
9953 case PROCESSOR_NIAGARA2:
9954 case PROCESSOR_NIAGARA3:
9955 default:
9956 return 1;
9957 case PROCESSOR_NIAGARA4:
9958 case PROCESSOR_NIAGARA7:
9959 case PROCESSOR_V9:
9960 /* Assume V9 processors are capable of at least dual-issue. */
9961 return 2;
9962 case PROCESSOR_SUPERSPARC:
9963 return 3;
9964 case PROCESSOR_HYPERSPARC:
9965 case PROCESSOR_SPARCLITE86X:
9966 return 2;
9967 case PROCESSOR_ULTRASPARC:
9968 case PROCESSOR_ULTRASPARC3:
9969 case PROCESSOR_M8:
9970 return 4;
9974 static int
9975 set_extends (rtx_insn *insn)
9977 register rtx pat = PATTERN (insn);
9979 switch (GET_CODE (SET_SRC (pat)))
9981 /* Load and some shift instructions zero extend. */
9982 case MEM:
9983 case ZERO_EXTEND:
9984 /* sethi clears the high bits. */
9985 case HIGH:
9986 /* LO_SUM is used with sethi. sethi cleared the high
9987 bits and the values used with lo_sum are positive. */
9988 case LO_SUM:
9989 /* Store flag stores 0 or 1. */
9990 case LT: case LTU:
9991 case GT: case GTU:
9992 case LE: case LEU:
9993 case GE: case GEU:
9994 case EQ:
9995 case NE:
9996 return 1;
9997 case AND:
9999 rtx op0 = XEXP (SET_SRC (pat), 0);
10000 rtx op1 = XEXP (SET_SRC (pat), 1);
10001 if (GET_CODE (op1) == CONST_INT)
10002 return INTVAL (op1) >= 0;
10003 if (GET_CODE (op0) != REG)
10004 return 0;
10005 if (sparc_check_64 (op0, insn) == 1)
10006 return 1;
10007 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10009 case IOR:
10010 case XOR:
10012 rtx op0 = XEXP (SET_SRC (pat), 0);
10013 rtx op1 = XEXP (SET_SRC (pat), 1);
10014 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10015 return 0;
10016 if (GET_CODE (op1) == CONST_INT)
10017 return INTVAL (op1) >= 0;
10018 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10020 case LSHIFTRT:
10021 return GET_MODE (SET_SRC (pat)) == SImode;
10022 /* Positive integers leave the high bits zero. */
10023 case CONST_INT:
10024 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10025 case ASHIFTRT:
10026 case SIGN_EXTEND:
10027 return - (GET_MODE (SET_SRC (pat)) == SImode);
10028 case REG:
10029 return sparc_check_64 (SET_SRC (pat), insn);
10030 default:
10031 return 0;
10035 /* We _ought_ to have only one kind per function, but... */
10036 static GTY(()) rtx sparc_addr_diff_list;
10037 static GTY(()) rtx sparc_addr_list;
10039 void
10040 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10042 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10043 if (diff)
10044 sparc_addr_diff_list
10045 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10046 else
10047 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10050 static void
10051 sparc_output_addr_vec (rtx vec)
10053 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10054 int idx, vlen = XVECLEN (body, 0);
10056 #ifdef ASM_OUTPUT_ADDR_VEC_START
10057 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10058 #endif
10060 #ifdef ASM_OUTPUT_CASE_LABEL
10061 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10062 NEXT_INSN (lab));
10063 #else
10064 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10065 #endif
10067 for (idx = 0; idx < vlen; idx++)
10069 ASM_OUTPUT_ADDR_VEC_ELT
10070 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10073 #ifdef ASM_OUTPUT_ADDR_VEC_END
10074 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10075 #endif
10078 static void
10079 sparc_output_addr_diff_vec (rtx vec)
10081 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10082 rtx base = XEXP (XEXP (body, 0), 0);
10083 int idx, vlen = XVECLEN (body, 1);
10085 #ifdef ASM_OUTPUT_ADDR_VEC_START
10086 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10087 #endif
10089 #ifdef ASM_OUTPUT_CASE_LABEL
10090 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10091 NEXT_INSN (lab));
10092 #else
10093 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10094 #endif
10096 for (idx = 0; idx < vlen; idx++)
10098 ASM_OUTPUT_ADDR_DIFF_ELT
10099 (asm_out_file,
10100 body,
10101 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10102 CODE_LABEL_NUMBER (base));
10105 #ifdef ASM_OUTPUT_ADDR_VEC_END
10106 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10107 #endif
10110 static void
10111 sparc_output_deferred_case_vectors (void)
10113 rtx t;
10114 int align;
10116 if (sparc_addr_list == NULL_RTX
10117 && sparc_addr_diff_list == NULL_RTX)
10118 return;
10120 /* Align to cache line in the function's code section. */
10121 switch_to_section (current_function_section ());
10123 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10124 if (align > 0)
10125 ASM_OUTPUT_ALIGN (asm_out_file, align);
10127 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10128 sparc_output_addr_vec (XEXP (t, 0));
10129 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10130 sparc_output_addr_diff_vec (XEXP (t, 0));
10132 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10135 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10136 unknown. Return 1 if the high bits are zero, -1 if the register is
10137 sign extended. */
10138 int
10139 sparc_check_64 (rtx x, rtx_insn *insn)
10141 /* If a register is set only once, it is safe to ignore insns this
10142 code does not know how to handle. The loop will either recognize
10143 the single set and return the correct value or fail to recognize
10144 it and return 0. */
10145 int set_once = 0;
10146 rtx y = x;
10148 gcc_assert (GET_CODE (x) == REG);
10150 if (GET_MODE (x) == DImode)
10151 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10153 if (flag_expensive_optimizations
10154 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10155 set_once = 1;
10157 if (insn == 0)
10159 if (set_once)
10160 insn = get_last_insn_anywhere ();
10161 else
10162 return 0;
10165 while ((insn = PREV_INSN (insn)))
10167 switch (GET_CODE (insn))
10169 case JUMP_INSN:
10170 case NOTE:
10171 break;
10172 case CODE_LABEL:
10173 case CALL_INSN:
10174 default:
10175 if (! set_once)
10176 return 0;
10177 break;
10178 case INSN:
10180 rtx pat = PATTERN (insn);
10181 if (GET_CODE (pat) != SET)
10182 return 0;
10183 if (rtx_equal_p (x, SET_DEST (pat)))
10184 return set_extends (insn);
10185 if (y && rtx_equal_p (y, SET_DEST (pat)))
10186 return set_extends (insn);
10187 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10188 return 0;
10192 return 0;
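/* Usage sketch: callers typically only care whether the high bits are
   known to be clear; e.g. output_v8plus_shift below zero-extends the
   low word explicitly unless sparc_check_64 returns 1:

     if (sparc_check_64 (operands[1], insn) <= 0)
       output_asm_insn ("srl\t%L1, 0, %L1", operands);  */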
10195 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10196 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10198 const char *
10199 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10201 static char asm_code[60];
10203 /* The scratch register is only required when the destination
10204 register is not a 64-bit global or out register. */
10205 if (which_alternative != 2)
10206 operands[3] = operands[0];
10208 /* We can only shift by constants <= 63. */
10209 if (GET_CODE (operands[2]) == CONST_INT)
10210 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10212 if (GET_CODE (operands[1]) == CONST_INT)
10214 output_asm_insn ("mov\t%1, %3", operands);
10216 else
10218 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10219 if (sparc_check_64 (operands[1], insn) <= 0)
10220 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10221 output_asm_insn ("or\t%L1, %3, %3", operands);
10224 strcpy (asm_code, opcode);
10226 if (which_alternative != 2)
10227 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10228 else
10229 return
10230 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
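/* Illustrative sketch (not part of the original file): with OPCODE
   "sllx" and the scratch-register alternative, the routine above emits
   roughly

     sllx  %H1, 32, %3    ! move the high word into the upper half
     srl   %L1, 0, %L1    ! zero-extend the low word if not known clean
     or    %L1, %3, %3    ! %3 now holds the full 64-bit input
     sllx  %3, %2, %3     ! the wide shift proper
     srlx  %3, 32, %H0    ! split the 64-bit result back
     mov   %3, %L0        ! into its high and low words  */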
10233 /* Output rtl to increment the profiler label LABELNO
10234 for profiling a function entry. */
10236 void
10237 sparc_profile_hook (int labelno)
10239 char buf[32];
10240 rtx lab, fun;
10242 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10243 if (NO_PROFILE_COUNTERS)
10245 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10247 else
10249 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10250 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10251 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10255 #ifdef TARGET_SOLARIS
10256 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10258 static void
10259 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10260 tree decl ATTRIBUTE_UNUSED)
10262 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10264 solaris_elf_asm_comdat_section (name, flags, decl);
10265 return;
10268 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10270 if (!(flags & SECTION_DEBUG))
10271 fputs (",#alloc", asm_out_file);
10272 if (flags & SECTION_WRITE)
10273 fputs (",#write", asm_out_file);
10274 if (flags & SECTION_TLS)
10275 fputs (",#tls", asm_out_file);
10276 if (flags & SECTION_CODE)
10277 fputs (",#execinstr", asm_out_file);
10279 if (flags & SECTION_NOTYPE)
10280 ;
10281 else if (flags & SECTION_BSS)
10282 fputs (",#nobits", asm_out_file);
10283 else
10284 fputs (",#progbits", asm_out_file);
10286 fputc ('\n', asm_out_file);
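/* For instance, a writable TLS data section named ".tdata" comes out as
   something like:

     .section ".tdata",#alloc,#write,#tls,#progbits  */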
10288 #endif /* TARGET_SOLARIS */
10290 /* We do not allow indirect calls to be optimized into sibling calls.
10292 We cannot use sibling calls when delayed branches are disabled
10293 because they will likely require the call delay slot to be filled.
10295 Also, on SPARC 32-bit we cannot emit a sibling call when the
10296 current function returns a structure. This is because the "unimp
10297 after call" convention would cause the callee to return to the
10298 wrong place. The generic code already disallows cases where the
10299 function being called returns a structure.
10301 It may seem strange how this last case could occur. Usually there
10302 is code after the call which jumps to epilogue code which dumps the
10303 return value into the struct return area. That ought to invalidate
10304 the sibling call, right? Well, in the C++ case we can end up passing
10305 the pointer to the struct return area to a constructor (which returns
10306 void) and then nothing else happens. Such a sibling call would look
10307 valid without the added check here.
10309 VxWorks PIC PLT entries require the global pointer to be initialized
10310 on entry. We therefore can't emit sibling calls to them. */
10311 static bool
10312 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10314 return (decl
10315 && flag_delayed_branch
10316 && (TARGET_ARCH64 || ! cfun->returns_struct)
10317 && !(TARGET_VXWORKS_RTP
10318 && flag_pic
10319 && !targetm.binds_local_p (decl)));
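/* Illustrative user-level sketch (hypothetical names, not part of this
   file): under -m32 the call to sketch_g below must not become a sibling
   call, because the function returns a structure and the 32-bit ABI
   expects the "unimp after call" marker.  */

struct sketch_s { int a, b, c; };

extern struct sketch_s sketch_g (void);

struct sketch_s
sketch_f (void)
{
  return sketch_g ();	/* Tail position, but no sibcall on 32-bit.  */
}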
10322 /* libfunc renaming. */
10324 static void
10325 sparc_init_libfuncs (void)
10327 if (TARGET_ARCH32)
10329 /* Use the subroutines that Sun's library provides for integer
10330 multiply and divide. The `*' prevents an underscore from
10331 being prepended by the compiler. .umul is a little faster
10332 than .mul. */
10333 set_optab_libfunc (smul_optab, SImode, "*.umul");
10334 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10335 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10336 set_optab_libfunc (smod_optab, SImode, "*.rem");
10337 set_optab_libfunc (umod_optab, SImode, "*.urem");
10339 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10340 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10341 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10342 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10343 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10344 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10346 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10347 is because with soft-float, the SFmode and DFmode sqrt
10348 instructions will be absent, and the compiler will notice and
10349 try to use the TFmode sqrt instruction for calls to the
10350 builtin function sqrt, but this fails. */
10351 if (TARGET_FPU)
10352 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10354 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10355 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10356 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10357 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10358 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10359 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10361 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10362 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10363 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10364 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10366 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10367 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10368 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10369 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10371 if (DITF_CONVERSION_LIBFUNCS)
10373 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10374 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10375 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10376 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10379 if (SUN_CONVERSION_LIBFUNCS)
10381 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10382 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10383 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10384 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10387 if (TARGET_ARCH64)
10389 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10390 do not exist in the library. Make sure the compiler does not
10391 emit calls to them by accident. (It should always use the
10392 hardware instructions.) */
10393 set_optab_libfunc (smul_optab, SImode, 0);
10394 set_optab_libfunc (sdiv_optab, SImode, 0);
10395 set_optab_libfunc (udiv_optab, SImode, 0);
10396 set_optab_libfunc (smod_optab, SImode, 0);
10397 set_optab_libfunc (umod_optab, SImode, 0);
10399 if (SUN_INTEGER_MULTIPLY_64)
10401 set_optab_libfunc (smul_optab, DImode, "__mul64");
10402 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10403 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10404 set_optab_libfunc (smod_optab, DImode, "__rem64");
10405 set_optab_libfunc (umod_optab, DImode, "__urem64");
10408 if (SUN_CONVERSION_LIBFUNCS)
10410 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10411 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10412 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10413 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
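/* Illustration (user code, hedged): under -m32 on a CPU without a
   hardware multiply (e.g. -mcpu=v7), a plain 32-bit multiply such as

     int sketch_mul (int a, int b) { return a * b; }

   is emitted as "call .umul"; the leading '*' in the names above is what
   keeps the assembler symbol ".umul" free of a prepended underscore.  */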
10418 /* SPARC builtins. */
10419 enum sparc_builtins
10421 /* FPU builtins. */
10422 SPARC_BUILTIN_LDFSR,
10423 SPARC_BUILTIN_STFSR,
10425 /* VIS 1.0 builtins. */
10426 SPARC_BUILTIN_FPACK16,
10427 SPARC_BUILTIN_FPACK32,
10428 SPARC_BUILTIN_FPACKFIX,
10429 SPARC_BUILTIN_FEXPAND,
10430 SPARC_BUILTIN_FPMERGE,
10431 SPARC_BUILTIN_FMUL8X16,
10432 SPARC_BUILTIN_FMUL8X16AU,
10433 SPARC_BUILTIN_FMUL8X16AL,
10434 SPARC_BUILTIN_FMUL8SUX16,
10435 SPARC_BUILTIN_FMUL8ULX16,
10436 SPARC_BUILTIN_FMULD8SUX16,
10437 SPARC_BUILTIN_FMULD8ULX16,
10438 SPARC_BUILTIN_FALIGNDATAV4HI,
10439 SPARC_BUILTIN_FALIGNDATAV8QI,
10440 SPARC_BUILTIN_FALIGNDATAV2SI,
10441 SPARC_BUILTIN_FALIGNDATADI,
10442 SPARC_BUILTIN_WRGSR,
10443 SPARC_BUILTIN_RDGSR,
10444 SPARC_BUILTIN_ALIGNADDR,
10445 SPARC_BUILTIN_ALIGNADDRL,
10446 SPARC_BUILTIN_PDIST,
10447 SPARC_BUILTIN_EDGE8,
10448 SPARC_BUILTIN_EDGE8L,
10449 SPARC_BUILTIN_EDGE16,
10450 SPARC_BUILTIN_EDGE16L,
10451 SPARC_BUILTIN_EDGE32,
10452 SPARC_BUILTIN_EDGE32L,
10453 SPARC_BUILTIN_FCMPLE16,
10454 SPARC_BUILTIN_FCMPLE32,
10455 SPARC_BUILTIN_FCMPNE16,
10456 SPARC_BUILTIN_FCMPNE32,
10457 SPARC_BUILTIN_FCMPGT16,
10458 SPARC_BUILTIN_FCMPGT32,
10459 SPARC_BUILTIN_FCMPEQ16,
10460 SPARC_BUILTIN_FCMPEQ32,
10461 SPARC_BUILTIN_FPADD16,
10462 SPARC_BUILTIN_FPADD16S,
10463 SPARC_BUILTIN_FPADD32,
10464 SPARC_BUILTIN_FPADD32S,
10465 SPARC_BUILTIN_FPSUB16,
10466 SPARC_BUILTIN_FPSUB16S,
10467 SPARC_BUILTIN_FPSUB32,
10468 SPARC_BUILTIN_FPSUB32S,
10469 SPARC_BUILTIN_ARRAY8,
10470 SPARC_BUILTIN_ARRAY16,
10471 SPARC_BUILTIN_ARRAY32,
10473 /* VIS 2.0 builtins. */
10474 SPARC_BUILTIN_EDGE8N,
10475 SPARC_BUILTIN_EDGE8LN,
10476 SPARC_BUILTIN_EDGE16N,
10477 SPARC_BUILTIN_EDGE16LN,
10478 SPARC_BUILTIN_EDGE32N,
10479 SPARC_BUILTIN_EDGE32LN,
10480 SPARC_BUILTIN_BMASK,
10481 SPARC_BUILTIN_BSHUFFLEV4HI,
10482 SPARC_BUILTIN_BSHUFFLEV8QI,
10483 SPARC_BUILTIN_BSHUFFLEV2SI,
10484 SPARC_BUILTIN_BSHUFFLEDI,
10486 /* VIS 3.0 builtins. */
10487 SPARC_BUILTIN_CMASK8,
10488 SPARC_BUILTIN_CMASK16,
10489 SPARC_BUILTIN_CMASK32,
10490 SPARC_BUILTIN_FCHKSM16,
10491 SPARC_BUILTIN_FSLL16,
10492 SPARC_BUILTIN_FSLAS16,
10493 SPARC_BUILTIN_FSRL16,
10494 SPARC_BUILTIN_FSRA16,
10495 SPARC_BUILTIN_FSLL32,
10496 SPARC_BUILTIN_FSLAS32,
10497 SPARC_BUILTIN_FSRL32,
10498 SPARC_BUILTIN_FSRA32,
10499 SPARC_BUILTIN_PDISTN,
10500 SPARC_BUILTIN_FMEAN16,
10501 SPARC_BUILTIN_FPADD64,
10502 SPARC_BUILTIN_FPSUB64,
10503 SPARC_BUILTIN_FPADDS16,
10504 SPARC_BUILTIN_FPADDS16S,
10505 SPARC_BUILTIN_FPSUBS16,
10506 SPARC_BUILTIN_FPSUBS16S,
10507 SPARC_BUILTIN_FPADDS32,
10508 SPARC_BUILTIN_FPADDS32S,
10509 SPARC_BUILTIN_FPSUBS32,
10510 SPARC_BUILTIN_FPSUBS32S,
10511 SPARC_BUILTIN_FUCMPLE8,
10512 SPARC_BUILTIN_FUCMPNE8,
10513 SPARC_BUILTIN_FUCMPGT8,
10514 SPARC_BUILTIN_FUCMPEQ8,
10515 SPARC_BUILTIN_FHADDS,
10516 SPARC_BUILTIN_FHADDD,
10517 SPARC_BUILTIN_FHSUBS,
10518 SPARC_BUILTIN_FHSUBD,
10519 SPARC_BUILTIN_FNHADDS,
10520 SPARC_BUILTIN_FNHADDD,
10521 SPARC_BUILTIN_UMULXHI,
10522 SPARC_BUILTIN_XMULX,
10523 SPARC_BUILTIN_XMULXHI,
10525 /* VIS 4.0 builtins. */
10526 SPARC_BUILTIN_FPADD8,
10527 SPARC_BUILTIN_FPADDS8,
10528 SPARC_BUILTIN_FPADDUS8,
10529 SPARC_BUILTIN_FPADDUS16,
10530 SPARC_BUILTIN_FPCMPLE8,
10531 SPARC_BUILTIN_FPCMPGT8,
10532 SPARC_BUILTIN_FPCMPULE16,
10533 SPARC_BUILTIN_FPCMPUGT16,
10534 SPARC_BUILTIN_FPCMPULE32,
10535 SPARC_BUILTIN_FPCMPUGT32,
10536 SPARC_BUILTIN_FPMAX8,
10537 SPARC_BUILTIN_FPMAX16,
10538 SPARC_BUILTIN_FPMAX32,
10539 SPARC_BUILTIN_FPMAXU8,
10540 SPARC_BUILTIN_FPMAXU16,
10541 SPARC_BUILTIN_FPMAXU32,
10542 SPARC_BUILTIN_FPMIN8,
10543 SPARC_BUILTIN_FPMIN16,
10544 SPARC_BUILTIN_FPMIN32,
10545 SPARC_BUILTIN_FPMINU8,
10546 SPARC_BUILTIN_FPMINU16,
10547 SPARC_BUILTIN_FPMINU32,
10548 SPARC_BUILTIN_FPSUB8,
10549 SPARC_BUILTIN_FPSUBS8,
10550 SPARC_BUILTIN_FPSUBUS8,
10551 SPARC_BUILTIN_FPSUBUS16,
10553 /* VIS 4.0B builtins. */
10555 /* Note that all the DICTUNPACK* entries should be kept
10556 contiguous. */
10557 SPARC_BUILTIN_FIRST_DICTUNPACK,
10558 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10559 SPARC_BUILTIN_DICTUNPACK16,
10560 SPARC_BUILTIN_DICTUNPACK32,
10561 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10563 /* Note that all the FPCMP*SHL entries should be kept
10564 contiguous. */
10565 SPARC_BUILTIN_FIRST_FPCMPSHL,
10566 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10567 SPARC_BUILTIN_FPCMPGT8SHL,
10568 SPARC_BUILTIN_FPCMPEQ8SHL,
10569 SPARC_BUILTIN_FPCMPNE8SHL,
10570 SPARC_BUILTIN_FPCMPLE16SHL,
10571 SPARC_BUILTIN_FPCMPGT16SHL,
10572 SPARC_BUILTIN_FPCMPEQ16SHL,
10573 SPARC_BUILTIN_FPCMPNE16SHL,
10574 SPARC_BUILTIN_FPCMPLE32SHL,
10575 SPARC_BUILTIN_FPCMPGT32SHL,
10576 SPARC_BUILTIN_FPCMPEQ32SHL,
10577 SPARC_BUILTIN_FPCMPNE32SHL,
10578 SPARC_BUILTIN_FPCMPULE8SHL,
10579 SPARC_BUILTIN_FPCMPUGT8SHL,
10580 SPARC_BUILTIN_FPCMPULE16SHL,
10581 SPARC_BUILTIN_FPCMPUGT16SHL,
10582 SPARC_BUILTIN_FPCMPULE32SHL,
10583 SPARC_BUILTIN_FPCMPUGT32SHL,
10584 SPARC_BUILTIN_FPCMPDE8SHL,
10585 SPARC_BUILTIN_FPCMPDE16SHL,
10586 SPARC_BUILTIN_FPCMPDE32SHL,
10587 SPARC_BUILTIN_FPCMPUR8SHL,
10588 SPARC_BUILTIN_FPCMPUR16SHL,
10589 SPARC_BUILTIN_FPCMPUR32SHL,
10590 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10592 SPARC_BUILTIN_MAX
10595 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10596 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10598 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10599 The instruction should require a constant operand of some sort. The
10600 function prints an error if OPVAL is not valid. */
10602 static int
10603 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10605 if (GET_CODE (opval) != CONST_INT)
10607 error ("%qs expects a constant argument", insn_data[icode].name);
10608 return false;
10611 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10613 error ("constant argument out of range for %qs", insn_data[icode].name);
10614 return false;
10616 return true;
10619 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10620 function decl or NULL_TREE if the builtin was not added. */
10622 static tree
10623 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10624 tree type)
10626 tree t
10627 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10629 if (t)
10631 sparc_builtins[code] = t;
10632 sparc_builtins_icode[code] = icode;
10635 return t;
10638 /* Likewise, but also marks the function as "const". */
10640 static tree
10641 def_builtin_const (const char *name, enum insn_code icode,
10642 enum sparc_builtins code, tree type)
10644 tree t = def_builtin (name, icode, code, type);
10646 if (t)
10647 TREE_READONLY (t) = 1;
10649 return t;
10652 /* Implement the TARGET_INIT_BUILTINS target hook.
10653 Create builtin functions for special SPARC instructions. */
10655 static void
10656 sparc_init_builtins (void)
10658 if (TARGET_FPU)
10659 sparc_fpu_init_builtins ();
10661 if (TARGET_VIS)
10662 sparc_vis_init_builtins ();
10665 /* Create builtin functions for FPU instructions. */
10667 static void
10668 sparc_fpu_init_builtins (void)
10670 tree ftype
10671 = build_function_type_list (void_type_node,
10672 build_pointer_type (unsigned_type_node), 0);
10673 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10674 SPARC_BUILTIN_LDFSR, ftype);
10675 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10676 SPARC_BUILTIN_STFSR, ftype);
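/* Illustrative user-level sketch (hypothetical name, not part of this
   file): both builtins take a pointer to a 32-bit word holding the
   %fsr image.  */

static unsigned int
sketch_get_fsr (void)
{
  unsigned int fsr;
  __builtin_store_fsr (&fsr);	/* stfsr: copy %fsr into memory.  */
  return fsr;			/* __builtin_load_fsr writes it back.  */
}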
10679 /* Create builtin functions for VIS instructions. */
10681 static void
10682 sparc_vis_init_builtins (void)
10684 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10685 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10686 tree v4hi = build_vector_type (intHI_type_node, 4);
10687 tree v2hi = build_vector_type (intHI_type_node, 2);
10688 tree v2si = build_vector_type (intSI_type_node, 2);
10689 tree v1si = build_vector_type (intSI_type_node, 1);
10691 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10692 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10693 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10694 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10695 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10696 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10697 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10698 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10699 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10700 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10701 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10702 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10703 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10704 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10705 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10706 v8qi, v8qi,
10707 intDI_type_node, 0);
10708 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10709 v8qi, v8qi, 0);
10710 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10711 v8qi, v8qi, 0);
10712 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10713 intSI_type_node, 0);
10714 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10715 intSI_type_node, 0);
10716 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10717 intSI_type_node, 0);
10718 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10719 intDI_type_node,
10720 intDI_type_node, 0);
10721 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10722 intSI_type_node,
10723 intSI_type_node, 0);
10724 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10725 ptr_type_node,
10726 intSI_type_node, 0);
10727 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10728 ptr_type_node,
10729 intDI_type_node, 0);
10730 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10731 ptr_type_node,
10732 ptr_type_node, 0);
10733 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10734 ptr_type_node,
10735 ptr_type_node, 0);
10736 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10737 v4hi, v4hi, 0);
10738 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10739 v2si, v2si, 0);
10740 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10741 v4hi, v4hi, 0);
10742 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10743 v2si, v2si, 0);
10744 tree void_ftype_di = build_function_type_list (void_type_node,
10745 intDI_type_node, 0);
10746 tree di_ftype_void = build_function_type_list (intDI_type_node,
10747 void_type_node, 0);
10748 tree void_ftype_si = build_function_type_list (void_type_node,
10749 intSI_type_node, 0);
10750 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10751 float_type_node,
10752 float_type_node, 0);
10753 tree df_ftype_df_df = build_function_type_list (double_type_node,
10754 double_type_node,
10755 double_type_node, 0);
10757 /* Packing and expanding vectors. */
10758 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10759 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10760 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10761 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10762 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10763 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10764 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10765 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10766 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10767 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10769 /* Multiplications. */
10770 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10771 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10772 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10773 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10774 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10775 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10776 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10777 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10778 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10779 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10780 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10781 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10782 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10783 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10785 /* Data aligning. */
10786 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10787 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10788 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10789 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10790 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10791 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10792 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10793 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10795 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10796 SPARC_BUILTIN_WRGSR, void_ftype_di);
10797 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10798 SPARC_BUILTIN_RDGSR, di_ftype_void);
10800 if (TARGET_ARCH64)
10802 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10803 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10804 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10805 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10807 else
10809 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10810 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10811 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10812 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10815 /* Pixel distance. */
10816 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10817 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10819 /* Edge handling. */
10820 if (TARGET_ARCH64)
10822 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10823 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10824 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10825 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10826 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10827 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10828 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10829 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10830 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10831 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10832 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10833 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10835 else
10837 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10838 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10839 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10840 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10841 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10842 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10843 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10844 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10845 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10846 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10847 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10848 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10851 /* Pixel compare. */
10852 if (TARGET_ARCH64)
10854 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10855 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10856 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10857 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10858 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10859 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10860 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10861 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10862 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10863 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10864 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10865 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10866 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10867 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10868 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10869 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10871 else
10873 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10874 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10875 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10876 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10877 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10878 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10879 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10880 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10881 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10882 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10883 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10884 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10885 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10886 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10887 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10888 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10891 /* Addition and subtraction. */
10892 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10893 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10894 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10895 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10896 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10897 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10898 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10899 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10900 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10901 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10902 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10903 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10904 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10905 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10906 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10907 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10909 /* Three-dimensional array addressing. */
10910 if (TARGET_ARCH64)
10912 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10913 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10914 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10915 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10916 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10917 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10919 else
10921 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10922 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10923 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10924 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10925 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10926 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10929 if (TARGET_VIS2)
10931 /* Edge handling. */
10932 if (TARGET_ARCH64)
10934 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10935 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10936 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10937 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10938 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10939 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10940 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10941 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10942 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10943 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10944 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10945 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10947 else
10949 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10950 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10951 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10952 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10953 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10954 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10955 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10956 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10957 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10958 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10959 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10960 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10963 /* Byte mask and shuffle. */
10964 if (TARGET_ARCH64)
10965 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10966 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10967 else
10968 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10969 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10970 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10971 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10972 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10973 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10974 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10975 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10976 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10977 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10980 if (TARGET_VIS3)
10982 if (TARGET_ARCH64)
10984 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10985 SPARC_BUILTIN_CMASK8, void_ftype_di);
10986 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10987 SPARC_BUILTIN_CMASK16, void_ftype_di);
10988 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10989 SPARC_BUILTIN_CMASK32, void_ftype_di);
10991 else
10993 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10994 SPARC_BUILTIN_CMASK8, void_ftype_si);
10995 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10996 SPARC_BUILTIN_CMASK16, void_ftype_si);
10997 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10998 SPARC_BUILTIN_CMASK32, void_ftype_si);
11001 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11002 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11004 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11005 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11006 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11007 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11008 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11009 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11010 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11011 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11012 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11013 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11014 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11015 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11016 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11017 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11018 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11019 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11021 if (TARGET_ARCH64)
11022 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11023 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11024 else
11025 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11026 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11028 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11029 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11030 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11031 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11032 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11033 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11035 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11036 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11037 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11038 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11039 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11040 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11041 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11042 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11043 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11044 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11045 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11046 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11047 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11048 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11049 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11050 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11052 if (TARGET_ARCH64)
11054 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11055 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11056 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11057 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11058 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11059 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11060 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11061 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11063 else
11065 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11066 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11067 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11068 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11069 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11070 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11071 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11072 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11075 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11076 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11077 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11078 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11079 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11080 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11081 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11082 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11083 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11084 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11085 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11086 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11088 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11089 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11090 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11091 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11092 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11093 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11096 if (TARGET_VIS4)
11098 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11099 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11100 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11101 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11102 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11103 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11104 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11105 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11108 if (TARGET_ARCH64)
11110 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11111 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11112 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11113 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11114 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11115 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11116 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11117 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11118 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11119 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11120 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11121 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11123 else
11125 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11126 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11127 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11128 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11129 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11130 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11131 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11132 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11133 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11134 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11135 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11136 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11139 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11140 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11141 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11142 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11143 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11144 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11145 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11146 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11147 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11148 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11149 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11150 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11151 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11152 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11153 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11154 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11155 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11156 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11157 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11158 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11159 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11160 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11161 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11162 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11163 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11164 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11165 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11166 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11167 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11168 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11169 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11170 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11173 if (TARGET_VIS4B)
11175 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11176 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11177 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11178 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11179 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11180 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11182 if (TARGET_ARCH64)
11184 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11185 v8qi, v8qi,
11186 intSI_type_node, 0);
11187 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11188 v4hi, v4hi,
11189 intSI_type_node, 0);
11190 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11191 v2si, v2si,
11192 intSI_type_node, 0);
11194 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11195 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11196 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11197 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11198 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11199 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11200 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11201 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11203 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11204 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11205 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11206 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11207 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11208 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11209 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11210 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11212 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11213 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11214 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11215 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11216 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11217 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11218 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11219 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11222 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11223 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11224 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11225 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11227 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11228 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11229 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11230 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11232 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11233 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11234 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11235 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11237 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11238 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11239 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11240 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11241 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11242 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11244 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11245 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11246 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11247 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11248 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11249 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11252 else
11254 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11255 v8qi, v8qi,
11256 intSI_type_node, 0);
11257 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11258 v4hi, v4hi,
11259 intSI_type_node, 0);
11260 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11261 v2si, v2si,
11262 intSI_type_node, 0);
11264 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11265 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11266 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11267 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11268 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11269 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11270 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11271 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11273 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11274 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11275 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11276 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11277 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11278 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11279 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11280 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11282 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11283 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11284 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11285 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11286 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11287 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11288 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11289 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11292 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11293 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11294 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11295 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11297 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11298 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11299 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11300 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11302 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11303 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11304 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11305 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11307 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11308 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11309 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11310 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11311 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11312 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11314 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11315 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11316 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11317 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11318 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11319 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
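/* Illustrative user-level sketch (hypothetical names, not part of this
   file): with -mvis, the builtins defined above are called like ordinary
   functions on the 8-byte vector types.  */

typedef short sketch_v4hi __attribute__ ((vector_size (8)));

static sketch_v4hi
sketch_padd16 (sketch_v4hi a, sketch_v4hi b)
{
  return __builtin_vis_fpadd16 (a, b);	/* A single fpadd16 instruction.  */
}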
11324 /* Implement TARGET_BUILTIN_DECL hook. */
11326 static tree
11327 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11329 if (code >= SPARC_BUILTIN_MAX)
11330 return error_mark_node;
11332 return sparc_builtins[code];
11335 /* Implement TARGET_EXPAND_BUILTIN hook. */
11337 static rtx
11338 sparc_expand_builtin (tree exp, rtx target,
11339 rtx subtarget ATTRIBUTE_UNUSED,
11340 machine_mode tmode ATTRIBUTE_UNUSED,
11341 int ignore ATTRIBUTE_UNUSED)
11343 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11344 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11345 enum insn_code icode = sparc_builtins_icode[code];
11346 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11347 call_expr_arg_iterator iter;
11348 int arg_count = 0;
11349 rtx pat, op[4];
11350 tree arg;
11352 if (nonvoid)
11354 machine_mode tmode = insn_data[icode].operand[0].mode;
11355 if (!target
11356 || GET_MODE (target) != tmode
11357 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11358 op[0] = gen_reg_rtx (tmode);
11359 else
11360 op[0] = target;
11363 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11365 const struct insn_operand_data *insn_op;
11366 int idx;
11368 if (arg == error_mark_node)
11369 return NULL_RTX;
11371 arg_count++;
11372 idx = arg_count - !nonvoid;
11373 insn_op = &insn_data[icode].operand[idx];
11374 op[arg_count] = expand_normal (arg);
11376 /* Some of the builtins require constant arguments. We check
11377 for this here. */
11378 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11379 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11380 && arg_count == 3)
11381 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11382 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11383 && arg_count == 2))
11385 if (!check_constant_argument (icode, idx, op[arg_count]))
11386 return const0_rtx;
11389 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11391 if (!address_operand (op[arg_count], SImode))
11393 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11394 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11396 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11399 else if (insn_op->mode == V1DImode
11400 && GET_MODE (op[arg_count]) == DImode)
11401 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11403 else if (insn_op->mode == V1SImode
11404 && GET_MODE (op[arg_count]) == SImode)
11405 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11407 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11408 insn_op->mode))
11409 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11412 switch (arg_count)
11414 case 0:
11415 pat = GEN_FCN (icode) (op[0]);
11416 break;
11417 case 1:
11418 if (nonvoid)
11419 pat = GEN_FCN (icode) (op[0], op[1]);
11420 else
11421 pat = GEN_FCN (icode) (op[1]);
11422 break;
11423 case 2:
11424 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11425 break;
11426 case 3:
11427 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11428 break;
11429 default:
11430 gcc_unreachable ();
11433 if (!pat)
11434 return NULL_RTX;
11436 emit_insn (pat);
11438 return (nonvoid ? op[0] : const0_rtx);
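/* A note on the constant-operand check above: for the FPCMPSHL builtins
   the third argument, and for the DICTUNPACK builtins the second, must
   fold to a compile-time constant because it maps to an immediate field
   of the instruction; when check_constant_argument rejects the value,
   expansion bails out with const0_rtx instead of emitting an insn. */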
11441 /* Return the upper 16 bits of the 8x16 multiplication. */
11443 static int
11444 sparc_vis_mul8x16 (int e8, int e16)
11446 return (e8 * e16 + 128) / 256;
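/* For instance, with e8 == 128 and e16 == 256 this yields
   (128 * 256 + 128) / 256 = 32896 / 256 = 128 after truncating integer
   division, i.e. the rounded high 16 bits of the 24-bit product. */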
11449 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11450 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11452 static void
11453 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11454 tree inner_type, tree cst0, tree cst1)
11456 unsigned i, num = VECTOR_CST_NELTS (cst0);
11457 int scale;
11459 switch (fncode)
11461 case SPARC_BUILTIN_FMUL8X16:
11462 for (i = 0; i < num; ++i)
11464 int val
11465 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11466 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11467 n_elts->quick_push (build_int_cst (inner_type, val));
11469 break;
11471 case SPARC_BUILTIN_FMUL8X16AU:
11472 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11474 for (i = 0; i < num; ++i)
11476 int val
11477 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11478 scale);
11479 n_elts->quick_push (build_int_cst (inner_type, val));
11481 break;
11483 case SPARC_BUILTIN_FMUL8X16AL:
11484 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11486 for (i = 0; i < num; ++i)
11488 int val
11489 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11490 scale);
11491 n_elts->quick_push (build_int_cst (inner_type, val));
11493 break;
11495 default:
11496 gcc_unreachable ();
11500 /* Implement TARGET_FOLD_BUILTIN hook.
11502 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11503 result of the function call is ignored. NULL_TREE is returned if the
11504 function could not be folded. */
11506 static tree
11507 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11508 tree *args, bool ignore)
11510 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11511 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11512 tree arg0, arg1, arg2;
11514 if (ignore)
11515 switch (code)
11517 case SPARC_BUILTIN_LDFSR:
11518 case SPARC_BUILTIN_STFSR:
11519 case SPARC_BUILTIN_ALIGNADDR:
11520 case SPARC_BUILTIN_WRGSR:
11521 case SPARC_BUILTIN_BMASK:
11522 case SPARC_BUILTIN_CMASK8:
11523 case SPARC_BUILTIN_CMASK16:
11524 case SPARC_BUILTIN_CMASK32:
11525 break;
11527 default:
11528 return build_zero_cst (rtype);
11531 switch (code)
11533 case SPARC_BUILTIN_FEXPAND:
11534 arg0 = args[0];
11535 STRIP_NOPS (arg0);
11537 if (TREE_CODE (arg0) == VECTOR_CST)
11539 tree inner_type = TREE_TYPE (rtype);
11540 unsigned i;
11542 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11543 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11545 unsigned HOST_WIDE_INT val
11546 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11547 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11549 return build_vector (rtype, n_elts);
11551 break;
11553 case SPARC_BUILTIN_FMUL8X16:
11554 case SPARC_BUILTIN_FMUL8X16AU:
11555 case SPARC_BUILTIN_FMUL8X16AL:
11556 arg0 = args[0];
11557 arg1 = args[1];
11558 STRIP_NOPS (arg0);
11559 STRIP_NOPS (arg1);
11561 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11563 tree inner_type = TREE_TYPE (rtype);
11564 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11565 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11566 return build_vector (rtype, n_elts);
11568 break;
11570 case SPARC_BUILTIN_FPMERGE:
11571 arg0 = args[0];
11572 arg1 = args[1];
11573 STRIP_NOPS (arg0);
11574 STRIP_NOPS (arg1);
11576 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11578 auto_vec<tree, 32> n_elts (2 * VECTOR_CST_NELTS (arg0));
11579 unsigned i;
11580 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11582 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11583 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11586 return build_vector (rtype, n_elts);
11588 break;
11590 case SPARC_BUILTIN_PDIST:
11591 case SPARC_BUILTIN_PDISTN:
11592 arg0 = args[0];
11593 arg1 = args[1];
11594 STRIP_NOPS (arg0);
11595 STRIP_NOPS (arg1);
11596 if (code == SPARC_BUILTIN_PDIST)
11598 arg2 = args[2];
11599 STRIP_NOPS (arg2);
11601 else
11602 arg2 = integer_zero_node;
11604 if (TREE_CODE (arg0) == VECTOR_CST
11605 && TREE_CODE (arg1) == VECTOR_CST
11606 && TREE_CODE (arg2) == INTEGER_CST)
11608 bool overflow = false;
11609 widest_int result = wi::to_widest (arg2);
11610 widest_int tmp;
11611 unsigned i;
11613 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11615 tree e0 = VECTOR_CST_ELT (arg0, i);
11616 tree e1 = VECTOR_CST_ELT (arg1, i);
11618 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11620 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11621 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11622 if (wi::neg_p (tmp))
11623 tmp = wi::neg (tmp, &neg2_ovf);
11624 else
11625 neg2_ovf = false;
11626 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11627 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11630 gcc_assert (!overflow);
11632 return wide_int_to_tree (rtype, result);
11635 default:
11636 break;
11639 return NULL_TREE;
11642 /* ??? This duplicates information provided to the compiler by the
11643 ??? scheduler description. Some day, teach genautomata to output
11644 ??? the latencies and then CSE will just use that. */
11646 static bool
11647 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11648 int opno ATTRIBUTE_UNUSED,
11649 int *total, bool speed ATTRIBUTE_UNUSED)
11651 int code = GET_CODE (x);
11652 bool float_mode_p = FLOAT_MODE_P (mode);
11654 switch (code)
11656 case CONST_INT:
11657 if (SMALL_INT (x))
11658 *total = 0;
11659 else
11660 *total = 2;
11661 return true;
11663 case CONST_WIDE_INT:
11664 *total = 0;
11665 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11666 *total += 2;
11667 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11668 *total += 2;
11669 return true;
11671 case HIGH:
11672 *total = 2;
11673 return true;
11675 case CONST:
11676 case LABEL_REF:
11677 case SYMBOL_REF:
11678 *total = 4;
11679 return true;
11681 case CONST_DOUBLE:
11682 *total = 8;
11683 return true;
11685 case MEM:
11686 /* If outer-code was a sign or zero extension, a cost
11687 of COSTS_N_INSNS (1) was already added in. This is
11688 why we are subtracting it back out. */
11689 if (outer_code == ZERO_EXTEND)
11691 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11693 else if (outer_code == SIGN_EXTEND)
11695 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11697 else if (float_mode_p)
11699 *total = sparc_costs->float_load;
11701 else
11703 *total = sparc_costs->int_load;
11706 return true;
11708 case PLUS:
11709 case MINUS:
11710 if (float_mode_p)
11711 *total = sparc_costs->float_plusminus;
11712 else
11713 *total = COSTS_N_INSNS (1);
11714 return false;
11716 case FMA:
11718 rtx sub;
11720 gcc_assert (float_mode_p);
11721 *total = sparc_costs->float_mul;
11723 sub = XEXP (x, 0);
11724 if (GET_CODE (sub) == NEG)
11725 sub = XEXP (sub, 0);
11726 *total += rtx_cost (sub, mode, FMA, 0, speed);
11728 sub = XEXP (x, 2);
11729 if (GET_CODE (sub) == NEG)
11730 sub = XEXP (sub, 0);
11731 *total += rtx_cost (sub, mode, FMA, 2, speed);
11732 return true;
11735 case MULT:
11736 if (float_mode_p)
11737 *total = sparc_costs->float_mul;
11738 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11739 *total = COSTS_N_INSNS (25);
11740 else
11742 int bit_cost;
11744 bit_cost = 0;
11745 if (sparc_costs->int_mul_bit_factor)
11747 int nbits;
11749 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11751 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11752 for (nbits = 0; value != 0; value &= value - 1)
11753 nbits++;
11755 else
11756 nbits = 7;
11758 if (nbits < 3)
11759 nbits = 3;
11760 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11761 bit_cost = COSTS_N_INSNS (bit_cost);
11764 if (mode == DImode || !TARGET_HARD_MUL)
11765 *total = sparc_costs->int_mulX + bit_cost;
11766 else
11767 *total = sparc_costs->int_mul + bit_cost;
11769 return false;
11771 case ASHIFT:
11772 case ASHIFTRT:
11773 case LSHIFTRT:
11774 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11775 return false;
11777 case DIV:
11778 case UDIV:
11779 case MOD:
11780 case UMOD:
11781 if (float_mode_p)
11783 if (mode == DFmode)
11784 *total = sparc_costs->float_div_df;
11785 else
11786 *total = sparc_costs->float_div_sf;
11788 else
11790 if (mode == DImode)
11791 *total = sparc_costs->int_divX;
11792 else
11793 *total = sparc_costs->int_div;
11795 return false;
11797 case NEG:
11798 if (! float_mode_p)
11800 *total = COSTS_N_INSNS (1);
11801 return false;
11803 /* FALLTHRU */
11805 case ABS:
11806 case FLOAT:
11807 case UNSIGNED_FLOAT:
11808 case FIX:
11809 case UNSIGNED_FIX:
11810 case FLOAT_EXTEND:
11811 case FLOAT_TRUNCATE:
11812 *total = sparc_costs->float_move;
11813 return false;
11815 case SQRT:
11816 if (mode == DFmode)
11817 *total = sparc_costs->float_sqrt_df;
11818 else
11819 *total = sparc_costs->float_sqrt_sf;
11820 return false;
11822 case COMPARE:
11823 if (float_mode_p)
11824 *total = sparc_costs->float_cmp;
11825 else
11826 *total = COSTS_N_INSNS (1);
11827 return false;
11829 case IF_THEN_ELSE:
11830 if (float_mode_p)
11831 *total = sparc_costs->float_cmove;
11832 else
11833 *total = sparc_costs->int_cmove;
11834 return false;
11836 case IOR:
11837 /* Handle the NAND vector patterns. */
11838 if (sparc_vector_mode_supported_p (mode)
11839 && GET_CODE (XEXP (x, 0)) == NOT
11840 && GET_CODE (XEXP (x, 1)) == NOT)
11842 *total = COSTS_N_INSNS (1);
11843 return true;
11845 else
11846 return false;
11848 default:
11849 return false;
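/* A worked example of the variable multiply cost above (the factor is
   illustrative): for a MULT by the constant 0xff, the popcount loop
   finds nbits == 8, so with an int_mul_bit_factor of 2 we get
   bit_cost = COSTS_N_INSNS ((8 - 3) / 2) = COSTS_N_INSNS (2) on top of
   int_mul or int_mulX. A non-constant multiplier is charged the
   default nbits == 7. */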
11853 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11855 static inline bool
11856 general_or_i64_p (reg_class_t rclass)
11858 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11861 /* Implement TARGET_REGISTER_MOVE_COST. */
11863 static int
11864 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11865 reg_class_t from, reg_class_t to)
11867 bool need_memory = false;
11869 /* This helps postreload CSE to eliminate redundant comparisons. */
11870 if (from == NO_REGS || to == NO_REGS)
11871 return 100;
11873 if (from == FPCC_REGS || to == FPCC_REGS)
11874 need_memory = true;
11875 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11876 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11878 if (TARGET_VIS3)
11880 int size = GET_MODE_SIZE (mode);
11881 if (size == 8 || size == 4)
11883 if (! TARGET_ARCH32 || size == 4)
11884 return 4;
11885 else
11886 return 6;
11889 need_memory = true;
11892 if (need_memory)
11894 if (sparc_cpu == PROCESSOR_ULTRASPARC
11895 || sparc_cpu == PROCESSOR_ULTRASPARC3
11896 || sparc_cpu == PROCESSOR_NIAGARA
11897 || sparc_cpu == PROCESSOR_NIAGARA2
11898 || sparc_cpu == PROCESSOR_NIAGARA3
11899 || sparc_cpu == PROCESSOR_NIAGARA4
11900 || sparc_cpu == PROCESSOR_NIAGARA7
11901 || sparc_cpu == PROCESSOR_M8)
11902 return 12;
11904 return 6;
11907 return 2;
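/* To illustrate the costs above: with VIS3, a 4-byte SImode move
   between the integer and FP register files costs 4, and an 8-byte
   move costs 6 on 32-bit (4 on 64-bit); without VIS3 the move must
   bounce through memory, which costs 12 on the UltraSPARC/Niagara
   class of cores and 6 elsewhere. */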
11910 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11911 This is achieved by means of a manual dynamic stack space allocation in
11912 the current frame. We make the assumption that SEQ doesn't contain any
11913 function calls, with the possible exception of calls to the GOT helper. */
11915 static void
11916 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11918 /* We must preserve the lowest 16 words for the register save area. */
11919 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11920 /* We really need only 2 words of fresh stack space. */
11921 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11923 rtx slot
11924 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11925 SPARC_STACK_BIAS + offset));
11927 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11928 emit_insn (gen_rtx_SET (slot, reg));
11929 if (reg2)
11930 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11931 reg2));
11932 emit_insn (seq);
11933 if (reg2)
11934 emit_insn (gen_rtx_SET (reg2,
11935 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11936 emit_insn (gen_rtx_SET (reg, slot));
11937 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
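/* Roughly, for the 32-bit case the sequence emitted above is:

   add %sp, -SIZE, %sp
   st  REG,  [%sp + BIAS + OFFSET]
   st  REG2, [%sp + BIAS + OFFSET + 4]   ! if REG2
   ... SEQ ...
   ld  [%sp + BIAS + OFFSET + 4], REG2   ! if REG2
   ld  [%sp + BIAS + OFFSET], REG
   add %sp, SIZE, %sp  */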
11940 /* Output the assembler code for a thunk function. THUNK_DECL is the
11941 declaration for the thunk function itself, FUNCTION is the decl for
11942 the target function. DELTA is an immediate constant offset to be
11943 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11944 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11946 static void
11947 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11948 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11949 tree function)
11951 rtx this_rtx, funexp;
11952 rtx_insn *insn;
11953 unsigned int int_arg_first;
11955 reload_completed = 1;
11956 epilogue_completed = 1;
11958 emit_note (NOTE_INSN_PROLOGUE_END);
11960 if (TARGET_FLAT)
11962 sparc_leaf_function_p = 1;
11964 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11966 else if (flag_delayed_branch)
11968 /* We will emit a regular sibcall below, so we need to instruct
11969 output_sibcall that we are in a leaf function. */
11970 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11972 /* This will cause final.c to invoke leaf_renumber_regs so we
11973 must behave as if we were in a not-yet-leafified function. */
11974 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11976 else
11978 /* We will emit the sibcall manually below, so we will need to
11979 manually spill non-leaf registers. */
11980 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11982 /* We really are in a leaf function. */
11983 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11986 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11987 returns a structure, the structure return pointer is there instead. */
11988 if (TARGET_ARCH64
11989 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11990 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11991 else
11992 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11994 /* Add DELTA. When possible use a plain add, otherwise load it into
11995 a register first. */
11996 if (delta)
11998 rtx delta_rtx = GEN_INT (delta);
12000 if (! SPARC_SIMM13_P (delta))
12002 rtx scratch = gen_rtx_REG (Pmode, 1);
12003 emit_move_insn (scratch, delta_rtx);
12004 delta_rtx = scratch;
12007 /* THIS_RTX += DELTA. */
12008 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12011 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12012 if (vcall_offset)
12014 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12015 rtx scratch = gen_rtx_REG (Pmode, 1);
12017 gcc_assert (vcall_offset < 0);
12019 /* SCRATCH = *THIS_RTX. */
12020 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12022 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12023 may not have any available scratch register at this point. */
12024 if (SPARC_SIMM13_P (vcall_offset))
12026 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12027 else if (! fixed_regs[5]
12028 /* The below sequence is made up of at least 2 insns,
12029 while the default method may need only one. */
12030 && vcall_offset < -8192)
12032 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12033 emit_move_insn (scratch2, vcall_offset_rtx);
12034 vcall_offset_rtx = scratch2;
12036 else
12038 rtx increment = GEN_INT (-4096);
12040 /* VCALL_OFFSET is a negative number whose typical range can be
12041 estimated as -32768..0 in 32-bit mode. In almost all cases
12042 it is therefore cheaper to emit multiple add insns than
12043 spilling and loading the constant into a register (at least
12044 6 insns). */
12045 while (! SPARC_SIMM13_P (vcall_offset))
12047 emit_insn (gen_add2_insn (scratch, increment));
12048 vcall_offset += 4096;
12050 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12053 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12054 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12055 gen_rtx_PLUS (Pmode,
12056 scratch,
12057 vcall_offset_rtx)));
12059 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12060 emit_insn (gen_add2_insn (this_rtx, scratch));
12063 /* Generate a tail call to the target function. */
12064 if (! TREE_USED (function))
12066 assemble_external (function);
12067 TREE_USED (function) = 1;
12069 funexp = XEXP (DECL_RTL (function), 0);
12071 if (flag_delayed_branch)
12073 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12074 insn = emit_call_insn (gen_sibcall (funexp));
12075 SIBLING_CALL_P (insn) = 1;
12077 else
12079 /* The hoops we have to jump through in order to generate a sibcall
12080 without using delay slots... */
12081 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12083 if (flag_pic)
12085 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12086 start_sequence ();
12087 load_got_register (); /* clobbers %o7 */
12088 scratch = sparc_legitimize_pic_address (funexp, scratch);
12089 seq = get_insns ();
12090 end_sequence ();
12091 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12093 else if (TARGET_ARCH32)
12095 emit_insn (gen_rtx_SET (scratch,
12096 gen_rtx_HIGH (SImode, funexp)));
12097 emit_insn (gen_rtx_SET (scratch,
12098 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12100 else /* TARGET_ARCH64 */
12102 switch (sparc_cmodel)
12104 case CM_MEDLOW:
12105 case CM_MEDMID:
12106 /* The destination can serve as a temporary. */
12107 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12108 break;
12110 case CM_MEDANY:
12111 case CM_EMBMEDANY:
12112 /* The destination cannot serve as a temporary. */
12113 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12114 start_sequence ();
12115 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12116 seq = get_insns ();
12117 end_sequence ();
12118 emit_and_preserve (seq, spill_reg, 0);
12119 break;
12121 default:
12122 gcc_unreachable ();
12126 emit_jump_insn (gen_indirect_jump (scratch));
12129 emit_barrier ();
12131 /* Run just enough of rest_of_compilation to get the insns emitted.
12132 There's not really enough bulk here to make other passes such as
12133 instruction scheduling worth while. Note that use_thunk calls
12134 assemble_start_function and assemble_end_function. */
12135 insn = get_insns ();
12136 shorten_branches (insn);
12137 final_start_function (insn, file, 1);
12138 final (insn, file, 1);
12139 final_end_function ();
12141 reload_completed = 0;
12142 epilogue_completed = 0;
12145 /* Return true if sparc_output_mi_thunk would be able to output the
12146 assembler code for the thunk function specified by the arguments
12147 it is passed, and false otherwise. */
12148 static bool
12149 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12150 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12151 HOST_WIDE_INT vcall_offset,
12152 const_tree function ATTRIBUTE_UNUSED)
12154 /* Bound the loop used in the default method above. */
12155 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12158 /* How to allocate a 'struct machine_function'. */
12160 static struct machine_function *
12161 sparc_init_machine_status (void)
12163 return ggc_cleared_alloc<machine_function> ();
12166 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12167 We need to emit DTP-relative relocations. */
12169 static void
12170 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12172 switch (size)
12174 case 4:
12175 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12176 break;
12177 case 8:
12178 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12179 break;
12180 default:
12181 gcc_unreachable ();
12183 output_addr_const (file, x);
12184 fputs (")", file);
12187 /* Do whatever processing is required at the end of a file. */
12189 static void
12190 sparc_file_end (void)
12192 /* If we need to emit the special GOT helper function, do so now. */
12193 if (got_helper_rtx)
12195 const char *name = XSTR (got_helper_rtx, 0);
12196 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12197 #ifdef DWARF2_UNWIND_INFO
12198 bool do_cfi;
12199 #endif
12201 if (USE_HIDDEN_LINKONCE)
12203 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12204 get_identifier (name),
12205 build_function_type_list (void_type_node,
12206 NULL_TREE));
12207 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12208 NULL_TREE, void_type_node);
12209 TREE_PUBLIC (decl) = 1;
12210 TREE_STATIC (decl) = 1;
12211 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12212 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12213 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12214 resolve_unique_section (decl, 0, flag_function_sections);
12215 allocate_struct_function (decl, true);
12216 cfun->is_thunk = 1;
12217 current_function_decl = decl;
12218 init_varasm_status ();
12219 assemble_start_function (decl, name);
12221 else
12223 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12224 switch_to_section (text_section);
12225 if (align > 0)
12226 ASM_OUTPUT_ALIGN (asm_out_file, align);
12227 ASM_OUTPUT_LABEL (asm_out_file, name);
12230 #ifdef DWARF2_UNWIND_INFO
12231 do_cfi = dwarf2out_do_cfi_asm ();
12232 if (do_cfi)
12233 fprintf (asm_out_file, "\t.cfi_startproc\n");
12234 #endif
12235 if (flag_delayed_branch)
12236 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12237 reg_name, reg_name);
12238 else
12239 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12240 reg_name, reg_name);
12241 #ifdef DWARF2_UNWIND_INFO
12242 if (do_cfi)
12243 fprintf (asm_out_file, "\t.cfi_endproc\n");
12244 #endif
12247 if (NEED_INDICATE_EXEC_STACK)
12248 file_end_indicate_exec_stack ();
12250 #ifdef TARGET_SOLARIS
12251 solaris_file_end ();
12252 #endif
12255 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12256 /* Implement TARGET_MANGLE_TYPE. */
12258 static const char *
12259 sparc_mangle_type (const_tree type)
12261 if (TARGET_ARCH32
12262 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12263 && TARGET_LONG_DOUBLE_128)
12264 return "g";
12266 /* For all other types, use normal C++ mangling. */
12267 return NULL;
12269 #endif
12271 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12272 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12273 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12275 void
12276 sparc_emit_membar_for_model (enum memmodel model,
12277 int load_store, int before_after)
12279 /* Bits for the MEMBAR mmask field. */
12280 const int LoadLoad = 1;
12281 const int StoreLoad = 2;
12282 const int LoadStore = 4;
12283 const int StoreStore = 8;
12285 int mm = 0, implied = 0;
12287 switch (sparc_memory_model)
12289 case SMM_SC:
12290 /* Sequential Consistency. All memory transactions are immediately
12291 visible in sequential execution order. No barriers needed. */
12292 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12293 break;
12295 case SMM_TSO:
12296 /* Total Store Ordering: all memory transactions with store semantics
12297 are followed by an implied StoreStore. */
12298 implied |= StoreStore;
12300 /* If we're not looking for a raw barrier (before+after), then atomic
12301 operations get the benefit of being both load and store. */
12302 if (load_store == 3 && before_after == 1)
12303 implied |= StoreLoad;
12304 /* FALLTHRU */
12306 case SMM_PSO:
12307 /* Partial Store Ordering: all memory transactions with load semantics
12308 are followed by an implied LoadLoad | LoadStore. */
12309 implied |= LoadLoad | LoadStore;
12311 /* If we're not looking for a raw barrier (before+after), then atomic
12312 operations get the benefit of being both load and store. */
12313 if (load_store == 3 && before_after == 2)
12314 implied |= StoreLoad | StoreStore;
12315 /* FALLTHRU */
12317 case SMM_RMO:
12318 /* Relaxed Memory Ordering: no implicit bits. */
12319 break;
12321 default:
12322 gcc_unreachable ();
12325 if (before_after & 1)
12327 if (is_mm_release (model) || is_mm_acq_rel (model)
12328 || is_mm_seq_cst (model))
12330 if (load_store & 1)
12331 mm |= LoadLoad | StoreLoad;
12332 if (load_store & 2)
12333 mm |= LoadStore | StoreStore;
12336 if (before_after & 2)
12338 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12339 || is_mm_seq_cst (model))
12341 if (load_store & 1)
12342 mm |= LoadLoad | LoadStore;
12343 if (load_store & 2)
12344 mm |= StoreLoad | StoreStore;
12348 /* Remove the bits implied by the system memory model. */
12349 mm &= ~implied;
12351 /* For raw barriers (before+after), always emit a barrier.
12352 This will become a compile-time barrier if needed. */
12353 if (mm || before_after == 3)
12354 emit_insn (gen_membar (GEN_INT (mm)));
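/* Worked example: a seq-cst full fence, i.e.
   sparc_emit_membar_for_model (MEMMODEL_SEQ_CST, 3, 3), asks for all
   four ordering bits. Under TSO the implied set is LoadLoad |
   LoadStore | StoreStore, so only "membar #StoreLoad" survives; under
   RMO nothing is implied and all four bits are emitted; under SC mm
   becomes 0 and, because before_after == 3, the membar is still
   emitted as a compile-time barrier. */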
12357 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing
12358 a 32-bit compare and swap on the word containing the byte or half-word. */
12360 static void
12361 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12362 rtx oldval, rtx newval)
12364 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12365 rtx addr = gen_reg_rtx (Pmode);
12366 rtx off = gen_reg_rtx (SImode);
12367 rtx oldv = gen_reg_rtx (SImode);
12368 rtx newv = gen_reg_rtx (SImode);
12369 rtx oldvalue = gen_reg_rtx (SImode);
12370 rtx newvalue = gen_reg_rtx (SImode);
12371 rtx res = gen_reg_rtx (SImode);
12372 rtx resv = gen_reg_rtx (SImode);
12373 rtx memsi, val, mask, cc;
12375 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12377 if (Pmode != SImode)
12378 addr1 = gen_lowpart (SImode, addr1);
12379 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12381 memsi = gen_rtx_MEM (SImode, addr);
12382 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12383 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12385 val = copy_to_reg (memsi);
12387 emit_insn (gen_rtx_SET (off,
12388 gen_rtx_XOR (SImode, off,
12389 GEN_INT (GET_MODE (mem) == QImode
12390 ? 3 : 2))));
12392 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12394 if (GET_MODE (mem) == QImode)
12395 mask = force_reg (SImode, GEN_INT (0xff));
12396 else
12397 mask = force_reg (SImode, GEN_INT (0xffff));
12399 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12401 emit_insn (gen_rtx_SET (val,
12402 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12403 val)));
12405 oldval = gen_lowpart (SImode, oldval);
12406 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12408 newval = gen_lowpart_common (SImode, newval);
12409 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12411 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12413 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12415 rtx_code_label *end_label = gen_label_rtx ();
12416 rtx_code_label *loop_label = gen_label_rtx ();
12417 emit_label (loop_label);
12419 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12421 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12423 emit_move_insn (bool_result, const1_rtx);
12425 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12427 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12429 emit_insn (gen_rtx_SET (resv,
12430 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12431 res)));
12433 emit_move_insn (bool_result, const0_rtx);
12435 cc = gen_compare_reg_1 (NE, resv, val);
12436 emit_insn (gen_rtx_SET (val, resv));
12438 /* Use cbranchcc4 to separate the compare and branch! */
12439 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12440 cc, const0_rtx, loop_label));
12442 emit_label (end_label);
12444 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12446 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12448 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
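/* For example, a QImode compare-and-swap on the byte at word offset 1
   (big-endian) computes off = (1 ^ 3) << 3 = 16 and mask = 0xff << 16,
   so the loop above splices the old/new byte into bits 23..16 of the
   containing word and retries the SImode CAS until the other three
   bytes observed by the load are still current. */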
12451 /* Expand code to perform a compare-and-swap. */
12453 void
12454 sparc_expand_compare_and_swap (rtx operands[])
12456 rtx bval, retval, mem, oldval, newval;
12457 machine_mode mode;
12458 enum memmodel model;
12460 bval = operands[0];
12461 retval = operands[1];
12462 mem = operands[2];
12463 oldval = operands[3];
12464 newval = operands[4];
12465 model = (enum memmodel) INTVAL (operands[6]);
12466 mode = GET_MODE (mem);
12468 sparc_emit_membar_for_model (model, 3, 1);
12470 if (reg_overlap_mentioned_p (retval, oldval))
12471 oldval = copy_to_reg (oldval);
12473 if (mode == QImode || mode == HImode)
12474 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12475 else
12477 rtx (*gen) (rtx, rtx, rtx, rtx);
12478 rtx x;
12480 if (mode == SImode)
12481 gen = gen_atomic_compare_and_swapsi_1;
12482 else
12483 gen = gen_atomic_compare_and_swapdi_1;
12484 emit_insn (gen (retval, mem, oldval, newval));
12486 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12487 if (x != bval)
12488 convert_move (bval, x, 1);
12491 sparc_emit_membar_for_model (model, 3, 2);
12494 void
12495 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12497 rtx t_1, t_2, t_3;
12499 sel = gen_lowpart (DImode, sel);
12500 switch (vmode)
12502 case E_V2SImode:
12503 /* inp = xxxxxxxAxxxxxxxB */
12504 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12505 NULL_RTX, 1, OPTAB_DIRECT);
12506 /* t_1 = ....xxxxxxxAxxx. */
12507 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12508 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12509 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12510 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12511 /* sel = .......B */
12512 /* t_1 = ...A.... */
12513 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12514 /* sel = ...A...B */
12515 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12516 /* sel = AAAABBBB * 4 */
12517 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12518 /* sel = { A*4, A*4+1, A*4+2, ... } */
12519 break;
12521 case E_V4HImode:
12522 /* inp = xxxAxxxBxxxCxxxD */
12523 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12524 NULL_RTX, 1, OPTAB_DIRECT);
12525 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12526 NULL_RTX, 1, OPTAB_DIRECT);
12527 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12528 NULL_RTX, 1, OPTAB_DIRECT);
12529 /* t_1 = ..xxxAxxxBxxxCxx */
12530 /* t_2 = ....xxxAxxxBxxxC */
12531 /* t_3 = ......xxxAxxxBxx */
12532 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12533 GEN_INT (0x07),
12534 NULL_RTX, 1, OPTAB_DIRECT);
12535 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12536 GEN_INT (0x0700),
12537 NULL_RTX, 1, OPTAB_DIRECT);
12538 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12539 GEN_INT (0x070000),
12540 NULL_RTX, 1, OPTAB_DIRECT);
12541 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12542 GEN_INT (0x07000000),
12543 NULL_RTX, 1, OPTAB_DIRECT);
12544 /* sel = .......D */
12545 /* t_1 = .....C.. */
12546 /* t_2 = ...B.... */
12547 /* t_3 = .A...... */
12548 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12549 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12550 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12551 /* sel = .A.B.C.D */
12552 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12553 /* sel = AABBCCDD * 2 */
12554 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12555 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12556 break;
12558 case E_V8QImode:
12559 /* input = xAxBxCxDxExFxGxH */
12560 sel = expand_simple_binop (DImode, AND, sel,
12561 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12562 | 0x0f0f0f0f),
12563 NULL_RTX, 1, OPTAB_DIRECT);
12564 /* sel = .A.B.C.D.E.F.G.H */
12565 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12566 NULL_RTX, 1, OPTAB_DIRECT);
12567 /* t_1 = ..A.B.C.D.E.F.G. */
12568 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12569 NULL_RTX, 1, OPTAB_DIRECT);
12570 /* sel = .AABBCCDDEEFFGGH */
12571 sel = expand_simple_binop (DImode, AND, sel,
12572 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12573 | 0xff00ff),
12574 NULL_RTX, 1, OPTAB_DIRECT);
12575 /* sel = ..AB..CD..EF..GH */
12576 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12577 NULL_RTX, 1, OPTAB_DIRECT);
12578 /* t_1 = ....AB..CD..EF.. */
12579 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12580 NULL_RTX, 1, OPTAB_DIRECT);
12581 /* sel = ..ABABCDCDEFEFGH */
12582 sel = expand_simple_binop (DImode, AND, sel,
12583 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12584 NULL_RTX, 1, OPTAB_DIRECT);
12585 /* sel = ....ABCD....EFGH */
12586 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12587 NULL_RTX, 1, OPTAB_DIRECT);
12588 /* t_1 = ........ABCD.... */
12589 sel = gen_lowpart (SImode, sel);
12590 t_1 = gen_lowpart (SImode, t_1);
12591 break;
12593 default:
12594 gcc_unreachable ();
12597 /* Always perform the final addition/merge within the bmask insn. */
12598 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12601 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12603 static bool
12604 sparc_frame_pointer_required (void)
12606 /* If the stack pointer is dynamically modified in the function, it cannot
12607 serve as the frame pointer. */
12608 if (cfun->calls_alloca)
12609 return true;
12611 /* If the function receives nonlocal gotos, it needs to save the frame
12612 pointer in the nonlocal_goto_save_area object. */
12613 if (cfun->has_nonlocal_label)
12614 return true;
12616 /* In flat mode, that's it. */
12617 if (TARGET_FLAT)
12618 return false;
12620 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12621 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12622 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12625 /* The way this is structured, we can't eliminate SFP in favor of SP
12626 if the frame pointer is required: we want to use the SFP->HFP elimination
12627 in that case. But the test in update_eliminables doesn't know we are
12628 assuming below that we only do the former elimination. */
12630 static bool
12631 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12633 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12636 /* Return the hard frame pointer directly to bypass the stack bias. */
12638 static rtx
12639 sparc_builtin_setjmp_frame_value (void)
12641 return hard_frame_pointer_rtx;
12644 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12645 they won't be allocated. */
12647 static void
12648 sparc_conditional_register_usage (void)
12650 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12652 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12653 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12655 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12656 then honor it. */
12657 if (TARGET_ARCH32 && fixed_regs[5])
12658 fixed_regs[5] = 1;
12659 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12660 fixed_regs[5] = 0;
12661 if (! TARGET_V9)
12663 int regno;
12664 for (regno = SPARC_FIRST_V9_FP_REG;
12665 regno <= SPARC_LAST_V9_FP_REG;
12666 regno++)
12667 fixed_regs[regno] = 1;
12668 /* %fcc0 is used by v8 and v9. */
12669 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12670 regno <= SPARC_LAST_V9_FCC_REG;
12671 regno++)
12672 fixed_regs[regno] = 1;
12674 if (! TARGET_FPU)
12676 int regno;
12677 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12678 fixed_regs[regno] = 1;
12680 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12681 then honor it. Likewise with g3 and g4. */
12682 if (fixed_regs[2] == 2)
12683 fixed_regs[2] = ! TARGET_APP_REGS;
12684 if (fixed_regs[3] == 2)
12685 fixed_regs[3] = ! TARGET_APP_REGS;
12686 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12687 fixed_regs[4] = ! TARGET_APP_REGS;
12688 else if (TARGET_CM_EMBMEDANY)
12689 fixed_regs[4] = 1;
12690 else if (fixed_regs[4] == 2)
12691 fixed_regs[4] = 0;
12692 if (TARGET_FLAT)
12694 int regno;
12695 /* Disable leaf functions. */
12696 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12697 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12698 leaf_reg_remap [regno] = regno;
12700 if (TARGET_VIS)
12701 global_regs[SPARC_GSR_REG] = 1;
12704 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12706 - We can't load constants into FP registers.
12707 - We can't load FP constants into integer registers when soft-float,
12708 because there is no soft-float pattern with a r/F constraint.
12709 - We can't load FP constants into integer registers for TFmode unless
12710 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12711 - Try to reload integer constants (symbolic or otherwise) back into
12712 registers directly, rather than having them dumped to memory. */
12714 static reg_class_t
12715 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12717 machine_mode mode = GET_MODE (x);
12718 if (CONSTANT_P (x))
12720 if (FP_REG_CLASS_P (rclass)
12721 || rclass == GENERAL_OR_FP_REGS
12722 || rclass == GENERAL_OR_EXTRA_FP_REGS
12723 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12724 || (mode == TFmode && ! const_zero_operand (x, mode)))
12725 return NO_REGS;
12727 if (GET_MODE_CLASS (mode) == MODE_INT)
12728 return GENERAL_REGS;
12730 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12732 if (! FP_REG_CLASS_P (rclass)
12733 || !(const_zero_operand (x, mode)
12734 || const_all_ones_operand (x, mode)))
12735 return NO_REGS;
12739 if (TARGET_VIS3
12740 && ! TARGET_ARCH64
12741 && (rclass == EXTRA_FP_REGS
12742 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12744 int regno = true_regnum (x);
12746 if (SPARC_INT_REG_P (regno))
12747 return (rclass == EXTRA_FP_REGS
12748 ? FP_REGS : GENERAL_OR_FP_REGS);
12751 return rclass;
12754 /* Return true if we use LRA instead of the reload pass. */
12756 static bool
12757 sparc_lra_p (void)
12759 return TARGET_LRA;
12762 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12763 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12765 const char *
12766 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12768 char mulstr[32];
12770 gcc_assert (! TARGET_ARCH64);
12772 if (sparc_check_64 (operands[1], insn) <= 0)
12773 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12774 if (which_alternative == 1)
12775 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12776 if (GET_CODE (operands[2]) == CONST_INT)
12778 if (which_alternative == 1)
12780 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12781 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12782 output_asm_insn (mulstr, operands);
12783 return "srlx\t%L0, 32, %H0";
12785 else
12787 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12788 output_asm_insn ("or\t%L1, %3, %3", operands);
12789 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12790 output_asm_insn (mulstr, operands);
12791 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12792 return "mov\t%3, %L0";
12795 else if (rtx_equal_p (operands[1], operands[2]))
12797 if (which_alternative == 1)
12799 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12800 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12801 output_asm_insn (mulstr, operands);
12802 return "srlx\t%L0, 32, %H0";
12804 else
12806 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12807 output_asm_insn ("or\t%L1, %3, %3", operands);
12808 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12809 output_asm_insn (mulstr, operands);
12810 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12811 return "mov\t%3, %L0";
12814 if (sparc_check_64 (operands[2], insn) <= 0)
12815 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12816 if (which_alternative == 1)
12818 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12819 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12820 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12821 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12822 output_asm_insn (mulstr, operands);
12823 return "srlx\t%L0, 32, %H0";
12825 else
12827 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12828 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12829 output_asm_insn ("or\t%L1, %3, %3", operands);
12830 output_asm_insn ("or\t%L2, %4, %4", operands);
12831 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12832 output_asm_insn (mulstr, operands);
12833 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12834 return "mov\t%3, %L0";
12838 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12839 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
12840 and INNER_MODE are the modes describing TARGET. */
12842 static void
12843 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12844 machine_mode inner_mode)
12846 rtx t1, final_insn, sel;
12847 int bmask;
12849 t1 = gen_reg_rtx (mode);
12851 elt = convert_modes (SImode, inner_mode, elt, true);
12852 emit_move_insn (gen_lowpart(SImode, t1), elt);
12854 switch (mode)
12856 case E_V2SImode:
12857 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12858 bmask = 0x45674567;
12859 break;
12860 case E_V4HImode:
12861 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12862 bmask = 0x67676767;
12863 break;
12864 case E_V8QImode:
12865 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12866 bmask = 0x77777777;
12867 break;
12868 default:
12869 gcc_unreachable ();
12872 sel = force_reg (SImode, GEN_INT (bmask));
12873 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12874 emit_insn (final_insn);
12877 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12878 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
12880 static void
12881 vector_init_fpmerge (rtx target, rtx elt)
12883 rtx t1, t2, t2_low, t3, t3_low;
12885 t1 = gen_reg_rtx (V4QImode);
12886 elt = convert_modes (SImode, QImode, elt, true);
12887 emit_move_insn (gen_lowpart (SImode, t1), elt);
12889 t2 = gen_reg_rtx (V8QImode);
12890 t2_low = gen_lowpart (V4QImode, t2);
12891 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12893 t3 = gen_reg_rtx (V8QImode);
12894 t3_low = gen_lowpart (V4QImode, t3);
12895 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12897 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12900 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12901 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
12903 static void
12904 vector_init_faligndata (rtx target, rtx elt)
12906 rtx t1 = gen_reg_rtx (V4HImode);
12907 int i;
12909 elt = convert_modes (SImode, HImode, elt, true);
12910 emit_move_insn (gen_lowpart (SImode, t1), elt);
12912 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12913 force_reg (SImode, GEN_INT (6)),
12914 const0_rtx));
12916 for (i = 0; i < 4; i++)
12917 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12920 /* Emit code to initialize the individual fields of vector TARGET to the values given in VALS. */
12922 void
12923 sparc_expand_vector_init (rtx target, rtx vals)
12925 const machine_mode mode = GET_MODE (target);
12926 const machine_mode inner_mode = GET_MODE_INNER (mode);
12927 const int n_elts = GET_MODE_NUNITS (mode);
12928 int i, n_var = 0;
12929 bool all_same = true;
12930 rtx mem;
12932 for (i = 0; i < n_elts; i++)
12934 rtx x = XVECEXP (vals, 0, i);
12935 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12936 n_var++;
12938 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12939 all_same = false;
12942 if (n_var == 0)
12944 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12945 return;
12948 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12950 if (GET_MODE_SIZE (inner_mode) == 4)
12952 emit_move_insn (gen_lowpart (SImode, target),
12953 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12954 return;
12956 else if (GET_MODE_SIZE (inner_mode) == 8)
12958 emit_move_insn (gen_lowpart (DImode, target),
12959 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12960 return;
12963 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12964 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12966 emit_move_insn (gen_highpart (word_mode, target),
12967 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12968 emit_move_insn (gen_lowpart (word_mode, target),
12969 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12970 return;
12973 if (all_same && GET_MODE_SIZE (mode) == 8)
12975 if (TARGET_VIS2)
12977 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12978 return;
12980 if (mode == V8QImode)
12982 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12983 return;
12985 if (mode == V4HImode)
12987 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12988 return;
12992 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12993 for (i = 0; i < n_elts; i++)
12994 emit_move_insn (adjust_address_nv (mem, inner_mode,
12995 i * GET_MODE_SIZE (inner_mode)),
12996 XVECEXP (vals, 0, i));
12997 emit_move_insn (target, mem);
13000 /* Implement TARGET_SECONDARY_RELOAD. */
13002 static reg_class_t
13003 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13004 machine_mode mode, secondary_reload_info *sri)
13006 enum reg_class rclass = (enum reg_class) rclass_i;
13008 sri->icode = CODE_FOR_nothing;
13009 sri->extra_cost = 0;
13011 /* We need a temporary when loading/storing a HImode/QImode value
13012 between memory and the FPU registers. This can happen when combine puts
13013 a paradoxical subreg in a float/fix conversion insn. */
13014 if (FP_REG_CLASS_P (rclass)
13015 && (mode == HImode || mode == QImode)
13016 && (GET_CODE (x) == MEM
13017 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13018 && true_regnum (x) == -1)))
13019 return GENERAL_REGS;
13021 /* On 32-bit we need a temporary when loading/storing a DFmode value
13022 between unaligned memory and the upper FPU registers. */
13023 if (TARGET_ARCH32
13024 && rclass == EXTRA_FP_REGS
13025 && mode == DFmode
13026 && GET_CODE (x) == MEM
13027 && ! mem_min_alignment (x, 8))
13028 return FP_REGS;
13030 if (((TARGET_CM_MEDANY
13031 && symbolic_operand (x, mode))
13032 || (TARGET_CM_EMBMEDANY
13033 && text_segment_operand (x, mode)))
13034 && ! flag_pic)
13036 if (in_p)
13037 sri->icode = direct_optab_handler (reload_in_optab, mode);
13038 else
13039 sri->icode = direct_optab_handler (reload_out_optab, mode);
13040 return NO_REGS;
13043 if (TARGET_VIS3 && TARGET_ARCH32)
13045 int regno = true_regnum (x);
13047 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13048 to move 8-byte values in 4-byte pieces. This only works via
13049 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13050 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13051 an FP_REGS intermediate move. */
13052 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13053 || ((general_or_i64_p (rclass)
13054 || rclass == GENERAL_OR_FP_REGS)
13055 && SPARC_FP_REG_P (regno)))
13057 sri->extra_cost = 2;
13058 return FP_REGS;
13062 return NO_REGS;
13065 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13067 On SPARC, when VIS3 is not available, it is not possible to move data
13068 directly between GENERAL_REGS and FP_REGS. */
13070 static bool
13071 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13072 reg_class_t class2)
13074 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13075 && (! TARGET_VIS3
13076 || GET_MODE_SIZE (mode) > 8
13077 || GET_MODE_SIZE (mode) < 4));
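/* Concretely: without VIS3 every move between an integer class and an
   FP class goes through memory. With VIS3, 4-byte (SImode/SFmode) and
   8-byte (DImode/DFmode) values can be moved directly, while QImode
   and HImode (< 4 bytes) and TFmode (> 8 bytes) still need a memory
   intermediary. */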
13080 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13082 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13083 because the movsi and movsf patterns don't handle r/f moves.
13084 For v8 we copy the default definition. */
13086 static machine_mode
13087 sparc_secondary_memory_needed_mode (machine_mode mode)
13089 if (TARGET_ARCH64)
13091 if (GET_MODE_BITSIZE (mode) < 32)
13092 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13093 return mode;
13095 else
13097 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13098 return mode_for_size (BITS_PER_WORD,
13099 GET_MODE_CLASS (mode), 0).require ();
13100 return mode;
13104 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13105 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13107 bool
13108 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13110 enum rtx_code rc = GET_CODE (operands[1]);
13111 machine_mode cmp_mode;
13112 rtx cc_reg, dst, cmp;
13114 cmp = operands[1];
13115 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13116 return false;
13118 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13119 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13121 cmp_mode = GET_MODE (XEXP (cmp, 0));
13122 rc = GET_CODE (cmp);
13124 dst = operands[0];
13125 if (! rtx_equal_p (operands[2], dst)
13126 && ! rtx_equal_p (operands[3], dst))
13128 if (reg_overlap_mentioned_p (dst, cmp))
13129 dst = gen_reg_rtx (mode);
13131 emit_move_insn (dst, operands[3]);
13133 else if (operands[2] == dst)
13135 operands[2] = operands[3];
13137 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13138 rc = reverse_condition_maybe_unordered (rc);
13139 else
13140 rc = reverse_condition (rc);
13143 if (XEXP (cmp, 1) == const0_rtx
13144 && GET_CODE (XEXP (cmp, 0)) == REG
13145 && cmp_mode == DImode
13146 && v9_regcmp_p (rc))
13147 cc_reg = XEXP (cmp, 0);
13148 else
13149 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13151 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13153 emit_insn (gen_rtx_SET (dst,
13154 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13156 if (dst != operands[0])
13157 emit_move_insn (operands[0], dst);
13159 return true;
13162 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13163 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13164 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13165 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13166 code to be used for the condition mask. */
13168 void
13169 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13171 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13172 enum rtx_code code = GET_CODE (operands[3]);
13174 mask = gen_reg_rtx (Pmode);
13175 cop0 = operands[4];
13176 cop1 = operands[5];
13177 if (code == LT || code == GE)
13179 rtx t;
13181 code = swap_condition (code);
13182 t = cop0; cop0 = cop1; cop1 = t;
13185 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13187 fcmp = gen_rtx_UNSPEC (Pmode,
13188 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13189 fcode);
13191 cmask = gen_rtx_UNSPEC (DImode,
13192 gen_rtvec (2, mask, gsr),
13193 ccode);
13195 bshuf = gen_rtx_UNSPEC (mode,
13196 gen_rtvec (3, operands[1], operands[2], gsr),
13197 UNSPEC_BSHUFFLE);
13199 emit_insn (gen_rtx_SET (mask, fcmp));
13200 emit_insn (gen_rtx_SET (gsr, cmask));
13202 emit_insn (gen_rtx_SET (operands[0], bshuf));
13205 /* On SPARC, any mode which naturally allocates into the float
13206 registers should return 4 here. */
13208 unsigned int
13209 sparc_regmode_natural_size (machine_mode mode)
13211 int size = UNITS_PER_WORD;
13213 if (TARGET_ARCH64)
13215 enum mode_class mclass = GET_MODE_CLASS (mode);
13217 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13218 size = 4;
13221 return size;
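/* E.g. on 64-bit, DFmode reports a natural size of 4 because it lives
   in pairs of 32-bit FP registers, whereas DImode reports
   UNITS_PER_WORD (8) since the integer registers are 64 bits wide. */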
13224 /* Implement TARGET_HARD_REGNO_NREGS.
13226 On SPARC, ordinary registers hold 32 bits worth; this means both
13227 integer and floating point registers. On v9, integer regs hold 64
13228 bits worth; floating point regs hold 32 bits worth (this includes the
13229 new fp regs as even the odd ones are included in the hard register
13230 count). */
13232 static unsigned int
13233 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13235 if (regno == SPARC_GSR_REG)
13236 return 1;
13237 if (TARGET_ARCH64)
13239 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13240 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13241 return CEIL (GET_MODE_SIZE (mode), 4);
13243 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
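/* For example, on 64-bit a DFmode value occupies CEIL (8, 8) = 1
   integer register but CEIL (8, 4) = 2 FP registers; on 32-bit, where
   UNITS_PER_WORD is 4, it occupies 2 registers of either kind. */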
13246 /* Implement TARGET_HARD_REGNO_MODE_OK.
13248 ??? Because of the funny way we pass parameters we should allow certain
13249 ??? types of float/complex values to be in integer registers during
13250 ??? RTL generation. This only matters on arch32. */
13252 static bool
13253 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13255 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13258 /* Implement TARGET_MODES_TIEABLE_P.
13260 For V9 we have to deal with the fact that only the lower 32 floating
13261 point registers are 32-bit addressable. */
13263 static bool
13264 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13266 enum mode_class mclass1, mclass2;
13267 unsigned short size1, size2;
13269 if (mode1 == mode2)
13270 return true;
13272 mclass1 = GET_MODE_CLASS (mode1);
13273 mclass2 = GET_MODE_CLASS (mode2);
13274 if (mclass1 != mclass2)
13275 return false;
13277 if (! TARGET_V9)
13278 return true;
13280 /* Classes are the same and we are V9 so we have to deal with upper
13281 vs. lower floating point registers. If one of the modes is a
13282 4-byte mode, and the other is not, we have to mark them as not
13283 tieable because only the lower 32 floating point registers are
13284 addressable 32 bits at a time.
13286 We can't just test explicitly for SFmode, otherwise we won't
13287 cover the vector mode cases properly. */
13289 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13290 return true;
13292 size1 = GET_MODE_SIZE (mode1);
13293 size2 = GET_MODE_SIZE (mode2);
13294 if ((size1 > 4 && size2 == 4)
13295 || (size2 > 4 && size1 == 4))
13296 return false;
13298 return true;
13301 /* Implement TARGET_CSTORE_MODE. */
13303 static scalar_int_mode
13304 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13306 return (TARGET_ARCH64 ? DImode : SImode);

/* Return the compound expression made of T1 and T2.  */

static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_FPU)
    return;

  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;

  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (fenv_var) = 1;
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  tree hold_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
              build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);

  tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp1_var) = 1;
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
              build_int_cst (unsigned_type_node,
                             ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
              NULL_TREE, NULL_TREE);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
         tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp2_var) = 1;
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
              build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
                       fold_convert (integer_type_node, tmp2_var));

  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
        = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
                  build_int_cst (unsigned_type_node, 5));
      tree update_shift
        = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}
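
/* For reference (a sketch, not from the source): the two masks above
   select these FSR fields, bit positions per the SPARC architecture
   manual:

     accrued_exception_mask = 0x1f << 5;    aexc, FSR bits 9:5
     trap_enable_mask       = 0x1f << 23;   TEM,  FSR bits 27:23

   so the hold sequence saves the FSR, clears both fields and reloads it
   (the usual feholdexcept shape), while update re-raises whatever
   accrued in the meantime.  */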

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  Borrowed from the PA port.

   SImode loads to floating-point registers are not zero-extended.
   The definition for LOAD_EXTEND_OP specifies that integer loads
   narrower than BITS_PER_WORD will be zero-extended.  As a result,
   we inhibit changes from SImode unless they are to a mode that is
   identical in size.

   Likewise for SFmode, since word-mode paradoxical subregs are
   problematic on big-endian architectures.  */

static bool
sparc_can_change_mode_class (machine_mode from, machine_mode to,
                             reg_class_t rclass)
{
  if (TARGET_ARCH64
      && GET_MODE_SIZE (from) == 4
      && GET_MODE_SIZE (to) != 4)
    return !reg_classes_intersect_p (rclass, FP_REGS);
  return true;
}
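
/* Illustrative only: under TARGET_ARCH64 this rejects, e.g., taking a
   DImode view of a 4-byte value whose class may include FP registers:

     (subreg:DI (reg:SF ...) 0)   rejected when rclass meets FP_REGS
     (subreg:DI (reg:SI ...) 0)   allowed for integer-only classes  */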

#include "gt-sparc.h"