Turn HARD_REGNO_NREGS into a target hook
gcc/config/sparc/sparc.c
/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2017 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"
/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;
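
  /* For instance (illustrative worked example): with ultrasparc_costs below,
     where int_mul is COSTS_N_INSNS (4) and int_mul_bit_factor is 2, a
     multiply by a value whose highest set bit is 11 is costed at
     COSTS_N_INSNS (4) + (11 - 3) / 2, i.e. four extra cost units on top of
     the base multiply latency.  */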

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
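
/* For example (illustrative): leaf_reg_remap[24] is 8, so the incoming
   register %i0 (hard reg 24) is renumbered to %o0 (hard reg 8) in a leaf
   function, which executes in its caller's register window; entries of -1
   mark registers that must not appear at all.  */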

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};
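
/* For example (illustrative): sparc_leaf_regs[16] is 0, so a function that
   needs local register %l0 cannot receive the leaf treatment, while one
   using only global, floating-point and in registers (renumbered via
   leaf_reg_remap above) can.  */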

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
static bool sparc_modes_tieable_p (machine_mode, machine_mode);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P sparc_lra_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_MIN_ARITHMETIC_PRECISION
#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}
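
/* For instance (illustrative): applied to (zero_extend:SI (mem:QI ...)),
   mem_ref returns the inner (mem:QI ...); applied to an RTX that contains
   no memory reference, such as (plus:SI (reg ...) (const_int 4)), it
   returns NULL_RTX.  */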

/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

/* True if INSN is a md pattern or asm statement.  */
#define USEFUL_INSN_P(INSN)			\
  (NONDEBUG_INSN_P (INSN)			\
   && GET_CODE (PATTERN (INSN)) != USE		\
   && GET_CODE (PATTERN (INSN)) != CLOBBER)
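
/* So, for example, debug insns and patterns consisting solely of (use ...)
   or (clobber ...) are not considered useful by the workaround scanner
   below, while real instructions and inline asm statements are.  */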

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for either of these two sequences:

	 Sequence A:
	 1. store of word size or less (e.g. st / stb / sth / stf)
	 2. any single instruction that is not a load or store
	 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

	 Sequence B:
	 1. store of double word size (e.g. std / stdf)
	 2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
      if (sparc_fix_b2bst
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && MEM_P (SET_DEST (set)))
	{
	  /* Sequence B begins with a double-word store.  */
	  bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 2; i++)
	    {
	      /* Skip empty assembly statements.  */
	      if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
		  || (USEFUL_INSN_P (after)
		      && (asm_noperands (PATTERN (after)) >= 0)
		      && !strcmp (decode_asm_operands (PATTERN (after),
						       NULL, NULL, NULL,
						       NULL, NULL), "")))
		after = next_active_insn (after);
	      if (!after)
		break;

	      /* If the insn is a branch, then it cannot be problematic.  */
	      if (!NONJUMP_INSN_P (after)
		  || GET_CODE (PATTERN (after)) == SEQUENCE)
		break;

	      /* Sequence B is only two instructions long.  */
	      if (seq_b)
		{
		  /* Add NOP if followed by a store.  */
		  if ((set = single_set (after)) != NULL_RTX
		      && MEM_P (SET_DEST (set)))
		    insert_nop = true;

		  /* Otherwise it is ok.  */
		  break;
		}

	      /* If the second instruction is a load or a store,
		 then the sequence cannot be problematic.  */
	      if (i == 0)
		{
		  if (((set = single_set (after)) != NULL_RTX)
		      && (MEM_P (SET_DEST (set)) || MEM_P (SET_SRC (set))))
		    break;

		  after = next_active_insn (after);
		  if (!after)
		    break;
		}

	      /* Add NOP if third instruction is a store.  */
	      if (i == 1
		  && ((set = single_set (after)) != NULL_RTX)
		  && MEM_P (SET_DEST (set)))
		insert_nop = true;
	    }
	}
      else
      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		       ld [address], %fx+1
		       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		       ld [address], %fx+1
		       FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		   ld [address], %fx+1
		   fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}
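
/* Note (sketch, not shown in this excerpt): a target pass such as this one
   is typically instantiated and wired into the pass pipeline from the
   back end's initialization code using the register_pass () machinery,
   late enough that delay slots are already filled when the errata scan
   runs.  */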

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FSMULD)
    fprintf (stderr, "FSMULD ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_VIS4)
    fprintf (stderr, "VIS4 ");
  if (flags & MASK_VIS4B)
    fprintf (stderr, "VIS4B ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}
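
/* With TARGET_DEBUG_OPTIONS enabled, these helpers print lines of the form
   (illustrative): "Final target_flags: (xxxxxxxx) [ FPU VIS V9 ]".  */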

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { TARGET_CPU_m8, PROCESSOR_M8 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA|MASK_FSMULD, 0 },
    { "cypress", MASK_ISA|MASK_FSMULD, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8 },
    { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
    { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
    { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
    /* UltraSPARC M8 */
    { "m8", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
  };
  const struct cpu_table *cpu;
  unsigned int i;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  /* Enable the FsMULd instruction by default if not explicitly specified by
     the user.  It may be later disabled by the CPU (explicitly or not).  */
  if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
    target_flags |= MASK_FSMULD;

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (!TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double.  */
  if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32-bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  /* Set the default CPU if no -mcpu option was specified.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  /* Set the default CPU if no -mtune option was specified.  */
  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_SPARC5_VIS4
		   & ~(MASK_VIS4 | MASK_SUBXC)
#endif
#ifndef HAVE_AS_SPARC6
		   & ~(MASK_VIS4B)
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   & ~(target_flags_explicit & MASK_FEATURES)
		   );
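
  /* For example (illustrative): with -mcpu=niagara2 -mno-vis2, the table
     entry enables MASK_V9|MASK_POPC|MASK_VIS2, but MASK_VIS2 is recorded in
     target_flags_explicit, so the user's -mno-vis2 wins and VIS2 stays
     disabled.  */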
1611 /* -mvis2 implies -mvis. */
1612 if (TARGET_VIS2)
1613 target_flags |= MASK_VIS;
1615 /* -mvis3 implies -mvis2 and -mvis. */
1616 if (TARGET_VIS3)
1617 target_flags |= MASK_VIS2 | MASK_VIS;
1619 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1620 if (TARGET_VIS4)
1621 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1623 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1624 if (TARGET_VIS4B)
1625 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1627 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1628 FPU is disabled. */
1629 if (!TARGET_FPU)
1630 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1631 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1633 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1634 are available; -m64 also implies v9. */
1635 if (TARGET_VIS || TARGET_ARCH64)
1637 target_flags |= MASK_V9;
1638 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1641 /* -mvis also implies -mv8plus on 32-bit. */
1642 if (TARGET_VIS && !TARGET_ARCH64)
1643 target_flags |= MASK_V8PLUS;
1645 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1646 if (TARGET_V9 && TARGET_ARCH32)
1647 target_flags |= MASK_DEPRECATED_V8_INSNS;
1649 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1650 if (!TARGET_V9 || TARGET_ARCH64)
1651 target_flags &= ~MASK_V8PLUS;
1653 /* Don't use stack biasing in 32-bit mode. */
1654 if (TARGET_ARCH32)
1655 target_flags &= ~MASK_STACK_BIAS;
1657 /* Use LRA instead of reload, unless otherwise instructed. */
1658 if (!(target_flags_explicit & MASK_LRA))
1659 target_flags |= MASK_LRA;
1661 /* Enable the back-to-back store errata workaround for LEON3FT. */
1662 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1663 sparc_fix_b2bst = 1;
1665 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1666 if (sparc_fix_ut699)
1667 target_flags &= ~MASK_FSMULD;
1669 /* Supply a default value for align_functions. */
1670 if (align_functions == 0)
1672 if (sparc_cpu == PROCESSOR_ULTRASPARC
1673 || sparc_cpu == PROCESSOR_ULTRASPARC3
1674 || sparc_cpu == PROCESSOR_NIAGARA
1675 || sparc_cpu == PROCESSOR_NIAGARA2
1676 || sparc_cpu == PROCESSOR_NIAGARA3
1677 || sparc_cpu == PROCESSOR_NIAGARA4)
1678 align_functions = 32;
1679 else if (sparc_cpu == PROCESSOR_NIAGARA7
1680 || sparc_cpu == PROCESSOR_M8)
1681 align_functions = 64;
1684 /* Validate PCC_STRUCT_RETURN. */
1685 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1686 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1688 /* Only use .uaxword when compiling for a 64-bit target. */
1689 if (!TARGET_ARCH64)
1690 targetm.asm_out.unaligned_op.di = NULL;
1692 /* Do various machine dependent initializations. */
1693 sparc_init_modes ();
1695 /* Set up function hooks. */
1696 init_machine_status = sparc_init_machine_status;
1698 switch (sparc_cpu)
1700 case PROCESSOR_V7:
1701 case PROCESSOR_CYPRESS:
1702 sparc_costs = &cypress_costs;
1703 break;
1704 case PROCESSOR_V8:
1705 case PROCESSOR_SPARCLITE:
1706 case PROCESSOR_SUPERSPARC:
1707 sparc_costs = &supersparc_costs;
1708 break;
1709 case PROCESSOR_F930:
1710 case PROCESSOR_F934:
1711 case PROCESSOR_HYPERSPARC:
1712 case PROCESSOR_SPARCLITE86X:
1713 sparc_costs = &hypersparc_costs;
1714 break;
1715 case PROCESSOR_LEON:
1716 sparc_costs = &leon_costs;
1717 break;
1718 case PROCESSOR_LEON3:
1719 case PROCESSOR_LEON3V7:
1720 sparc_costs = &leon3_costs;
1721 break;
1722 case PROCESSOR_SPARCLET:
1723 case PROCESSOR_TSC701:
1724 sparc_costs = &sparclet_costs;
1725 break;
1726 case PROCESSOR_V9:
1727 case PROCESSOR_ULTRASPARC:
1728 sparc_costs = &ultrasparc_costs;
1729 break;
1730 case PROCESSOR_ULTRASPARC3:
1731 sparc_costs = &ultrasparc3_costs;
1732 break;
1733 case PROCESSOR_NIAGARA:
1734 sparc_costs = &niagara_costs;
1735 break;
1736 case PROCESSOR_NIAGARA2:
1737 sparc_costs = &niagara2_costs;
1738 break;
1739 case PROCESSOR_NIAGARA3:
1740 sparc_costs = &niagara3_costs;
1741 break;
1742 case PROCESSOR_NIAGARA4:
1743 sparc_costs = &niagara4_costs;
1744 break;
1745 case PROCESSOR_NIAGARA7:
1746 sparc_costs = &niagara7_costs;
1747 break;
1748 case PROCESSOR_M8:
1749 sparc_costs = &m8_costs;
1750 break;
1751 case PROCESSOR_NATIVE:
1752 gcc_unreachable ();
1755 if (sparc_memory_model == SMM_DEFAULT)
1757 /* Choose the memory model for the operating system. */
1758 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1759 if (os_default != SMM_DEFAULT)
1760 sparc_memory_model = os_default;
1761 /* Choose the most relaxed model for the processor. */
1762 else if (TARGET_V9)
1763 sparc_memory_model = SMM_RMO;
1764 else if (TARGET_LEON3)
1765 sparc_memory_model = SMM_TSO;
1766 else if (TARGET_LEON)
1767 sparc_memory_model = SMM_SC;
1768 else if (TARGET_V8)
1769 sparc_memory_model = SMM_PSO;
1770 else
1771 sparc_memory_model = SMM_SC;
1774 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1775 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1776 target_flags |= MASK_LONG_DOUBLE_128;
1777 #endif
1779 if (TARGET_DEBUG_OPTIONS)
1780 dump_target_flags ("Final target_flags", target_flags);
1782 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1783 can run at the same time. More importantly, it is the threshold
1784 defining when additional prefetches will be dropped by the
1785 hardware.
1787 The UltraSPARC-III features a documented prefetch queue with a
1788 size of 8. Additional prefetches issued in the cpu are
1789 dropped.
1791 Niagara processors are different. In these processors prefetches
1792 are handled much like regular loads. The L1 miss buffer is 32
1793 entries, but prefetches start getting affected when 30 entries
1794 become occupied. That occupation could be a mix of regular loads
1795 and prefetches though. And that buffer is shared by all threads.
1796 Once the threshold is reached, if the core is running a single
1797 thread the prefetch will retry. If more than one thread is
1798 running, the prefetch will be dropped.
1800 All this makes it very difficult to determine how many
1801 prefetches can be issued simultaneously, even in a
1802 single-threaded program. Experimental results show that setting
1803 this parameter to 32 works well when the number of threads is not
1804 high. */
1805 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1806 ((sparc_cpu == PROCESSOR_ULTRASPARC
1807 || sparc_cpu == PROCESSOR_NIAGARA
1808 || sparc_cpu == PROCESSOR_NIAGARA2
1809 || sparc_cpu == PROCESSOR_NIAGARA3
1810 || sparc_cpu == PROCESSOR_NIAGARA4)
1811 ? 2
1812 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1813 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
1814 || sparc_cpu == PROCESSOR_M8)
1815 ? 32 : 3))),
1816 global_options.x_param_values,
1817 global_options_set.x_param_values);
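/* The nested conditional above is dense, so here is a minimal sketch
   (illustrative only, not used by the compiler) that restates the same
   cpu -> prefetch-count mapping as a switch.  The helper name is ours,
   not part of this file's interface.  */
static int
simultaneous_prefetches_sketch (enum processor_type cpu)
{
  switch (cpu)
    {
    case PROCESSOR_ULTRASPARC:
    case PROCESSOR_NIAGARA:
    case PROCESSOR_NIAGARA2:
    case PROCESSOR_NIAGARA3:
    case PROCESSOR_NIAGARA4:
      return 2;
    case PROCESSOR_ULTRASPARC3:
      return 8;
    case PROCESSOR_NIAGARA7:
    case PROCESSOR_M8:
      return 32;
    default:
      return 3;	/* the default in params.def */
    }
}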
1819 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
1820 bytes.
1822 The Oracle SPARC Architecture (previously the UltraSPARC
1823 Architecture) specification states that when a PREFETCH[A]
1824 instruction is executed an implementation-specific amount of data
1825 is prefetched, and that it is at least 64 bytes long (aligned to
1826 at least 64 bytes).
1828 However, this is not correct. The M7 (and implementations prior
1829 to that) does not guarantee a 64B prefetch into a cache if the
1830 line size is smaller. A single cache line is all that is ever
1831 prefetched. So for the M7, where the L1D$ has 32B lines and the
1832 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1833 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1834 is a read_n prefetch, which is the only type which allocates to
1835 the L1.) */
1836 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1837 (sparc_cpu == PROCESSOR_M8
1838 ? 64 : 32),
1839 global_options.x_param_values,
1840 global_options_set.x_param_values);
1842 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1843 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
1844 Niagara processors feature a L1D$ of 16KB. */
1845 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1846 ((sparc_cpu == PROCESSOR_ULTRASPARC
1847 || sparc_cpu == PROCESSOR_ULTRASPARC3
1848 || sparc_cpu == PROCESSOR_NIAGARA
1849 || sparc_cpu == PROCESSOR_NIAGARA2
1850 || sparc_cpu == PROCESSOR_NIAGARA3
1851 || sparc_cpu == PROCESSOR_NIAGARA4
1852 || sparc_cpu == PROCESSOR_NIAGARA7
1853 || sparc_cpu == PROCESSOR_M8)
1854 ? 16 : 64),
1855 global_options.x_param_values,
1856 global_options_set.x_param_values);
1859 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1860 that 512 is the default in params.def. */
1861 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1862 ((sparc_cpu == PROCESSOR_NIAGARA4
1863 || sparc_cpu == PROCESSOR_M8)
1864 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1865 ? 256 : 512)),
1866 global_options.x_param_values,
1867 global_options_set.x_param_values);
1870 /* Disable save slot sharing for call-clobbered registers by default.
1871 The IRA sharing algorithm works on single registers only and this
1872 pessimizes for double floating-point registers. */
1873 if (!global_options_set.x_flag_ira_share_save_slots)
1874 flag_ira_share_save_slots = 0;
1876 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1877 redundant 32-to-64-bit extensions. */
1878 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1879 flag_ree = 0;
1882 /* Miscellaneous utilities. */
1884 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1885 or branch on register contents instructions. */
1887 int
1888 v9_regcmp_p (enum rtx_code code)
1890 return (code == EQ || code == NE || code == GE || code == LT
1891 || code == LE || code == GT);
1894 /* Nonzero if OP is a floating point constant which can
1895 be loaded into an integer register using a single
1896 sethi instruction. */
1898 int
1899 fp_sethi_p (rtx op)
1901 if (GET_CODE (op) == CONST_DOUBLE)
1903 long i;
1905 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1906 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1909 return 0;
1912 /* Nonzero if OP is a floating point constant which can
1913 be loaded into an integer register using a single
1914 mov instruction. */
1916 int
1917 fp_mov_p (rtx op)
1919 if (GET_CODE (op) == CONST_DOUBLE)
1921 long i;
1923 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1924 return SPARC_SIMM13_P (i);
1927 return 0;
1930 /* Nonzero if OP is a floating point constant which can
1931 be loaded into an integer register using a high/losum
1932 instruction sequence. */
1934 int
1935 fp_high_losum_p (rtx op)
1937 /* The constraints calling this should only be in
1938 SFmode move insns, so any constant which cannot
1939 be moved using a single insn will do. */
1940 if (GET_CODE (op) == CONST_DOUBLE)
1942 long i;
1944 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1945 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1948 return 0;
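/* A minimal sketch of the trichotomy implemented by the three
   predicates above, assuming the usual definitions of SPARC_SIMM13_P
   (value fits in a signed 13-bit immediate) and SPARC_SETHI_P (low 10
   bits clear).  Every 32-bit image of an SFmode constant falls into
   exactly one bucket: a single mov, a single sethi, or a high/losum
   pair.  The enum and helper are illustrative only.  */
enum fp_image_kind { FP_IMAGE_MOV, FP_IMAGE_SETHI, FP_IMAGE_HIGH_LOSUM };

static enum fp_image_kind
classify_fp_image_sketch (long i)
{
  if (SPARC_SIMM13_P (i))
    return FP_IMAGE_MOV;		/* the fp_mov_p case */
  else if (SPARC_SETHI_P (i))
    return FP_IMAGE_SETHI;		/* the fp_sethi_p case */
  else
    return FP_IMAGE_HIGH_LOSUM;		/* the fp_high_losum_p case */
}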
1951 /* Return true if the address of LABEL can be loaded by means of the
1952 mov{si,di}_pic_label_ref patterns in PIC mode. */
1954 static bool
1955 can_use_mov_pic_label_ref (rtx label)
1957 /* VxWorks does not impose a fixed gap between segments; the run-time
1958 gap can be different from the object-file gap. We therefore can't
1959 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1960 are absolutely sure that X is in the same segment as the GOT.
1961 Unfortunately, the flexibility of linker scripts means that we
1962 can't be sure of that in general, so assume that GOT-relative
1963 accesses are never valid on VxWorks. */
1964 if (TARGET_VXWORKS_RTP)
1965 return false;
1967 /* Similarly, if the label is non-local, it might end up being placed
1968 in a different section than the current one; now mov_pic_label_ref
1969 requires the label and the code to be in the same section. */
1970 if (LABEL_REF_NONLOCAL_P (label))
1971 return false;
1973 /* Finally, if we are reordering basic blocks and partitioning into hot
1974 and cold sections, this might happen for any label. */
1975 if (flag_reorder_blocks_and_partition)
1976 return false;
1978 return true;
1981 /* Expand a move instruction. Return true if all work is done. */
1983 bool
1984 sparc_expand_move (machine_mode mode, rtx *operands)
1986 /* Handle sets of MEM first. */
1987 if (GET_CODE (operands[0]) == MEM)
1989 /* 0 is a register (or a pair of registers) on SPARC. */
1990 if (register_or_zero_operand (operands[1], mode))
1991 return false;
1993 if (!reload_in_progress)
1995 operands[0] = validize_mem (operands[0]);
1996 operands[1] = force_reg (mode, operands[1]);
2000 /* Fixup TLS cases. */
2001 if (TARGET_HAVE_TLS
2002 && CONSTANT_P (operands[1])
2003 && sparc_tls_referenced_p (operands [1]))
2005 operands[1] = sparc_legitimize_tls_address (operands[1]);
2006 return false;
2009 /* Fixup PIC cases. */
2010 if (flag_pic && CONSTANT_P (operands[1]))
2012 if (pic_address_needs_scratch (operands[1]))
2013 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2015 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2016 if (GET_CODE (operands[1]) == LABEL_REF
2017 && can_use_mov_pic_label_ref (operands[1]))
2019 if (mode == SImode)
2021 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2022 return true;
2025 if (mode == DImode)
2027 gcc_assert (TARGET_ARCH64);
2028 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2029 return true;
2033 if (symbolic_operand (operands[1], mode))
2035 operands[1]
2036 = sparc_legitimize_pic_address (operands[1],
2037 reload_in_progress
2038 ? operands[0] : NULL_RTX);
2039 return false;
2043 /* If we are trying to toss an integer constant into FP registers,
2044 or loading a FP or vector constant, force it into memory. */
2045 if (CONSTANT_P (operands[1])
2046 && REG_P (operands[0])
2047 && (SPARC_FP_REG_P (REGNO (operands[0]))
2048 || SCALAR_FLOAT_MODE_P (mode)
2049 || VECTOR_MODE_P (mode)))
2051 /* emit_group_store will send such bogosity to us when it is
2052 not storing directly into memory. So fix this up to avoid
2053 crashes in output_constant_pool. */
2054 if (operands [1] == const0_rtx)
2055 operands[1] = CONST0_RTX (mode);
2057 /* We can clear FP registers or set them to all-ones if TARGET_VIS,
2058 and can always do so for the other registers. */
2059 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2060 && (const_zero_operand (operands[1], mode)
2061 || const_all_ones_operand (operands[1], mode)))
2062 return false;
2064 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2065 /* We are able to build any SF constant in integer registers
2066 with at most 2 instructions. */
2067 && (mode == SFmode
2068 /* And any DF constant in integer registers if needed. */
2069 || (mode == DFmode && !can_create_pseudo_p ())))
2070 return false;
2072 operands[1] = force_const_mem (mode, operands[1]);
2073 if (!reload_in_progress)
2074 operands[1] = validize_mem (operands[1]);
2075 return false;
2078 /* Accept non-constants and valid constants unmodified. */
2079 if (!CONSTANT_P (operands[1])
2080 || GET_CODE (operands[1]) == HIGH
2081 || input_operand (operands[1], mode))
2082 return false;
2084 switch (mode)
2086 case E_QImode:
2087 /* All QImode constants require only one insn, so proceed. */
2088 break;
2090 case E_HImode:
2091 case E_SImode:
2092 sparc_emit_set_const32 (operands[0], operands[1]);
2093 return true;
2095 case E_DImode:
2096 /* input_operand should have filtered out 32-bit mode. */
2097 sparc_emit_set_const64 (operands[0], operands[1]);
2098 return true;
2100 case E_TImode:
2102 rtx high, low;
2103 /* TImode isn't available in 32-bit mode. */
2104 split_double (operands[1], &high, &low);
2105 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2106 high));
2107 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2108 low));
2110 return true;
2112 default:
2113 gcc_unreachable ();
2116 return false;
2119 /* Load OP1, a 32-bit constant, into OP0, a register.
2120 We know it can't be done in one insn when we get
2121 here; the move expander guarantees this. */
2123 static void
2124 sparc_emit_set_const32 (rtx op0, rtx op1)
2126 machine_mode mode = GET_MODE (op0);
2127 rtx temp = op0;
2129 if (can_create_pseudo_p ())
2130 temp = gen_reg_rtx (mode);
2132 if (GET_CODE (op1) == CONST_INT)
2134 gcc_assert (!small_int_operand (op1, mode)
2135 && !const_high_operand (op1, mode));
2137 /* Emit them as real moves instead of a HIGH/LO_SUM,
2138 this way CSE can see everything and reuse intermediate
2139 values if it wants. */
2140 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2141 & ~(HOST_WIDE_INT) 0x3ff)));
2143 emit_insn (gen_rtx_SET (op0,
2144 gen_rtx_IOR (mode, temp,
2145 GEN_INT (INTVAL (op1) & 0x3ff))));
2147 else
2149 /* A symbol, emit in the traditional way. */
2150 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2151 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
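/* A self-contained sketch of the arithmetic behind the CONST_INT path
   above: the first move materializes the high 22 bits (the sethi
   part, mask ~0x3ff) and the IOR supplies the low 10 bits, so their
   union reconstructs any 32-bit constant.  Illustrative only.  */
static unsigned int
set_const32_sketch (unsigned int val)
{
  unsigned int high22 = val & ~0x3ffu;	/* sethi %hi(val), %temp */
  unsigned int low10 = val & 0x3ffu;	/* or %temp, %lo(val), %reg */
  return high22 | low10;		/* equals val for every input */
}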
2155 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2156 If TEMP is nonzero, we are forbidden to use any other scratch
2157 registers. Otherwise, we are allowed to generate them as needed.
2159 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2160 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2162 void
2163 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2165 rtx cst, temp1, temp2, temp3, temp4, temp5;
2166 rtx ti_temp = 0;
2168 /* Deal with too large offsets. */
2169 if (GET_CODE (op1) == CONST
2170 && GET_CODE (XEXP (op1, 0)) == PLUS
2171 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2172 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2174 gcc_assert (!temp);
2175 temp1 = gen_reg_rtx (DImode);
2176 temp2 = gen_reg_rtx (DImode);
2177 sparc_emit_set_const64 (temp2, cst);
2178 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2179 NULL_RTX);
2180 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2181 return;
2184 if (temp && GET_MODE (temp) == TImode)
2186 ti_temp = temp;
2187 temp = gen_rtx_REG (DImode, REGNO (temp));
2190 /* SPARC-V9 code-model support. */
2191 switch (sparc_cmodel)
2193 case CM_MEDLOW:
2194 /* The range spanned by all instructions in the object is less
2195 than 2^31 bytes (2GB) and the distance from any instruction
2196 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2197 than 2^31 bytes (2GB).
2199 The executable must be in the low 4TB of the virtual address
2200 space.
2202 sethi %hi(symbol), %temp1
2203 or %temp1, %lo(symbol), %reg */
2204 if (temp)
2205 temp1 = temp; /* op0 is allowed. */
2206 else
2207 temp1 = gen_reg_rtx (DImode);
2209 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2210 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2211 break;
2213 case CM_MEDMID:
2214 /* The range spanned by all instructions in the object is less
2215 than 2^31 bytes (2GB) and the distance from any instruction
2216 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2217 than 2^31 bytes (2GB).
2219 The executable must be in the low 16TB of the virtual address
2220 space.
2222 sethi %h44(symbol), %temp1
2223 or %temp1, %m44(symbol), %temp2
2224 sllx %temp2, 12, %temp3
2225 or %temp3, %l44(symbol), %reg */
2226 if (temp)
2228 temp1 = op0;
2229 temp2 = op0;
2230 temp3 = temp; /* op0 is allowed. */
2232 else
2234 temp1 = gen_reg_rtx (DImode);
2235 temp2 = gen_reg_rtx (DImode);
2236 temp3 = gen_reg_rtx (DImode);
2239 emit_insn (gen_seth44 (temp1, op1));
2240 emit_insn (gen_setm44 (temp2, temp1, op1));
2241 emit_insn (gen_rtx_SET (temp3,
2242 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2243 emit_insn (gen_setl44 (op0, temp3, op1));
2244 break;
2246 case CM_MEDANY:
2247 /* The range spanned by all instructions in the object is less
2248 than 2^31 bytes (2GB) and the distance from any instruction
2249 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2250 than 2^31 bytes (2GB).
2252 The executable can be placed anywhere in the virtual address
2253 space.
2255 sethi %hh(symbol), %temp1
2256 sethi %lm(symbol), %temp2
2257 or %temp1, %hm(symbol), %temp3
2258 sllx %temp3, 32, %temp4
2259 or %temp4, %temp2, %temp5
2260 or %temp5, %lo(symbol), %reg */
2261 if (temp)
2263 /* It is possible that one of the registers we got for operands[2]
2264 might coincide with that of operands[0] (which is why we made
2265 it TImode). Pick the other one to use as our scratch. */
2266 if (rtx_equal_p (temp, op0))
2268 gcc_assert (ti_temp);
2269 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2271 temp1 = op0;
2272 temp2 = temp; /* op0 is _not_ allowed, see above. */
2273 temp3 = op0;
2274 temp4 = op0;
2275 temp5 = op0;
2277 else
2279 temp1 = gen_reg_rtx (DImode);
2280 temp2 = gen_reg_rtx (DImode);
2281 temp3 = gen_reg_rtx (DImode);
2282 temp4 = gen_reg_rtx (DImode);
2283 temp5 = gen_reg_rtx (DImode);
2286 emit_insn (gen_sethh (temp1, op1));
2287 emit_insn (gen_setlm (temp2, op1));
2288 emit_insn (gen_sethm (temp3, temp1, op1));
2289 emit_insn (gen_rtx_SET (temp4,
2290 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2291 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2292 emit_insn (gen_setlo (op0, temp5, op1));
2293 break;
2295 case CM_EMBMEDANY:
2296 /* Old old old backwards compatibility kruft here.
2297 Essentially it is MEDLOW with a fixed 64-bit
2298 virtual base added to all data segment addresses.
2299 Text-segment stuff is computed like MEDANY, we can't
2300 reuse the code above because the relocation knobs
2301 look different.
2303 Data segment: sethi %hi(symbol), %temp1
2304 add %temp1, EMBMEDANY_BASE_REG, %temp2
2305 or %temp2, %lo(symbol), %reg */
2306 if (data_segment_operand (op1, GET_MODE (op1)))
2308 if (temp)
2310 temp1 = temp; /* op0 is allowed. */
2311 temp2 = op0;
2313 else
2315 temp1 = gen_reg_rtx (DImode);
2316 temp2 = gen_reg_rtx (DImode);
2319 emit_insn (gen_embmedany_sethi (temp1, op1));
2320 emit_insn (gen_embmedany_brsum (temp2, temp1));
2321 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2324 /* Text segment: sethi %uhi(symbol), %temp1
2325 sethi %hi(symbol), %temp2
2326 or %temp1, %ulo(symbol), %temp3
2327 sllx %temp3, 32, %temp4
2328 or %temp4, %temp2, %temp5
2329 or %temp5, %lo(symbol), %reg */
2330 else
2332 if (temp)
2334 /* It is possible that one of the registers we got for operands[2]
2335 might coincide with that of operands[0] (which is why we made
2336 it TImode). Pick the other one to use as our scratch. */
2337 if (rtx_equal_p (temp, op0))
2339 gcc_assert (ti_temp);
2340 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2342 temp1 = op0;
2343 temp2 = temp; /* op0 is _not_ allowed, see above. */
2344 temp3 = op0;
2345 temp4 = op0;
2346 temp5 = op0;
2348 else
2350 temp1 = gen_reg_rtx (DImode);
2351 temp2 = gen_reg_rtx (DImode);
2352 temp3 = gen_reg_rtx (DImode);
2353 temp4 = gen_reg_rtx (DImode);
2354 temp5 = gen_reg_rtx (DImode);
2357 emit_insn (gen_embmedany_textuhi (temp1, op1));
2358 emit_insn (gen_embmedany_texthi (temp2, op1));
2359 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2360 emit_insn (gen_rtx_SET (temp4,
2361 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2362 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2363 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2365 break;
2367 default:
2368 gcc_unreachable ();
2372 /* These avoid problems when cross compiling. If we do not
2373 go through all this hair then the optimizer will see
2374 invalid REG_EQUAL notes or in some cases none at all. */
2375 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2376 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2377 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2378 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2380 /* The optimizer is not to assume anything about exactly
2381 which bits are set for a HIGH, they are unspecified.
2382 Unfortunately this leads to many missed optimizations
2383 during CSE. We mask out the non-HIGH bits so that the result
2384 matches a plain movdi, to alleviate this problem. */
2385 static rtx
2386 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2388 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2391 static rtx
2392 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2394 return gen_rtx_SET (dest, GEN_INT (val));
2397 static rtx
2398 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2400 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2403 static rtx
2404 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2406 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2409 /* Worker routines for 64-bit constant formation on arch64.
2410 One of the key things to do in these emissions is
2411 to create as many temp REGs as possible. This makes it
2412 possible for half-built constants to be used later when
2413 such values are similar to something required later on.
2414 Without doing this, the optimizer cannot see such
2415 opportunities. */
2417 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2418 unsigned HOST_WIDE_INT, int);
2420 static void
2421 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2422 unsigned HOST_WIDE_INT low_bits, int is_neg)
2424 unsigned HOST_WIDE_INT high_bits;
2426 if (is_neg)
2427 high_bits = (~low_bits) & 0xffffffff;
2428 else
2429 high_bits = low_bits;
2431 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2432 if (!is_neg)
2434 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2436 else
2438 /* If we are XOR'ing with -1, then we should emit a one's complement
2439 instead. This way the combiner will notice logical operations
2440 such as ANDN later on and substitute. */
2441 if ((low_bits & 0x3ff) == 0x3ff)
2443 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2445 else
2447 emit_insn (gen_rtx_SET (op0,
2448 gen_safe_XOR64 (temp,
2449 (-(HOST_WIDE_INT)0x400
2450 | (low_bits & 0x3ff)))));
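/* A sketch of the identity behind the is_neg path above, in plain
   64-bit arithmetic.  When the upper 32 bits of the target constant
   are all ones, sethi materializes the high 22 bits of ~low_bits and
   the XOR with (-0x400 | low10) flips them back while also setting
   the upper word and the low 10 bits.  Illustrative only; assumes a
   64-bit unsigned long long.  */
static unsigned long long
quick1_neg_sketch (unsigned int low_bits)
{
  unsigned long long temp = (~low_bits) & 0xfffffc00u;	    /* sethi %hi(~x) */
  unsigned long long mask = ~0x3ffull | (low_bits & 0x3ff); /* -0x400|low10 */
  /* Equals 0xffffffff00000000 | low_bits for every input.  */
  return temp ^ mask;
}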
2455 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2456 unsigned HOST_WIDE_INT, int);
2458 static void
2459 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2460 unsigned HOST_WIDE_INT high_bits,
2461 unsigned HOST_WIDE_INT low_immediate,
2462 int shift_count)
2464 rtx temp2 = op0;
2466 if ((high_bits & 0xfffffc00) != 0)
2468 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2469 if ((high_bits & ~0xfffffc00) != 0)
2470 emit_insn (gen_rtx_SET (op0,
2471 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2472 else
2473 temp2 = temp;
2475 else
2477 emit_insn (gen_safe_SET64 (temp, high_bits));
2478 temp2 = temp;
2481 /* Now shift it up into place. */
2482 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2483 GEN_INT (shift_count))));
2485 /* If there is a low immediate part piece, finish up by
2486 putting that in as well. */
2487 if (low_immediate != 0)
2488 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2491 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2492 unsigned HOST_WIDE_INT);
2494 /* Full 64-bit constant decomposition. Even though this is the
2495 'worst' case, we still optimize a few things away. */
2496 static void
2497 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2498 unsigned HOST_WIDE_INT high_bits,
2499 unsigned HOST_WIDE_INT low_bits)
2501 rtx sub_temp = op0;
2503 if (can_create_pseudo_p ())
2504 sub_temp = gen_reg_rtx (DImode);
2506 if ((high_bits & 0xfffffc00) != 0)
2508 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2509 if ((high_bits & ~0xfffffc00) != 0)
2510 emit_insn (gen_rtx_SET (sub_temp,
2511 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2512 else
2513 sub_temp = temp;
2515 else
2517 emit_insn (gen_safe_SET64 (temp, high_bits));
2518 sub_temp = temp;
2521 if (can_create_pseudo_p ())
2523 rtx temp2 = gen_reg_rtx (DImode);
2524 rtx temp3 = gen_reg_rtx (DImode);
2525 rtx temp4 = gen_reg_rtx (DImode);
2527 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2528 GEN_INT (32))));
2530 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2531 if ((low_bits & ~0xfffffc00) != 0)
2533 emit_insn (gen_rtx_SET (temp3,
2534 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2535 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2537 else
2539 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2542 else
2544 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2545 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2546 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2547 int to_shift = 12;
2549 /* We are in the middle of reload, so this is really
2550 painful. However we do still make an attempt to
2551 avoid emitting truly stupid code. */
2552 if (low1 != const0_rtx)
2554 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2555 GEN_INT (to_shift))));
2556 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2557 sub_temp = op0;
2558 to_shift = 12;
2560 else
2562 to_shift += 12;
2564 if (low2 != const0_rtx)
2566 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2567 GEN_INT (to_shift))));
2568 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2569 sub_temp = op0;
2570 to_shift = 8;
2572 else
2574 to_shift += 8;
2576 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2577 GEN_INT (to_shift))));
2578 if (low3 != const0_rtx)
2579 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2580 /* phew... */
2584 /* Analyze a 64-bit constant for certain properties. */
2585 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2586 unsigned HOST_WIDE_INT,
2587 int *, int *, int *);
2589 static void
2590 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2591 unsigned HOST_WIDE_INT low_bits,
2592 int *hbsp, int *lbsp, int *abbasp)
2594 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2595 int i;
2597 lowest_bit_set = highest_bit_set = -1;
2598 i = 0;
2601 if ((lowest_bit_set == -1)
2602 && ((low_bits >> i) & 1))
2603 lowest_bit_set = i;
2604 if ((highest_bit_set == -1)
2605 && ((high_bits >> (32 - i - 1)) & 1))
2606 highest_bit_set = (64 - i - 1);
2608 while (++i < 32
2609 && ((highest_bit_set == -1)
2610 || (lowest_bit_set == -1)));
2611 if (i == 32)
2613 i = 0;
2616 if ((lowest_bit_set == -1)
2617 && ((high_bits >> i) & 1))
2618 lowest_bit_set = i + 32;
2619 if ((highest_bit_set == -1)
2620 && ((low_bits >> (32 - i - 1)) & 1))
2621 highest_bit_set = 32 - i - 1;
2623 while (++i < 32
2624 && ((highest_bit_set == -1)
2625 || (lowest_bit_set == -1)));
2627 /* If there are no bits set this should have gone out
2628 as one instruction! */
2629 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2630 all_bits_between_are_set = 1;
2631 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2633 if (i < 32)
2635 if ((low_bits & (1 << i)) != 0)
2636 continue;
2638 else
2640 if ((high_bits & (1 << (i - 32))) != 0)
2641 continue;
2643 all_bits_between_are_set = 0;
2644 break;
2646 *hbsp = highest_bit_set;
2647 *lbsp = lowest_bit_set;
2648 *abbasp = all_bits_between_are_set;
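/* A compact sketch of what analyze_64bit_constant computes, assuming
   the GCC bit-scan builtins and a nonzero input (the function above
   asserts that some bit is set).  The two scan loops find the lowest
   and highest set bit; the final loop checks that the bits between
   them form one contiguous run.  Illustrative only.  */
static void
analyze_64bit_constant_sketch (unsigned long long val,
                               int *hbsp, int *lbsp, int *abbasp)
{
  int lo = __builtin_ctzll (val);		/* lowest set bit */
  int hi = 63 - __builtin_clzll (val);		/* highest set bit */
  /* A mask with exactly bits lo..hi set.  */
  unsigned long long run = (~0ULL >> (63 - hi)) & (~0ULL << lo);
  *hbsp = hi;
  *lbsp = lo;
  *abbasp = (val == run);
}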
2651 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2653 static int
2654 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2655 unsigned HOST_WIDE_INT low_bits)
2657 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2659 if (high_bits == 0
2660 || high_bits == 0xffffffff)
2661 return 1;
2663 analyze_64bit_constant (high_bits, low_bits,
2664 &highest_bit_set, &lowest_bit_set,
2665 &all_bits_between_are_set);
2667 if ((highest_bit_set == 63
2668 || lowest_bit_set == 0)
2669 && all_bits_between_are_set != 0)
2670 return 1;
2672 if ((highest_bit_set - lowest_bit_set) < 21)
2673 return 1;
2675 return 0;
2678 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2679 unsigned HOST_WIDE_INT,
2680 int, int);
2682 static unsigned HOST_WIDE_INT
2683 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2684 unsigned HOST_WIDE_INT low_bits,
2685 int lowest_bit_set, int shift)
2687 HOST_WIDE_INT hi, lo;
2689 if (lowest_bit_set < 32)
2691 lo = (low_bits >> lowest_bit_set) << shift;
2692 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2694 else
2696 lo = 0;
2697 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2699 gcc_assert (! (hi & lo));
2700 return (hi | lo);
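/* A sketch of the operation above on the combined 64-bit value
   val = (high_bits << 32) | low_bits: the contiguous field of
   interesting bits is right-justified, then moved up by SHIFT so that
   a simm13 or sethi immediate can materialize it before it is shifted
   back into place.  For example, with val = 0x0000030000000000 (bits
   40 and 41 set), lowest_bit_set is 40 and the focus value for
   shift 0 is 0x3.  Illustrative only.  */
static unsigned long long
focus_bits_sketch (unsigned long long val, int lowest_bit_set, int shift)
{
  return (val >> lowest_bit_set) << shift;
}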
2703 /* Here we are sure to be arch64 and this is an integer constant
2704 being loaded into a register. Emit the most efficient
2705 insn sequence possible. Detection of all the 1-insn cases
2706 has been done already. */
2707 static void
2708 sparc_emit_set_const64 (rtx op0, rtx op1)
2710 unsigned HOST_WIDE_INT high_bits, low_bits;
2711 int lowest_bit_set, highest_bit_set;
2712 int all_bits_between_are_set;
2713 rtx temp = 0;
2715 /* Sanity check that we know what we are working with. */
2716 gcc_assert (TARGET_ARCH64
2717 && (GET_CODE (op0) == SUBREG
2718 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2720 if (! can_create_pseudo_p ())
2721 temp = op0;
2723 if (GET_CODE (op1) != CONST_INT)
2725 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2726 return;
2729 if (! temp)
2730 temp = gen_reg_rtx (DImode);
2732 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2733 low_bits = (INTVAL (op1) & 0xffffffff);
2735 /* low_bits bits 0 --> 31
2736 high_bits bits 32 --> 63 */
2738 analyze_64bit_constant (high_bits, low_bits,
2739 &highest_bit_set, &lowest_bit_set,
2740 &all_bits_between_are_set);
2742 /* First try for a 2-insn sequence. */
2744 /* These situations are preferred because the optimizer can
2745 * do more things with them:
2746 * 1) mov -1, %reg
2747 * sllx %reg, shift, %reg
2748 * 2) mov -1, %reg
2749 * srlx %reg, shift, %reg
2750 * 3) mov some_small_const, %reg
2751 * sllx %reg, shift, %reg
2753 if (((highest_bit_set == 63
2754 || lowest_bit_set == 0)
2755 && all_bits_between_are_set != 0)
2756 || ((highest_bit_set - lowest_bit_set) < 12))
2758 HOST_WIDE_INT the_const = -1;
2759 int shift = lowest_bit_set;
2761 if ((highest_bit_set != 63
2762 && lowest_bit_set != 0)
2763 || all_bits_between_are_set == 0)
2765 the_const =
2766 create_simple_focus_bits (high_bits, low_bits,
2767 lowest_bit_set, 0);
2769 else if (lowest_bit_set == 0)
2770 shift = -(63 - highest_bit_set);
2772 gcc_assert (SPARC_SIMM13_P (the_const));
2773 gcc_assert (shift != 0);
2775 emit_insn (gen_safe_SET64 (temp, the_const));
2776 if (shift > 0)
2777 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2778 GEN_INT (shift))));
2779 else if (shift < 0)
2780 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2781 GEN_INT (-shift))));
2782 return;
2785 /* Now a range of 22 or less bits set somewhere.
2786 * 1) sethi %hi(focus_bits), %reg
2787 * sllx %reg, shift, %reg
2788 * 2) sethi %hi(focus_bits), %reg
2789 * srlx %reg, shift, %reg
2791 if ((highest_bit_set - lowest_bit_set) < 21)
2793 unsigned HOST_WIDE_INT focus_bits =
2794 create_simple_focus_bits (high_bits, low_bits,
2795 lowest_bit_set, 10);
2797 gcc_assert (SPARC_SETHI_P (focus_bits));
2798 gcc_assert (lowest_bit_set != 10);
2800 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2802 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2803 if (lowest_bit_set < 10)
2804 emit_insn (gen_rtx_SET (op0,
2805 gen_rtx_LSHIFTRT (DImode, temp,
2806 GEN_INT (10 - lowest_bit_set))));
2807 else if (lowest_bit_set > 10)
2808 emit_insn (gen_rtx_SET (op0,
2809 gen_rtx_ASHIFT (DImode, temp,
2810 GEN_INT (lowest_bit_set - 10))));
2811 return;
2814 /* 1) sethi %hi(low_bits), %reg
2815 * or %reg, %lo(low_bits), %reg
2816 * 2) sethi %hi(~low_bits), %reg
2817 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2819 if (high_bits == 0
2820 || high_bits == 0xffffffff)
2822 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2823 (high_bits == 0xffffffff));
2824 return;
2827 /* Now, try 3-insn sequences. */
2829 /* 1) sethi %hi(high_bits), %reg
2830 * or %reg, %lo(high_bits), %reg
2831 * sllx %reg, 32, %reg
2833 if (low_bits == 0)
2835 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2836 return;
2839 /* We may be able to do something quick
2840 when the constant is negated, so try that. */
2841 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2842 (~low_bits) & 0xfffffc00))
2844 /* NOTE: The trailing bits get XOR'd so we need the
2845 non-negated bits, not the negated ones. */
2846 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2848 if ((((~high_bits) & 0xffffffff) == 0
2849 && ((~low_bits) & 0x80000000) == 0)
2850 || (((~high_bits) & 0xffffffff) == 0xffffffff
2851 && ((~low_bits) & 0x80000000) != 0))
2853 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2855 if ((SPARC_SETHI_P (fast_int)
2856 && (~high_bits & 0xffffffff) == 0)
2857 || SPARC_SIMM13_P (fast_int))
2858 emit_insn (gen_safe_SET64 (temp, fast_int));
2859 else
2860 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2862 else
2864 rtx negated_const;
2865 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2866 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2867 sparc_emit_set_const64 (temp, negated_const);
2870 /* If we are XOR'ing with -1, then we should emit a one's complement
2871 instead. This way the combiner will notice logical operations
2872 such as ANDN later on and substitute. */
2873 if (trailing_bits == 0x3ff)
2875 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2877 else
2879 emit_insn (gen_rtx_SET (op0,
2880 gen_safe_XOR64 (temp,
2881 (-0x400 | trailing_bits))));
2883 return;
2886 /* 1) sethi %hi(xxx), %reg
2887 * or %reg, %lo(xxx), %reg
2888 * sllx %reg, yyy, %reg
2890 * ??? This is just a generalized version of the low_bits==0
2891 * thing above, FIXME...
2893 if ((highest_bit_set - lowest_bit_set) < 32)
2895 unsigned HOST_WIDE_INT focus_bits =
2896 create_simple_focus_bits (high_bits, low_bits,
2897 lowest_bit_set, 0);
2899 /* We can't get here in this state. */
2900 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2902 /* So what we know is that the set bits straddle the
2903 middle of the 64-bit word. */
2904 sparc_emit_set_const64_quick2 (op0, temp,
2905 focus_bits, 0,
2906 lowest_bit_set);
2907 return;
2910 /* 1) sethi %hi(high_bits), %reg
2911 * or %reg, %lo(high_bits), %reg
2912 * sllx %reg, 32, %reg
2913 * or %reg, low_bits, %reg
2915 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2917 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2918 return;
2921 /* The easiest way when all else fails, is full decomposition. */
2922 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2925 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2927 static bool
2928 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2930 *p1 = SPARC_ICC_REG;
2931 *p2 = SPARC_FCC_REG;
2932 return true;
2935 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
2937 static unsigned int
2938 sparc_min_arithmetic_precision (void)
2940 return 32;
2943 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2944 return the mode to be used for the comparison. For floating-point,
2945 CCFP[E]mode is used. CCNZmode should be used when the first operand
2946 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2947 processing is needed. */
2949 machine_mode
2950 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2952 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2954 switch (op)
2956 case EQ:
2957 case NE:
2958 case UNORDERED:
2959 case ORDERED:
2960 case UNLT:
2961 case UNLE:
2962 case UNGT:
2963 case UNGE:
2964 case UNEQ:
2965 case LTGT:
2966 return CCFPmode;
2968 case LT:
2969 case LE:
2970 case GT:
2971 case GE:
2972 return CCFPEmode;
2974 default:
2975 gcc_unreachable ();
2978 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2979 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2980 && y == const0_rtx)
2982 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2983 return CCXNZmode;
2984 else
2985 return CCNZmode;
2987 else
2989 /* This is for the cmp<mode>_sne pattern. */
2990 if (GET_CODE (x) == NOT && y == constm1_rtx)
2992 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2993 return CCXCmode;
2994 else
2995 return CCCmode;
2998 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
2999 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3001 if (GET_CODE (y) == UNSPEC
3002 && (XINT (y, 1) == UNSPEC_ADDV
3003 || XINT (y, 1) == UNSPEC_SUBV
3004 || XINT (y, 1) == UNSPEC_NEGV))
3005 return CCVmode;
3006 else
3007 return CCCmode;
3010 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3011 return CCXmode;
3012 else
3013 return CCmode;
3017 /* Emit the compare insn and return the CC reg for a CODE comparison
3018 with operands X and Y. */
3020 static rtx
3021 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3023 machine_mode mode;
3024 rtx cc_reg;
3026 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3027 return x;
3029 mode = SELECT_CC_MODE (code, x, y);
3031 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3032 fcc regs (cse can't tell they're really call clobbered regs and will
3033 remove a duplicate comparison even if there is an intervening function
3034 call - it will then try to reload the cc reg via an int reg which is why
3035 we need the movcc patterns). It is possible to provide the movcc
3036 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3037 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3038 to tell cse that CCFPE mode registers (even pseudos) are call
3039 clobbered. */
3041 /* ??? This is an experiment. Rather than making changes to cse which may
3042 or may not be easy/clean, we do our own cse. This is possible because
3043 we will generate hard registers. Cse knows they're call clobbered (it
3044 doesn't know the same thing about pseudos). If we guess wrong, no big
3045 deal, but if we win, great! */
3047 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3048 #if 1 /* experiment */
3050 int reg;
3051 /* We cycle through the registers to ensure they're all exercised. */
3052 static int next_fcc_reg = 0;
3053 /* Previous x,y for each fcc reg. */
3054 static rtx prev_args[4][2];
3056 /* Scan prev_args for x,y. */
3057 for (reg = 0; reg < 4; reg++)
3058 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3059 break;
3060 if (reg == 4)
3062 reg = next_fcc_reg;
3063 prev_args[reg][0] = x;
3064 prev_args[reg][1] = y;
3065 next_fcc_reg = (next_fcc_reg + 1) & 3;
3067 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3069 #else
3070 cc_reg = gen_reg_rtx (mode);
3071 #endif /* ! experiment */
3072 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3073 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3074 else
3075 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3077 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
3078 will only result in an unrecognizable insn so no point in asserting. */
3079 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3081 return cc_reg;
3085 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3087 rtx
3088 gen_compare_reg (rtx cmp)
3090 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3093 /* This function is used for v9 only.
3094 DEST is the target of the Scc insn.
3095 CODE is the code for an Scc's comparison.
3096 X and Y are the values we compare.
3098 This function is needed to turn
3100 (set (reg:SI 110)
3101 (gt (reg:CCX 100 %icc)
3102 (const_int 0)))
3103 into
3104 (set (reg:SI 110)
3105 (gt:DI (reg:CCX 100 %icc)
3106 (const_int 0)))
3108 I.e. the instruction recognizer needs to see the mode of the comparison to
3109 find the right instruction. We could use "gt:DI" right in the
3110 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3112 static int
3113 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3115 if (! TARGET_ARCH64
3116 && (GET_MODE (x) == DImode
3117 || GET_MODE (dest) == DImode))
3118 return 0;
3120 /* Try to use the movrCC insns. */
3121 if (TARGET_ARCH64
3122 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3123 && y == const0_rtx
3124 && v9_regcmp_p (compare_code))
3126 rtx op0 = x;
3127 rtx temp;
3129 /* Special case for op0 != 0. This can be done with one instruction if
3130 dest == x. */
3132 if (compare_code == NE
3133 && GET_MODE (dest) == DImode
3134 && rtx_equal_p (op0, dest))
3136 emit_insn (gen_rtx_SET (dest,
3137 gen_rtx_IF_THEN_ELSE (DImode,
3138 gen_rtx_fmt_ee (compare_code, DImode,
3139 op0, const0_rtx),
3140 const1_rtx,
3141 dest)));
3142 return 1;
3145 if (reg_overlap_mentioned_p (dest, op0))
3147 /* Handle the case where dest == x.
3148 We "early clobber" the result. */
3149 op0 = gen_reg_rtx (GET_MODE (x));
3150 emit_move_insn (op0, x);
3153 emit_insn (gen_rtx_SET (dest, const0_rtx));
3154 if (GET_MODE (op0) != DImode)
3156 temp = gen_reg_rtx (DImode);
3157 convert_move (temp, op0, 0);
3159 else
3160 temp = op0;
3161 emit_insn (gen_rtx_SET (dest,
3162 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3163 gen_rtx_fmt_ee (compare_code, DImode,
3164 temp, const0_rtx),
3165 const1_rtx,
3166 dest)));
3167 return 1;
3169 else
3171 x = gen_compare_reg_1 (compare_code, x, y);
3172 y = const0_rtx;
3174 emit_insn (gen_rtx_SET (dest, const0_rtx));
3175 emit_insn (gen_rtx_SET (dest,
3176 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3177 gen_rtx_fmt_ee (compare_code,
3178 GET_MODE (x), x, y),
3179 const1_rtx, dest)));
3180 return 1;
3185 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3186 without jumps using the addx/subx instructions. */
3188 bool
3189 emit_scc_insn (rtx operands[])
3191 rtx tem, x, y;
3192 enum rtx_code code;
3193 machine_mode mode;
3195 /* The quad-word fp compare library routines all return nonzero to indicate
3196 true, which is different from the equivalent libgcc routines, so we must
3197 handle them specially here. */
3198 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3200 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3201 GET_CODE (operands[1]));
3202 operands[2] = XEXP (operands[1], 0);
3203 operands[3] = XEXP (operands[1], 1);
3206 code = GET_CODE (operands[1]);
3207 x = operands[2];
3208 y = operands[3];
3209 mode = GET_MODE (x);
3211 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3212 more applications). The exception to this is "reg != 0" which can
3213 be done in one instruction on v9 (so we do it). */
3214 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3216 if (y != const0_rtx)
3217 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3219 rtx pat = gen_rtx_SET (operands[0],
3220 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3221 x, const0_rtx));
3223 /* If we can use addx/subx or addxc, add a clobber for CC. */
3224 if (mode == SImode || (code == NE && TARGET_VIS3))
3226 rtx clobber
3227 = gen_rtx_CLOBBER (VOIDmode,
3228 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3229 SPARC_ICC_REG));
3230 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3233 emit_insn (pat);
3234 return true;
3237 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3238 if (TARGET_ARCH64
3239 && mode == DImode
3240 && !((code == LTU || code == GTU) && TARGET_VIS3)
3241 && gen_v9_scc (operands[0], code, x, y))
3242 return true;
3244 /* We can do LTU and GEU using the addx/subx instructions too. And
3245 for GTU/LEU, if both operands are registers, swap them and fall
3246 back to the easy case. */
3247 if (code == GTU || code == LEU)
3249 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3250 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3252 tem = x;
3253 x = y;
3254 y = tem;
3255 code = swap_condition (code);
3259 if (code == LTU || code == GEU)
3261 emit_insn (gen_rtx_SET (operands[0],
3262 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3263 gen_compare_reg_1 (code, x, y),
3264 const0_rtx)));
3265 return true;
3268 /* All the possibilities to use addx/subx-based sequences have been
3269 exhausted, so try for a 3-instruction sequence using v9 conditional
3270 moves. */
3271 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3272 return true;
3274 /* Nope, do branches. */
3275 return false;
3278 /* Emit a conditional jump insn for the v9 architecture using comparison code
3279 CODE and jump target LABEL.
3280 This function exists to take advantage of the v9 brxx insns. */
3282 static void
3283 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3285 emit_jump_insn (gen_rtx_SET (pc_rtx,
3286 gen_rtx_IF_THEN_ELSE (VOIDmode,
3287 gen_rtx_fmt_ee (code, GET_MODE (op0),
3288 op0, const0_rtx),
3289 gen_rtx_LABEL_REF (VOIDmode, label),
3290 pc_rtx)));
3293 /* Emit a conditional jump insn for the UA2011 architecture using
3294 comparison code CODE and jump target LABEL. This function exists
3295 to take advantage of the UA2011 Compare and Branch insns. */
3297 static void
3298 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3300 rtx if_then_else;
3302 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3303 gen_rtx_fmt_ee(code, GET_MODE(op0),
3304 op0, op1),
3305 gen_rtx_LABEL_REF (VOIDmode, label),
3306 pc_rtx);
3308 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3311 void
3312 emit_conditional_branch_insn (rtx operands[])
3314 /* The quad-word fp compare library routines all return nonzero to indicate
3315 true, which is different from the equivalent libgcc routines, so we must
3316 handle them specially here. */
3317 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3319 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3320 GET_CODE (operands[0]));
3321 operands[1] = XEXP (operands[0], 0);
3322 operands[2] = XEXP (operands[0], 1);
3325 /* If we can tell early on that the comparison is against a constant
3326 that won't fit in the 5-bit signed immediate field of a cbcond,
3327 use one of the other v9 conditional branch sequences. */
3328 if (TARGET_CBCOND
3329 && GET_CODE (operands[1]) == REG
3330 && (GET_MODE (operands[1]) == SImode
3331 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3332 && (GET_CODE (operands[2]) != CONST_INT
3333 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3335 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3336 return;
3339 if (TARGET_ARCH64 && operands[2] == const0_rtx
3340 && GET_CODE (operands[1]) == REG
3341 && GET_MODE (operands[1]) == DImode)
3343 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3344 return;
3347 operands[1] = gen_compare_reg (operands[0]);
3348 operands[2] = const0_rtx;
3349 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3350 operands[1], operands[2]);
3351 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3352 operands[3]));
3356 /* Generate a DFmode part of a hard TFmode register.
3357 REG is the TFmode hard register, LOW is 1 for the
3358 low 64 bits of the register and 0 otherwise. */
3360 rtx
3361 gen_df_reg (rtx reg, int low)
3363 int regno = REGNO (reg);
3365 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3366 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3367 return gen_rtx_REG (DFmode, regno);
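/* A sketch of the register arithmetic above, assuming big-endian word
   order as on SPARC: the high DFmode half lives at REGNO itself,
   while the low half is one register further on for 64-bit integer
   registers and two further on for the 32-bit FP registers (a TFmode
   value spans four of them).  Illustrative only.  */
static int
df_half_regno_sketch (int regno, int low, int int_reg_arch64)
{
  if (low)
    regno += int_reg_arch64 ? 1 : 2;
  return regno;
}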
3370 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3371 Unlike normal calls, TFmode operands are passed by reference. It is
3372 assumed that no more than 3 operands are required. */
3374 static void
3375 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3377 rtx ret_slot = NULL, arg[3], func_sym;
3378 int i;
3380 /* We only expect to be called for conversions, unary, and binary ops. */
3381 gcc_assert (nargs == 2 || nargs == 3);
3383 for (i = 0; i < nargs; ++i)
3385 rtx this_arg = operands[i];
3386 rtx this_slot;
3388 /* TFmode arguments and return values are passed by reference. */
3389 if (GET_MODE (this_arg) == TFmode)
3391 int force_stack_temp;
3393 force_stack_temp = 0;
3394 if (TARGET_BUGGY_QP_LIB && i == 0)
3395 force_stack_temp = 1;
3397 if (GET_CODE (this_arg) == MEM
3398 && ! force_stack_temp)
3400 tree expr = MEM_EXPR (this_arg);
3401 if (expr)
3402 mark_addressable (expr);
3403 this_arg = XEXP (this_arg, 0);
3405 else if (CONSTANT_P (this_arg)
3406 && ! force_stack_temp)
3408 this_slot = force_const_mem (TFmode, this_arg);
3409 this_arg = XEXP (this_slot, 0);
3411 else
3413 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3415 /* Operand 0 is the return value. We'll copy it out later. */
3416 if (i > 0)
3417 emit_move_insn (this_slot, this_arg);
3418 else
3419 ret_slot = this_slot;
3421 this_arg = XEXP (this_slot, 0);
3425 arg[i] = this_arg;
3428 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3430 if (GET_MODE (operands[0]) == TFmode)
3432 if (nargs == 2)
3433 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3434 arg[0], GET_MODE (arg[0]),
3435 arg[1], GET_MODE (arg[1]));
3436 else
3437 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3438 arg[0], GET_MODE (arg[0]),
3439 arg[1], GET_MODE (arg[1]),
3440 arg[2], GET_MODE (arg[2]));
3442 if (ret_slot)
3443 emit_move_insn (operands[0], ret_slot);
3445 else
3447 rtx ret;
3449 gcc_assert (nargs == 2);
3451 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3452 GET_MODE (operands[0]),
3453 arg[1], GET_MODE (arg[1]));
3455 if (ret != operands[0])
3456 emit_move_insn (operands[0], ret);
3460 /* Expand soft-float TFmode calls to SPARC ABI routines. */
3462 static void
3463 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3465 const char *func;
3467 switch (code)
3469 case PLUS:
3470 func = "_Qp_add";
3471 break;
3472 case MINUS:
3473 func = "_Qp_sub";
3474 break;
3475 case MULT:
3476 func = "_Qp_mul";
3477 break;
3478 case DIV:
3479 func = "_Qp_div";
3480 break;
3481 default:
3482 gcc_unreachable ();
3485 emit_soft_tfmode_libcall (func, 3, operands);
3488 static void
3489 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3491 const char *func;
3493 gcc_assert (code == SQRT);
3494 func = "_Qp_sqrt";
3496 emit_soft_tfmode_libcall (func, 2, operands);
3499 static void
3500 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3502 const char *func;
3504 switch (code)
3506 case FLOAT_EXTEND:
3507 switch (GET_MODE (operands[1]))
3509 case E_SFmode:
3510 func = "_Qp_stoq";
3511 break;
3512 case E_DFmode:
3513 func = "_Qp_dtoq";
3514 break;
3515 default:
3516 gcc_unreachable ();
3518 break;
3520 case FLOAT_TRUNCATE:
3521 switch (GET_MODE (operands[0]))
3523 case E_SFmode:
3524 func = "_Qp_qtos";
3525 break;
3526 case E_DFmode:
3527 func = "_Qp_qtod";
3528 break;
3529 default:
3530 gcc_unreachable ();
3532 break;
3534 case FLOAT:
3535 switch (GET_MODE (operands[1]))
3537 case E_SImode:
3538 func = "_Qp_itoq";
3539 if (TARGET_ARCH64)
3540 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3541 break;
3542 case E_DImode:
3543 func = "_Qp_xtoq";
3544 break;
3545 default:
3546 gcc_unreachable ();
3548 break;
3550 case UNSIGNED_FLOAT:
3551 switch (GET_MODE (operands[1]))
3553 case E_SImode:
3554 func = "_Qp_uitoq";
3555 if (TARGET_ARCH64)
3556 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3557 break;
3558 case E_DImode:
3559 func = "_Qp_uxtoq";
3560 break;
3561 default:
3562 gcc_unreachable ();
3564 break;
3566 case FIX:
3567 switch (GET_MODE (operands[0]))
3569 case E_SImode:
3570 func = "_Qp_qtoi";
3571 break;
3572 case E_DImode:
3573 func = "_Qp_qtox";
3574 break;
3575 default:
3576 gcc_unreachable ();
3578 break;
3580 case UNSIGNED_FIX:
3581 switch (GET_MODE (operands[0]))
3583 case E_SImode:
3584 func = "_Qp_qtoui";
3585 break;
3586 case E_DImode:
3587 func = "_Qp_qtoux";
3588 break;
3589 default:
3590 gcc_unreachable ();
3592 break;
3594 default:
3595 gcc_unreachable ();
3598 emit_soft_tfmode_libcall (func, 2, operands);
3601 /* Expand a hard-float TFmode operation. All arguments must be in
3602 registers. */
3604 static void
3605 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3607 rtx op, dest;
3609 if (GET_RTX_CLASS (code) == RTX_UNARY)
3611 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3612 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3614 else
3616 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3617 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3618 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3619 operands[1], operands[2]);
3622 if (register_operand (operands[0], VOIDmode))
3623 dest = operands[0];
3624 else
3625 dest = gen_reg_rtx (GET_MODE (operands[0]));
3627 emit_insn (gen_rtx_SET (dest, op));
3629 if (dest != operands[0])
3630 emit_move_insn (operands[0], dest);
3633 void
3634 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3636 if (TARGET_HARD_QUAD)
3637 emit_hard_tfmode_operation (code, operands);
3638 else
3639 emit_soft_tfmode_binop (code, operands);
3642 void
3643 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3645 if (TARGET_HARD_QUAD)
3646 emit_hard_tfmode_operation (code, operands);
3647 else
3648 emit_soft_tfmode_unop (code, operands);
3651 void
3652 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3654 if (TARGET_HARD_QUAD)
3655 emit_hard_tfmode_operation (code, operands);
3656 else
3657 emit_soft_tfmode_cvt (code, operands);
3660 /* Return nonzero if a branch/jump/call instruction will be emitting
3661 a nop into its delay slot. */
3663 int
3664 empty_delay_slot (rtx_insn *insn)
3666 rtx seq;
3668 /* If no previous instruction (should not happen), return true. */
3669 if (PREV_INSN (insn) == NULL)
3670 return 1;
3672 seq = NEXT_INSN (PREV_INSN (insn));
3673 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3674 return 0;
3676 return 1;
3679 /* Return nonzero if we should emit a nop after a cbcond instruction.
3680 The cbcond instruction does not have a delay slot; however, there is
3681 a severe performance penalty if a control transfer appears right
3682 after a cbcond. Therefore we emit a nop when we detect this
3683 situation. */
3685 int
3686 emit_cbcond_nop (rtx_insn *insn)
3688 rtx next = next_active_insn (insn);
3690 if (!next)
3691 return 1;
3693 if (NONJUMP_INSN_P (next)
3694 && GET_CODE (PATTERN (next)) == SEQUENCE)
3695 next = XVECEXP (PATTERN (next), 0, 0);
3696 else if (CALL_P (next)
3697 && GET_CODE (PATTERN (next)) == PARALLEL)
3699 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3701 if (GET_CODE (delay) == RETURN)
3703 /* It's a sibling call. Do not emit the nop if we're going
3704 to emit something other than the jump itself as the first
3705 instruction of the sibcall sequence. */
3706 if (sparc_leaf_function_p || TARGET_FLAT)
3707 return 0;
3711 if (NONJUMP_INSN_P (next))
3712 return 0;
3714 return 1;
3717 /* Return nonzero if TRIAL can go into the call delay slot. */
3719 int
3720 eligible_for_call_delay (rtx_insn *trial)
3722 rtx pat;
3724 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3725 return 0;
3727 /* Binutils allows
3728 call __tls_get_addr, %tgd_call (foo)
3729 add %l7, %o0, %o0, %tgd_add (foo)
3730 while Sun as/ld does not. */
3731 if (TARGET_GNU_TLS || !TARGET_TLS)
3732 return 1;
3734 pat = PATTERN (trial);
3736 /* We must reject tgd_add{32|64}, i.e.
3737 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3738 and tldm_add{32|64}, i.e.
3739 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3740 for Sun as/ld. */
3741 if (GET_CODE (pat) == SET
3742 && GET_CODE (SET_SRC (pat)) == PLUS)
3744 rtx unspec = XEXP (SET_SRC (pat), 1);
3746 if (GET_CODE (unspec) == UNSPEC
3747 && (XINT (unspec, 1) == UNSPEC_TLSGD
3748 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3749 return 0;
3752 return 1;
3755 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3756 instruction. RETURN_P is true if the v9 variant 'return' is to be
3757 considered in the test too.
3759 TRIAL must be a SET whose destination is a REG appropriate for the
3760 'restore' instruction or, if RETURN_P is true, for the 'return'
3761 instruction. */
3763 static int
3764 eligible_for_restore_insn (rtx trial, bool return_p)
3766 rtx pat = PATTERN (trial);
3767 rtx src = SET_SRC (pat);
3768 bool src_is_freg = false;
3769 rtx src_reg;
3771 /* Since we now can do moves between float and integer registers when
3772 VIS3 is enabled, we have to catch this case. We can allow such
3773 moves when doing a 'return' however. */
3774 src_reg = src;
3775 if (GET_CODE (src_reg) == SUBREG)
3776 src_reg = SUBREG_REG (src_reg);
3777 if (GET_CODE (src_reg) == REG
3778 && SPARC_FP_REG_P (REGNO (src_reg)))
3779 src_is_freg = true;
3781 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3782 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3783 && arith_operand (src, GET_MODE (src))
3784 && ! src_is_freg)
3786 if (TARGET_ARCH64)
3787 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3788 else
3789 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3792 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3793 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3794 && arith_double_operand (src, GET_MODE (src))
3795 && ! src_is_freg)
3796 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3798 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3799 else if (! TARGET_FPU && register_operand (src, SFmode))
3800 return 1;
3802 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3803 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3804 return 1;
3806 /* If we have the 'return' instruction, anything that does not use
3807 local or output registers and can go into a delay slot wins. */
3808 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3809 return 1;
3811 /* The 'restore src1,src2,dest' pattern for SImode. */
3812 else if (GET_CODE (src) == PLUS
3813 && register_operand (XEXP (src, 0), SImode)
3814 && arith_operand (XEXP (src, 1), SImode))
3815 return 1;
3817 /* The 'restore src1,src2,dest' pattern for DImode. */
3818 else if (GET_CODE (src) == PLUS
3819 && register_operand (XEXP (src, 0), DImode)
3820 && arith_double_operand (XEXP (src, 1), DImode))
3821 return 1;
3823 /* The 'restore src1,%lo(src2),dest' pattern. */
3824 else if (GET_CODE (src) == LO_SUM
3825 && ! TARGET_CM_MEDMID
3826 && ((register_operand (XEXP (src, 0), SImode)
3827 && immediate_operand (XEXP (src, 1), SImode))
3828 || (TARGET_ARCH64
3829 && register_operand (XEXP (src, 0), DImode)
3830 && immediate_operand (XEXP (src, 1), DImode))))
3831 return 1;
3833 /* The 'restore src,src,dest' pattern. */
3834 else if (GET_CODE (src) == ASHIFT
3835 && (register_operand (XEXP (src, 0), SImode)
3836 || register_operand (XEXP (src, 0), DImode))
3837 && XEXP (src, 1) == const1_rtx)
3838 return 1;
3840 return 0;
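/* A hedged example (register choices illustrative): an epilogue insn

       (set (reg:SI %i0) (plus:SI (reg:SI %o0) (reg:SI %o1)))

   matches the 'restore src1,src2,dest' SImode case above and can be
   folded into the window restore, yielding

       ret
        restore %o0, %o1, %o0   ! callee's %i0 is the caller's %o0

   instead of a separate add followed by a plain restore.  */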
3843 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3845 int
3846 eligible_for_return_delay (rtx_insn *trial)
3848 int regno;
3849 rtx pat;
3851 /* If the function uses __builtin_eh_return, the eh_return machinery
3852 occupies the delay slot. */
3853 if (crtl->calls_eh_return)
3854 return 0;
3856 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3857 return 0;
3859 /* In the case of a leaf or flat function, anything can go into the slot. */
3860 if (sparc_leaf_function_p || TARGET_FLAT)
3861 return 1;
3863 if (!NONJUMP_INSN_P (trial))
3864 return 0;
3866 pat = PATTERN (trial);
3867 if (GET_CODE (pat) == PARALLEL)
3869 int i;
3871 if (! TARGET_V9)
3872 return 0;
3873 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3875 rtx expr = XVECEXP (pat, 0, i);
3876 if (GET_CODE (expr) != SET)
3877 return 0;
3878 if (GET_CODE (SET_DEST (expr)) != REG)
3879 return 0;
3880 regno = REGNO (SET_DEST (expr));
3881 if (regno >= 8 && regno < 24)
3882 return 0;
3884 return !epilogue_renumber (&pat, 1);
3887 if (GET_CODE (pat) != SET)
3888 return 0;
3890 if (GET_CODE (SET_DEST (pat)) != REG)
3891 return 0;
3893 regno = REGNO (SET_DEST (pat));
3895 /* Otherwise, only operations which can be done in tandem with
3896 a `restore' or `return' insn can go into the delay slot. */
3897 if (regno >= 8 && regno < 24)
3898 return 0;
3900 /* If this instruction sets up a floating-point register and we have a
3901 return instruction, it can probably go in. But restore will not work
3902 with FP_REGS. */
3903 if (! SPARC_INT_REG_P (regno))
3904 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3906 return eligible_for_restore_insn (trial, true);
3909 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3911 int
3912 eligible_for_sibcall_delay (rtx_insn *trial)
3914 rtx pat;
3916 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3917 return 0;
3919 if (!NONJUMP_INSN_P (trial))
3920 return 0;
3922 pat = PATTERN (trial);
3924 if (sparc_leaf_function_p || TARGET_FLAT)
3926 /* If the tail call is done using the call instruction,
3927 we have to restore %o7 in the delay slot. */
3928 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3929 return 0;
3931 /* %g1 is used to build the function address. */
3932 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3933 return 0;
3935 return 1;
3938 if (GET_CODE (pat) != SET)
3939 return 0;
3941 /* Otherwise, only operations which can be done in tandem with
3942 a `restore' insn can go into the delay slot. */
3943 if (GET_CODE (SET_DEST (pat)) != REG
3944 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3945 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3946 return 0;
3948 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3949 in most cases. */
3950 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3951 return 0;
3953 return eligible_for_restore_insn (trial, false);
3956 /* Determine if it's legal to put X into the constant pool. This
3957 is not possible if X contains the address of a symbol that is
3958 not constant (TLS) or not known at final link time (PIC). */
3960 static bool
3961 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3963 switch (GET_CODE (x))
3965 case CONST_INT:
3966 case CONST_WIDE_INT:
3967 case CONST_DOUBLE:
3968 case CONST_VECTOR:
3969 /* Accept all non-symbolic constants. */
3970 return false;
3972 case LABEL_REF:
3973 /* Labels are OK iff we are non-PIC. */
3974 return flag_pic != 0;
3976 case SYMBOL_REF:
3977 /* 'Naked' TLS symbol references are never OK;
3978 non-TLS symbols are OK iff we are non-PIC. */
3979 if (SYMBOL_REF_TLS_MODEL (x))
3980 return true;
3981 else
3982 return flag_pic != 0;
3984 case CONST:
3985 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3986 case PLUS:
3987 case MINUS:
3988 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3989 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3990 case UNSPEC:
3991 return true;
3992 default:
3993 gcc_unreachable ();
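/* A hedged example of the recursion above: for

       (const (plus (symbol_ref "x") (const_int 8)))

   the CONST case recurses into the PLUS, which recurses into both
   operands; the CONST_INT is accepted and the SYMBOL_REF is rejected
   exactly as a naked symbol would be (always for TLS, otherwise iff
   PIC), so the whole expression follows the symbol's verdict.  */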
3997 /* Global Offset Table support. */
3998 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3999 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4001 /* Return the SYMBOL_REF for the Global Offset Table. */
4003 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4005 static rtx
4006 sparc_got (void)
4008 if (!sparc_got_symbol)
4009 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4011 return sparc_got_symbol;
4014 /* Ensure that we are not using patterns that are not OK with PIC. */
4016 int
4017 check_pic (int i)
4019 rtx op;
4021 switch (flag_pic)
4023 case 1:
4024 op = recog_data.operand[i];
4025 gcc_assert (GET_CODE (op) != SYMBOL_REF
4026 && (GET_CODE (op) != CONST
4027 || (GET_CODE (XEXP (op, 0)) == MINUS
4028 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4029 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4030 /* fallthrough */
4031 case 2:
4032 default:
4033 return 1;
4037 /* Return true if X is an address which needs a temporary register when
4038 reloaded while generating PIC code. */
4040 int
4041 pic_address_needs_scratch (rtx x)
4043 /* An address which is a symbolic operand plus a non-SMALL_INT needs a temp reg. */
4044 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4045 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4046 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4047 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4048 return 1;
4050 return 0;
4053 /* Determine if a given RTX is a valid constant. We already know this
4054 satisfies CONSTANT_P. */
4056 static bool
4057 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4059 switch (GET_CODE (x))
4061 case CONST:
4062 case SYMBOL_REF:
4063 if (sparc_tls_referenced_p (x))
4064 return false;
4065 break;
4067 case CONST_DOUBLE:
4068 /* Floating point constants are generally not ok.
4069 The only exception is 0.0 and all-ones in VIS. */
4070 if (TARGET_VIS
4071 && SCALAR_FLOAT_MODE_P (mode)
4072 && (const_zero_operand (x, mode)
4073 || const_all_ones_operand (x, mode)))
4074 return true;
4076 return false;
4078 case CONST_VECTOR:
4079 /* Vector constants are generally not ok.
4080 The only exception is 0 or -1 in VIS. */
4081 if (TARGET_VIS
4082 && (const_zero_operand (x, mode)
4083 || const_all_ones_operand (x, mode)))
4084 return true;
4086 return false;
4088 default:
4089 break;
4092 return true;
4095 /* Determine if a given RTX is a valid constant address. */
4097 bool
4098 constant_address_p (rtx x)
4100 switch (GET_CODE (x))
4102 case LABEL_REF:
4103 case CONST_INT:
4104 case HIGH:
4105 return true;
4107 case CONST:
4108 if (flag_pic && pic_address_needs_scratch (x))
4109 return false;
4110 return sparc_legitimate_constant_p (Pmode, x);
4112 case SYMBOL_REF:
4113 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4115 default:
4116 return false;
4120 /* Nonzero if the constant value X is a legitimate general operand
4121 when generating PIC code. It is given that flag_pic is on and
4122 that X satisfies CONSTANT_P. */
4124 bool
4125 legitimate_pic_operand_p (rtx x)
4127 if (pic_address_needs_scratch (x))
4128 return false;
4129 if (sparc_tls_referenced_p (x))
4130 return false;
4131 return true;
4134 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4135 (CONST_INT_P (X) \
4136 && INTVAL (X) >= -0x1000 \
4137 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4139 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4140 (CONST_INT_P (X) \
4141 && INTVAL (X) >= -0x1000 \
4142 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
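/* A worked example of the ranges above: for a DImode access (8 bytes),
   RTX_OK_FOR_OFFSET_P accepts offsets in [-0x1000, 0xff8] so that the
   last byte accessed still lies within the signed 13-bit immediate
   range, while RTX_OK_FOR_OLO10_P caps the range at 0xbf8 to leave
   room for the up-to-10-bit %lo() value added on top.  */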
4144 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4146 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4147 ordinarily. This changes a bit when generating PIC. */
4149 static bool
4150 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4152 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4154 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4155 rs1 = addr;
4156 else if (GET_CODE (addr) == PLUS)
4158 rs1 = XEXP (addr, 0);
4159 rs2 = XEXP (addr, 1);
4161 /* Canonicalize: REG comes first; if there are no regs,
4162 LO_SUM comes first. */
4163 if (!REG_P (rs1)
4164 && GET_CODE (rs1) != SUBREG
4165 && (REG_P (rs2)
4166 || GET_CODE (rs2) == SUBREG
4167 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4169 rs1 = XEXP (addr, 1);
4170 rs2 = XEXP (addr, 0);
4173 if ((flag_pic == 1
4174 && rs1 == pic_offset_table_rtx
4175 && !REG_P (rs2)
4176 && GET_CODE (rs2) != SUBREG
4177 && GET_CODE (rs2) != LO_SUM
4178 && GET_CODE (rs2) != MEM
4179 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4180 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4181 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4182 || ((REG_P (rs1)
4183 || GET_CODE (rs1) == SUBREG)
4184 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4186 imm1 = rs2;
4187 rs2 = NULL;
4189 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4190 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4192 /* We prohibit REG + REG for TFmode when there are no quad move insns
4193 and we consequently need to split. We do this because REG+REG
4194 is not an offsettable address. If we get the situation in reload
4195 where source and destination of a movtf pattern are both MEMs with
4196 REG+REG address, then only one of them gets converted to an
4197 offsettable address. */
4198 if (mode == TFmode
4199 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4200 return 0;
4202 /* Likewise for TImode, but in all cases. */
4203 if (mode == TImode)
4204 return 0;
4206 /* We prohibit REG + REG on ARCH32 if not optimizing for
4207 DFmode/DImode because then mem_min_alignment is likely to be zero
4208 after reload and the forced split would lack a matching splitter
4209 pattern. */
4210 if (TARGET_ARCH32 && !optimize
4211 && (mode == DFmode || mode == DImode))
4212 return 0;
4214 else if (USE_AS_OFFSETABLE_LO10
4215 && GET_CODE (rs1) == LO_SUM
4216 && TARGET_ARCH64
4217 && ! TARGET_CM_MEDMID
4218 && RTX_OK_FOR_OLO10_P (rs2, mode))
4220 rs2 = NULL;
4221 imm1 = XEXP (rs1, 1);
4222 rs1 = XEXP (rs1, 0);
4223 if (!CONSTANT_P (imm1)
4224 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4225 return 0;
4228 else if (GET_CODE (addr) == LO_SUM)
4230 rs1 = XEXP (addr, 0);
4231 imm1 = XEXP (addr, 1);
4233 if (!CONSTANT_P (imm1)
4234 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4235 return 0;
4237 /* We can't allow TFmode in 32-bit mode, because an offset greater
4238 than the alignment (8) may cause the LO_SUM to overflow. */
4239 if (mode == TFmode && TARGET_ARCH32)
4240 return 0;
4242 /* During reload, accept the HIGH+LO_SUM construct generated by
4243 sparc_legitimize_reload_address. */
4244 if (reload_in_progress
4245 && GET_CODE (rs1) == HIGH
4246 && XEXP (rs1, 0) == imm1)
4247 return 1;
4249 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4250 return 1;
4251 else
4252 return 0;
4254 if (GET_CODE (rs1) == SUBREG)
4255 rs1 = SUBREG_REG (rs1);
4256 if (!REG_P (rs1))
4257 return 0;
4259 if (rs2)
4261 if (GET_CODE (rs2) == SUBREG)
4262 rs2 = SUBREG_REG (rs2);
4263 if (!REG_P (rs2))
4264 return 0;
4267 if (strict)
4269 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4270 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4271 return 0;
4273 else
4275 if ((! SPARC_INT_REG_P (REGNO (rs1))
4276 && REGNO (rs1) != FRAME_POINTER_REGNUM
4277 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4278 || (rs2
4279 && (! SPARC_INT_REG_P (REGNO (rs2))
4280 && REGNO (rs2) != FRAME_POINTER_REGNUM
4281 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4282 return 0;
4284 return 1;
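/* Hedged examples of the classification above (registers illustrative):

       (plus (reg %o0) (const_int 12))      accepted: REG+SMALLINT
       (plus (reg %o0) (reg %o1))           accepted: REG+REG, except for
                                            TImode and for TFmode without
                                            hard quad support
       (lo_sum (reg %o0) (symbol_ref "x"))  accepted unless "x" is
                                            thread-local
       (plus (reg %o0) (const_int 0x2000))  rejected: 0x2000 does not fit
                                            in a signed 13-bit immediate  */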
4287 /* Return the SYMBOL_REF for the tls_get_addr function. */
4289 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4291 static rtx
4292 sparc_tls_get_addr (void)
4294 if (!sparc_tls_symbol)
4295 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4297 return sparc_tls_symbol;
4300 /* Return the Global Offset Table to be used in TLS mode. */
4302 static rtx
4303 sparc_tls_got (void)
4305 /* In PIC mode, this is just the PIC offset table. */
4306 if (flag_pic)
4308 crtl->uses_pic_offset_table = 1;
4309 return pic_offset_table_rtx;
4312 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4313 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4314 if (TARGET_SUN_TLS && TARGET_ARCH32)
4316 load_got_register ();
4317 return global_offset_table_rtx;
4320 /* In all other cases, we load a new pseudo with the GOT symbol. */
4321 return copy_to_reg (sparc_got ());
4324 /* Return true if X contains a thread-local symbol. */
4326 static bool
4327 sparc_tls_referenced_p (rtx x)
4329 if (!TARGET_HAVE_TLS)
4330 return false;
4332 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4333 x = XEXP (XEXP (x, 0), 0);
4335 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4336 return true;
4338 /* That's all we handle in sparc_legitimize_tls_address for now. */
4339 return false;
4342 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4343 this (thread-local) address. */
4345 static rtx
4346 sparc_legitimize_tls_address (rtx addr)
4348 rtx temp1, temp2, temp3, ret, o0, got;
4349 rtx_insn *insn;
4351 gcc_assert (can_create_pseudo_p ());
4353 if (GET_CODE (addr) == SYMBOL_REF)
4354 switch (SYMBOL_REF_TLS_MODEL (addr))
4356 case TLS_MODEL_GLOBAL_DYNAMIC:
4357 start_sequence ();
4358 temp1 = gen_reg_rtx (SImode);
4359 temp2 = gen_reg_rtx (SImode);
4360 ret = gen_reg_rtx (Pmode);
4361 o0 = gen_rtx_REG (Pmode, 8);
4362 got = sparc_tls_got ();
4363 emit_insn (gen_tgd_hi22 (temp1, addr));
4364 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4365 if (TARGET_ARCH32)
4367 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4368 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4369 addr, const1_rtx));
4371 else
4373 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4374 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4375 addr, const1_rtx));
4377 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4378 insn = get_insns ();
4379 end_sequence ();
4380 emit_libcall_block (insn, ret, o0, addr);
4381 break;
4383 case TLS_MODEL_LOCAL_DYNAMIC:
4384 start_sequence ();
4385 temp1 = gen_reg_rtx (SImode);
4386 temp2 = gen_reg_rtx (SImode);
4387 temp3 = gen_reg_rtx (Pmode);
4388 ret = gen_reg_rtx (Pmode);
4389 o0 = gen_rtx_REG (Pmode, 8);
4390 got = sparc_tls_got ();
4391 emit_insn (gen_tldm_hi22 (temp1));
4392 emit_insn (gen_tldm_lo10 (temp2, temp1));
4393 if (TARGET_ARCH32)
4395 emit_insn (gen_tldm_add32 (o0, got, temp2));
4396 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4397 const1_rtx));
4399 else
4401 emit_insn (gen_tldm_add64 (o0, got, temp2));
4402 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4403 const1_rtx));
4405 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4406 insn = get_insns ();
4407 end_sequence ();
4408 emit_libcall_block (insn, temp3, o0,
4409 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4410 UNSPEC_TLSLD_BASE));
4411 temp1 = gen_reg_rtx (SImode);
4412 temp2 = gen_reg_rtx (SImode);
4413 emit_insn (gen_tldo_hix22 (temp1, addr));
4414 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4415 if (TARGET_ARCH32)
4416 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4417 else
4418 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4419 break;
4421 case TLS_MODEL_INITIAL_EXEC:
4422 temp1 = gen_reg_rtx (SImode);
4423 temp2 = gen_reg_rtx (SImode);
4424 temp3 = gen_reg_rtx (Pmode);
4425 got = sparc_tls_got ();
4426 emit_insn (gen_tie_hi22 (temp1, addr));
4427 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4428 if (TARGET_ARCH32)
4429 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4430 else
4431 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4432 if (TARGET_SUN_TLS)
4434 ret = gen_reg_rtx (Pmode);
4435 if (TARGET_ARCH32)
4436 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4437 temp3, addr));
4438 else
4439 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4440 temp3, addr));
4442 else
4443 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4444 break;
4446 case TLS_MODEL_LOCAL_EXEC:
4447 temp1 = gen_reg_rtx (Pmode);
4448 temp2 = gen_reg_rtx (Pmode);
4449 if (TARGET_ARCH32)
4451 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4452 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4454 else
4456 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4457 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4459 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4460 break;
4462 default:
4463 gcc_unreachable ();
4466 else if (GET_CODE (addr) == CONST)
4468 rtx base, offset;
4470 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4472 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4473 offset = XEXP (XEXP (addr, 0), 1);
4475 base = force_operand (base, NULL_RTX);
4476 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4477 offset = force_reg (Pmode, offset);
4478 ret = gen_rtx_PLUS (Pmode, base, offset);
4481 else
4482 gcc_unreachable (); /* for now ... */
4484 return ret;
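/* A hedged sketch of the 32-bit global-dynamic sequence generated above,
   assuming the GOT register is %l7 and using the assembler operators
   already quoted in this file (temporaries illustrative):

       sethi  %tgd_hi22(sym), %l1
       add    %l1, %tgd_lo10(sym), %l2
       add    %l7, %l2, %o0, %tgd_add(sym)
       call   __tls_get_addr, %tgd_call(sym)
        nop

   The result comes back in %o0 and emit_libcall_block copies it into
   the destination pseudo.  */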
4487 /* Legitimize PIC addresses. If the address is already position-independent,
4488 we return ORIG. Newly generated position-independent addresses go into a
4489 reg. This is REG if nonzero, otherwise we allocate register(s) as
4490 necessary. */
4492 static rtx
4493 sparc_legitimize_pic_address (rtx orig, rtx reg)
4495 bool gotdata_op = false;
4497 if (GET_CODE (orig) == SYMBOL_REF
4498 /* See the comment in sparc_expand_move. */
4499 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4501 rtx pic_ref, address;
4502 rtx_insn *insn;
4504 if (reg == 0)
4506 gcc_assert (can_create_pseudo_p ());
4507 reg = gen_reg_rtx (Pmode);
4510 if (flag_pic == 2)
4512 /* If not during reload, allocate another temp reg here for loading
4513 in the address, so that these instructions can be optimized
4514 properly. */
4515 rtx temp_reg = (! can_create_pseudo_p ()
4516 ? reg : gen_reg_rtx (Pmode));
4518 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4519 won't get confused into thinking that these two instructions
4520 are loading in the true address of the symbol. If in the
4521 future a PIC rtx exists, that should be used instead. */
4522 if (TARGET_ARCH64)
4524 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4525 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4527 else
4529 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4530 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4532 address = temp_reg;
4533 gotdata_op = true;
4535 else
4536 address = orig;
4538 crtl->uses_pic_offset_table = 1;
4539 if (gotdata_op)
4541 if (TARGET_ARCH64)
4542 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4543 pic_offset_table_rtx,
4544 address, orig));
4545 else
4546 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4547 pic_offset_table_rtx,
4548 address, orig));
4550 else
4552 pic_ref
4553 = gen_const_mem (Pmode,
4554 gen_rtx_PLUS (Pmode,
4555 pic_offset_table_rtx, address));
4556 insn = emit_move_insn (reg, pic_ref);
4559 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4560 by the loop optimizer. */
4561 set_unique_reg_note (insn, REG_EQUAL, orig);
4562 return reg;
4564 else if (GET_CODE (orig) == CONST)
4566 rtx base, offset;
4568 if (GET_CODE (XEXP (orig, 0)) == PLUS
4569 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4570 return orig;
4572 if (reg == 0)
4574 gcc_assert (can_create_pseudo_p ());
4575 reg = gen_reg_rtx (Pmode);
4578 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4579 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4580 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4581 base == reg ? NULL_RTX : reg);
4583 if (GET_CODE (offset) == CONST_INT)
4585 if (SMALL_INT (offset))
4586 return plus_constant (Pmode, base, INTVAL (offset));
4587 else if (can_create_pseudo_p ())
4588 offset = force_reg (Pmode, offset);
4589 else
4590 /* If we reach here, then something is seriously wrong. */
4591 gcc_unreachable ();
4593 return gen_rtx_PLUS (Pmode, base, offset);
4595 else if (GET_CODE (orig) == LABEL_REF)
4596 /* ??? We ought to be checking that the register is live instead, in case
4597 it is eliminated. */
4598 crtl->uses_pic_offset_table = 1;
4600 return orig;
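/* A hedged sketch, in RTL terms, of the flag_pic == 2 path above for a
   SYMBOL_REF (P stands for Pmode; the exact unspec plumbing lives in
   the movsi/movdi patterns):

       (set (reg tmp) (high:P (unspec [sym] UNSPEC_MOVE_PIC)))
       (set (reg tmp) (lo_sum:P (reg tmp) (unspec [sym] UNSPEC_MOVE_PIC)))
       (set (reg dst) (mem:P (plus:P (reg %l7) (reg tmp))))

   i.e. two insns build the GOT slot offset and a third loads the
   symbol's address from the GOT.  */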
4603 /* Try machine-dependent ways of modifying an illegitimate address X
4604 to be legitimate. If we find one, return the new, valid address.
4606 OLDX is the address as it was before break_out_memory_refs was called.
4607 In some cases it is useful to look at this to decide what needs to be done.
4609 MODE is the mode of the operand pointed to by X.
4611 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4613 static rtx
4614 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4615 machine_mode mode)
4617 rtx orig_x = x;
4619 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4620 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4621 force_operand (XEXP (x, 0), NULL_RTX));
4622 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4623 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4624 force_operand (XEXP (x, 1), NULL_RTX));
4625 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4626 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4627 XEXP (x, 1));
4628 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4629 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4630 force_operand (XEXP (x, 1), NULL_RTX));
4632 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4633 return x;
4635 if (sparc_tls_referenced_p (x))
4636 x = sparc_legitimize_tls_address (x);
4637 else if (flag_pic)
4638 x = sparc_legitimize_pic_address (x, NULL_RTX);
4639 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4640 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4641 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4642 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4643 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4644 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4645 else if (GET_CODE (x) == SYMBOL_REF
4646 || GET_CODE (x) == CONST
4647 || GET_CODE (x) == LABEL_REF)
4648 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4650 return x;
4653 /* Delegitimize an address that was legitimized by the above function. */
4655 static rtx
4656 sparc_delegitimize_address (rtx x)
4658 x = delegitimize_mem_from_attrs (x);
4660 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4661 switch (XINT (XEXP (x, 1), 1))
4663 case UNSPEC_MOVE_PIC:
4664 case UNSPEC_TLSLE:
4665 x = XVECEXP (XEXP (x, 1), 0, 0);
4666 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4667 break;
4668 default:
4669 break;
4672 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4673 if (GET_CODE (x) == MINUS
4674 && REG_P (XEXP (x, 0))
4675 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4676 && GET_CODE (XEXP (x, 1)) == LO_SUM
4677 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4678 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4680 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4681 gcc_assert (GET_CODE (x) == LABEL_REF);
4684 return x;
4687 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4688 replace the input X, or the original X if no replacement is called for.
4689 The output parameter *WIN is 1 if the calling macro should goto WIN,
4690 0 if it should not.
4692 For SPARC, we wish to handle addresses by splitting them into
4693 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4694 This cuts the number of extra insns by one.
4696 Do nothing when generating PIC code and the address is a symbolic
4697 operand or requires a scratch register. */
4699 rtx
4700 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4701 int opnum, int type,
4702 int ind_levels ATTRIBUTE_UNUSED, int *win)
4704 /* Decompose SImode constants into HIGH+LO_SUM. */
4705 if (CONSTANT_P (x)
4706 && (mode != TFmode || TARGET_ARCH64)
4707 && GET_MODE (x) == SImode
4708 && GET_CODE (x) != LO_SUM
4709 && GET_CODE (x) != HIGH
4710 && sparc_cmodel <= CM_MEDLOW
4711 && !(flag_pic
4712 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4714 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4715 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4716 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4717 opnum, (enum reload_type)type);
4718 *win = 1;
4719 return x;
4722 /* We have to recognize what we have already generated above. */
4723 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4725 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4726 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4727 opnum, (enum reload_type)type);
4728 *win = 1;
4729 return x;
4732 *win = 0;
4733 return x;
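/* A hedged example of the decomposition above: a constant address such
   as (symbol_ref "x") is rewritten into

       (lo_sum:SI (high:SI (symbol_ref "x")) (symbol_ref "x"))

   with the HIGH part reloaded into a base register, i.e. the classic

       sethi  %hi(x), %tmp
       ld     [%tmp + %lo(x)], %reg

   sequence, keeping the LO_SUM inside the memory reference.  */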
4736 /* Return true if ADDR (a legitimate address expression)
4737 has an effect that depends on the machine mode it is used for.
4739 In PIC mode,
4741 (mem:HI [%l7+a])
4743 is not equivalent to
4745 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4747 because [%l7+a+1] is interpreted as the address of (a+1). */
4750 static bool
4751 sparc_mode_dependent_address_p (const_rtx addr,
4752 addr_space_t as ATTRIBUTE_UNUSED)
4754 if (flag_pic && GET_CODE (addr) == PLUS)
4756 rtx op0 = XEXP (addr, 0);
4757 rtx op1 = XEXP (addr, 1);
4758 if (op0 == pic_offset_table_rtx
4759 && symbolic_operand (op1, VOIDmode))
4760 return true;
4763 return false;
4766 #ifdef HAVE_GAS_HIDDEN
4767 # define USE_HIDDEN_LINKONCE 1
4768 #else
4769 # define USE_HIDDEN_LINKONCE 0
4770 #endif
4772 static void
4773 get_pc_thunk_name (char name[32], unsigned int regno)
4775 const char *reg_name = reg_names[regno];
4777 /* Skip the leading '%' as that cannot be used in a
4778 symbol name. */
4779 reg_name += 1;
4781 if (USE_HIDDEN_LINKONCE)
4782 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4783 else
4784 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4787 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4789 static rtx
4790 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4792 int orig_flag_pic = flag_pic;
4793 rtx insn;
4795 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4796 flag_pic = 0;
4797 if (TARGET_ARCH64)
4798 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4799 else
4800 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4801 flag_pic = orig_flag_pic;
4803 return insn;
4806 /* Emit code to load the GOT register. */
4808 void
4809 load_got_register (void)
4811 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4812 if (!global_offset_table_rtx)
4813 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4815 if (TARGET_VXWORKS_RTP)
4816 emit_insn (gen_vxworks_load_got ());
4817 else
4819 /* The GOT symbol is subject to a PC-relative relocation so we need a
4820 helper function to add the PC value and thus get the final value. */
4821 if (!got_helper_rtx)
4823 char name[32];
4824 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4825 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4828 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4829 got_helper_rtx,
4830 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4833 /* Need to emit this whether or not we obey regdecls,
4834 since setjmp/longjmp can cause life info to screw up.
4835 ??? In the case where we don't obey regdecls, this is not sufficient
4836 since we may not fall out the bottom. */
4837 emit_use (global_offset_table_rtx);
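/* A hedged sketch of what this typically expands to on 32-bit (from the
   usual SPARC PIC idiom; exact offsets and thunk body may differ):

       sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
       call   __sparc_get_pc_thunk.l7
        add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds the PC of the call (%o7) into %l7 and returns,
   leaving %l7 pointing at the GOT.  */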
4840 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4841 address of the call target. */
4843 void
4844 sparc_emit_call_insn (rtx pat, rtx addr)
4846 rtx_insn *insn;
4848 insn = emit_call_insn (pat);
4850 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4851 if (TARGET_VXWORKS_RTP
4852 && flag_pic
4853 && GET_CODE (addr) == SYMBOL_REF
4854 && (SYMBOL_REF_DECL (addr)
4855 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4856 : !SYMBOL_REF_LOCAL_P (addr)))
4858 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4859 crtl->uses_pic_offset_table = 1;
4863 /* Return 1 if RTX is a MEM which is known to be aligned to at
4864 least a DESIRED byte boundary. */
4866 int
4867 mem_min_alignment (rtx mem, int desired)
4869 rtx addr, base, offset;
4871 /* If it's not a MEM we can't accept it. */
4872 if (GET_CODE (mem) != MEM)
4873 return 0;
4875 /* Obviously... */
4876 if (!TARGET_UNALIGNED_DOUBLES
4877 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4878 return 1;
4880 /* ??? The rest of the function predates MEM_ALIGN so
4881 there is probably a bit of redundancy. */
4882 addr = XEXP (mem, 0);
4883 base = offset = NULL_RTX;
4884 if (GET_CODE (addr) == PLUS)
4886 if (GET_CODE (XEXP (addr, 0)) == REG)
4888 base = XEXP (addr, 0);
4890 /* What we are saying here is that if the base
4891 REG is aligned properly, the compiler will make
4892 sure any REG-based index upon it will be
4893 aligned as well. */
4894 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4895 offset = XEXP (addr, 1);
4896 else
4897 offset = const0_rtx;
4900 else if (GET_CODE (addr) == REG)
4902 base = addr;
4903 offset = const0_rtx;
4906 if (base != NULL_RTX)
4908 int regno = REGNO (base);
4910 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4912 /* Check if the compiler has recorded some information
4913 about the alignment of the base REG. If reload has
4914 completed, we already matched with proper alignments.
4915 If not running global_alloc, reload might give us
4916 an unaligned pointer to the local stack, though. */
4917 if (((cfun != 0
4918 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4919 || (optimize && reload_completed))
4920 && (INTVAL (offset) & (desired - 1)) == 0)
4921 return 1;
4923 else
4925 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4926 return 1;
4929 else if (! TARGET_UNALIGNED_DOUBLES
4930 || CONSTANT_P (addr)
4931 || GET_CODE (addr) == LO_SUM)
4933 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4934 is true, in which case we can only assume that an access is aligned if
4935 it is to a constant address, or the address involves a LO_SUM. */
4936 return 1;
4939 /* An obviously unaligned address. */
4940 return 0;
4944 /* Vectors to keep interesting information about registers where it can easily
4945 be got. We used to use the actual mode value as the bit number, but there
4946 are more than 32 modes now. Instead we use two tables: one indexed by
4947 hard register number, and one indexed by mode. */
4949 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4950 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4951 mapped into one sparc_mode_class mode. */
4953 enum sparc_mode_class {
4954 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4955 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4956 CC_MODE, CCFP_MODE
4959 /* Modes for single-word and smaller quantities. */
4960 #define S_MODES \
4961 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4963 /* Modes for double-word and smaller quantities. */
4964 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4966 /* Modes for quad-word and smaller quantities. */
4967 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4969 /* Modes for 8-word and smaller quantities. */
4970 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4972 /* Modes for single-float quantities. */
4973 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4975 /* Modes for double-float and smaller quantities. */
4976 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4978 /* Modes for quad-float and smaller quantities. */
4979 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4981 /* Modes for quad-float pairs and smaller quantities. */
4982 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4984 /* Modes for double-float only quantities. */
4985 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4987 /* Modes for quad-float and double-float only quantities. */
4988 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4990 /* Modes for quad-float pairs and double-float only quantities. */
4991 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4993 /* Modes for condition codes. */
4994 #define CC_MODES (1 << (int) CC_MODE)
4995 #define CCFP_MODES (1 << (int) CCFP_MODE)
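/* A worked expansion of the masks above, straight from the definitions:

       D_MODES == (1 << H_MODE) | (1 << S_MODE) | (1 << SF_MODE)
                | (1 << D_MODE) | (1 << DF_MODE)

   so a register tagged D_MODES in the tables below can hold anything up
   to and including 8-byte integer and floating-point quantities.  */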
4997 /* Value is 1 if register/mode pair is acceptable on sparc.
4999 The funny mixture of D and T modes is because integer operations
5000 do not specially operate on tetra quantities, so non-quad-aligned
5001 registers can hold quadword quantities (except %o4 and %i4 because
5002 they cross fixed registers).
5004 ??? Note that, despite the settings, non-double-aligned parameter
5005 registers can hold double-word quantities in 32-bit mode. */
5007 /* This points to either the 32-bit or the 64-bit version. */
5008 static const int *hard_regno_mode_classes;
5010 static const int hard_32bit_mode_classes[] = {
5011 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5012 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5013 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5014 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5016 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5017 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5018 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5019 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5021 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5022 and none can hold SFmode/SImode values. */
5023 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5024 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5025 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5026 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5028 /* %fcc[0123] */
5029 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5031 /* %icc, %sfp, %gsr */
5032 CC_MODES, 0, D_MODES
5035 static const int hard_64bit_mode_classes[] = {
5036 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5037 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5038 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5039 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5041 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5042 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5043 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5044 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5046 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5047 and none can hold SFmode/SImode values. */
5048 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5049 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5050 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5051 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5053 /* %fcc[0123] */
5054 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5056 /* %icc, %sfp, %gsr */
5057 CC_MODES, 0, D_MODES
5060 static int sparc_mode_class [NUM_MACHINE_MODES];
5062 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5064 static void
5065 sparc_init_modes (void)
5067 int i;
5069 for (i = 0; i < NUM_MACHINE_MODES; i++)
5071 machine_mode m = (machine_mode) i;
5072 unsigned int size = GET_MODE_SIZE (m);
5074 switch (GET_MODE_CLASS (m))
5076 case MODE_INT:
5077 case MODE_PARTIAL_INT:
5078 case MODE_COMPLEX_INT:
5079 if (size < 4)
5080 sparc_mode_class[i] = 1 << (int) H_MODE;
5081 else if (size == 4)
5082 sparc_mode_class[i] = 1 << (int) S_MODE;
5083 else if (size == 8)
5084 sparc_mode_class[i] = 1 << (int) D_MODE;
5085 else if (size == 16)
5086 sparc_mode_class[i] = 1 << (int) T_MODE;
5087 else if (size == 32)
5088 sparc_mode_class[i] = 1 << (int) O_MODE;
5089 else
5090 sparc_mode_class[i] = 0;
5091 break;
5092 case MODE_VECTOR_INT:
5093 if (size == 4)
5094 sparc_mode_class[i] = 1 << (int) SF_MODE;
5095 else if (size == 8)
5096 sparc_mode_class[i] = 1 << (int) DF_MODE;
5097 else
5098 sparc_mode_class[i] = 0;
5099 break;
5100 case MODE_FLOAT:
5101 case MODE_COMPLEX_FLOAT:
5102 if (size == 4)
5103 sparc_mode_class[i] = 1 << (int) SF_MODE;
5104 else if (size == 8)
5105 sparc_mode_class[i] = 1 << (int) DF_MODE;
5106 else if (size == 16)
5107 sparc_mode_class[i] = 1 << (int) TF_MODE;
5108 else if (size == 32)
5109 sparc_mode_class[i] = 1 << (int) OF_MODE;
5110 else
5111 sparc_mode_class[i] = 0;
5112 break;
5113 case MODE_CC:
5114 if (m == CCFPmode || m == CCFPEmode)
5115 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5116 else
5117 sparc_mode_class[i] = 1 << (int) CC_MODE;
5118 break;
5119 default:
5120 sparc_mode_class[i] = 0;
5121 break;
5125 if (TARGET_ARCH64)
5126 hard_regno_mode_classes = hard_64bit_mode_classes;
5127 else
5128 hard_regno_mode_classes = hard_32bit_mode_classes;
5130 /* Initialize the array used by REGNO_REG_CLASS. */
5131 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5133 if (i < 16 && TARGET_V8PLUS)
5134 sparc_regno_reg_class[i] = I64_REGS;
5135 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5136 sparc_regno_reg_class[i] = GENERAL_REGS;
5137 else if (i < 64)
5138 sparc_regno_reg_class[i] = FP_REGS;
5139 else if (i < 96)
5140 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5141 else if (i < 100)
5142 sparc_regno_reg_class[i] = FPCC_REGS;
5143 else
5144 sparc_regno_reg_class[i] = NO_REGS;
5148 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5150 static inline bool
5151 save_global_or_fp_reg_p (unsigned int regno,
5152 int leaf_function ATTRIBUTE_UNUSED)
5154 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5157 /* Return whether the return address register (%i7) is needed. */
5159 static inline bool
5160 return_addr_reg_needed_p (int leaf_function)
5162 /* If it is live, for example because of __builtin_return_address (0). */
5163 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5164 return true;
5166 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5167 if (!leaf_function
5168 /* Loading the GOT register clobbers %o7. */
5169 || crtl->uses_pic_offset_table
5170 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5171 return true;
5173 return false;
5176 /* Return whether REGNO, a local or in register, must be saved/restored. */
5178 static bool
5179 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5181 /* General case: call-saved registers live at some point. */
5182 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5183 return true;
5185 /* Frame pointer register (%fp) if needed. */
5186 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5187 return true;
5189 /* Return address register (%i7) if needed. */
5190 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5191 return true;
5193 /* GOT register (%l7) if needed. */
5194 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5195 return true;
5197 /* If the function accesses prior frames, the frame pointer and the return
5198 address of the previous frame must be saved on the stack. */
5199 if (crtl->accesses_prior_frames
5200 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5201 return true;
5203 return false;
5206 /* Compute the frame size required by the function. This function is called
5207 during the reload pass and also by sparc_expand_prologue. */
5209 HOST_WIDE_INT
5210 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5212 HOST_WIDE_INT frame_size, apparent_frame_size;
5213 int args_size, n_global_fp_regs = 0;
5214 bool save_local_in_regs_p = false;
5215 unsigned int i;
5217 /* If the function allocates dynamic stack space, the dynamic offset is
5218 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5219 if (leaf_function && !cfun->calls_alloca)
5220 args_size = 0;
5221 else
5222 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5224 /* Calculate space needed for global registers. */
5225 if (TARGET_ARCH64)
5227 for (i = 0; i < 8; i++)
5228 if (save_global_or_fp_reg_p (i, 0))
5229 n_global_fp_regs += 2;
5231 else
5233 for (i = 0; i < 8; i += 2)
5234 if (save_global_or_fp_reg_p (i, 0)
5235 || save_global_or_fp_reg_p (i + 1, 0))
5236 n_global_fp_regs += 2;
5239 /* In the flat window model, find out which local and in registers need to
5240 be saved. We don't reserve space in the current frame for them as they
5241 will be spilled into the register window save area of the caller's frame.
5242 However, as soon as we use this register window save area, we must create
5243 that of the current frame to make it the live one. */
5244 if (TARGET_FLAT)
5245 for (i = 16; i < 32; i++)
5246 if (save_local_or_in_reg_p (i, leaf_function))
5248 save_local_in_regs_p = true;
5249 break;
5252 /* Calculate space needed for FP registers. */
5253 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5254 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5255 n_global_fp_regs += 2;
5257 if (size == 0
5258 && n_global_fp_regs == 0
5259 && args_size == 0
5260 && !save_local_in_regs_p)
5261 frame_size = apparent_frame_size = 0;
5262 else
5264 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5265 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5266 apparent_frame_size += n_global_fp_regs * 4;
5268 /* We need to add the size of the outgoing argument area. */
5269 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5271 /* And that of the register window save area. */
5272 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5274 /* Finally, bump to the appropriate alignment. */
5275 frame_size = SPARC_STACK_ALIGN (frame_size);
5278 /* Set up values for use in prologue and epilogue. */
5279 sparc_frame_size = frame_size;
5280 sparc_apparent_frame_size = apparent_frame_size;
5281 sparc_n_global_fp_regs = n_global_fp_regs;
5282 sparc_save_local_in_regs_p = save_local_in_regs_p;
5284 return frame_size;
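/* A hedged worked example: for a 32-bit leaf function with 40 bytes of
   locals, two call-saved global/FP register pairs (n_global_fp_regs
   == 4) and no calls, the code above yields

       apparent_frame_size = ROUND_UP (40 - STARTING_FRAME_OFFSET, 8)
                             + 4 * 4
       frame_size = SPARC_STACK_ALIGN (apparent_frame_size
                                       + FIRST_PARM_OFFSET (cfun->decl))

   i.e. locals plus register save space, then the register window save
   area, rounded up to the stack alignment; args_size is 0 for a leaf
   function that does not call alloca.  */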
5287 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5289 HOST_WIDE_INT
5290 sparc_initial_elimination_offset (int to)
5292 int offset;
5294 if (to == STACK_POINTER_REGNUM)
5295 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5296 else
5297 offset = 0;
5299 offset += SPARC_STACK_BIAS;
5300 return offset;
5303 /* Output any necessary .register pseudo-ops. */
5305 void
5306 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5308 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5309 int i;
5311 if (TARGET_ARCH32)
5312 return;
5314 /* Check whether %g[2367] were used without
5315 a .register directive being printed for them already. */
5316 for (i = 2; i < 8; i++)
5318 if (df_regs_ever_live_p (i)
5319 && ! sparc_hard_reg_printed [i])
5321 sparc_hard_reg_printed [i] = 1;
5322 /* %g7 is used as TLS base register, use #ignore
5323 for it instead of #scratch. */
5324 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5325 i == 7 ? "ignore" : "scratch");
5327 if (i == 3) i = 5;
5329 #endif
5332 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5334 #if PROBE_INTERVAL > 4096
5335 #error Cannot use indexed addressing mode for stack probing
5336 #endif
5338 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5339 inclusive. These are offsets from the current stack pointer.
5341 Note that we don't use the REG+REG addressing mode for the probes because
5342 of the stack bias in 64-bit mode. And it doesn't really buy us anything,
5343 so the advantages of having a single code path win here. */
5345 static void
5346 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5348 rtx g1 = gen_rtx_REG (Pmode, 1);
5350 /* See if we have a constant small number of probes to generate. If so,
5351 that's the easy case. */
5352 if (size <= PROBE_INTERVAL)
5354 emit_move_insn (g1, GEN_INT (first));
5355 emit_insn (gen_rtx_SET (g1,
5356 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5357 emit_stack_probe (plus_constant (Pmode, g1, -size));
5360 /* The run-time loop is made up of 9 insns in the generic case while the
5361 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5362 else if (size <= 4 * PROBE_INTERVAL)
5364 HOST_WIDE_INT i;
5366 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5367 emit_insn (gen_rtx_SET (g1,
5368 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5369 emit_stack_probe (g1);
5371 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5372 it exceeds SIZE. If only two probes are needed, this will not
5373 generate any code. Then probe at FIRST + SIZE. */
5374 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5376 emit_insn (gen_rtx_SET (g1,
5377 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5378 emit_stack_probe (g1);
5381 emit_stack_probe (plus_constant (Pmode, g1,
5382 (i - PROBE_INTERVAL) - size));
5385 /* Otherwise, do the same as above, but in a loop. Note that we must be
5386 extra careful with variables wrapping around because we might be at
5387 the very top (or the very bottom) of the address space and we have
5388 to be able to handle this case properly; in particular, we use an
5389 equality test for the loop condition. */
5390 else
5392 HOST_WIDE_INT rounded_size;
5393 rtx g4 = gen_rtx_REG (Pmode, 4);
5395 emit_move_insn (g1, GEN_INT (first));
5398 /* Step 1: round SIZE to the previous multiple of the interval. */
5400 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5401 emit_move_insn (g4, GEN_INT (rounded_size));
5404 /* Step 2: compute initial and final value of the loop counter. */
5406 /* TEST_ADDR = SP + FIRST. */
5407 emit_insn (gen_rtx_SET (g1,
5408 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5410 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5411 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5414 /* Step 3: the loop
5416 while (TEST_ADDR != LAST_ADDR)
5418 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5419 probe at TEST_ADDR
5422 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5423 until it is equal to ROUNDED_SIZE. */
5425 if (TARGET_ARCH64)
5426 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5427 else
5428 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5431 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5432 that SIZE is equal to ROUNDED_SIZE. */
5434 if (size != rounded_size)
5435 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5438 /* Make sure nothing is scheduled before we are done. */
5439 emit_insn (gen_blockage ());
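/* A hedged worked example, assuming PROBE_INTERVAL == 4096: for
   first == 4096 and size == 20000 the loop branch is taken, with
   rounded_size == 16384; probes land at sp-8192, sp-12288, sp-16384
   and sp-20480, and because size != rounded_size a final probe lands
   at sp-24096, i.e. sp - (first + size).  */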
5442 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5443 absolute addresses. */
5445 const char *
5446 output_probe_stack_range (rtx reg1, rtx reg2)
5448 static int labelno = 0;
5449 char loop_lab[32];
5450 rtx xops[2];
5452 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5454 /* Loop. */
5455 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5457 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5458 xops[0] = reg1;
5459 xops[1] = GEN_INT (-PROBE_INTERVAL);
5460 output_asm_insn ("add\t%0, %1, %0", xops);
5462 /* Test if TEST_ADDR == LAST_ADDR. */
5463 xops[1] = reg2;
5464 output_asm_insn ("cmp\t%0, %1", xops);
5466 /* Probe at TEST_ADDR and branch. */
5467 if (TARGET_ARCH64)
5468 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5469 else
5470 fputs ("\tbne\t", asm_out_file);
5471 assemble_name_raw (asm_out_file, loop_lab);
5472 fputc ('\n', asm_out_file);
5473 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5474 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5476 return "";
5479 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5480 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5481 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5482 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5483 the action to be performed if it returns false. Return the new offset. */
5485 typedef bool (*sorr_pred_t) (unsigned int, int);
5486 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5488 static int
5489 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5490 int offset, int leaf_function, sorr_pred_t save_p,
5491 sorr_act_t action_true, sorr_act_t action_false)
5493 unsigned int i;
5494 rtx mem;
5495 rtx_insn *insn;
5497 if (TARGET_ARCH64 && high <= 32)
5499 int fp_offset = -1;
5501 for (i = low; i < high; i++)
5503 if (save_p (i, leaf_function))
5505 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5506 base, offset));
5507 if (action_true == SORR_SAVE)
5509 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5510 RTX_FRAME_RELATED_P (insn) = 1;
5512 else /* action_true == SORR_RESTORE */
5514 /* The frame pointer must be restored last since its old
5515 value may be used as the base address for the frame. This
5516 is problematic only in 64-bit mode because of the lack
5517 of a double-word load instruction. */
5518 if (i == HARD_FRAME_POINTER_REGNUM)
5519 fp_offset = offset;
5520 else
5521 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5523 offset += 8;
5525 else if (action_false == SORR_ADVANCE)
5526 offset += 8;
5529 if (fp_offset >= 0)
5531 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5532 emit_move_insn (hard_frame_pointer_rtx, mem);
5535 else
5537 for (i = low; i < high; i += 2)
5539 bool reg0 = save_p (i, leaf_function);
5540 bool reg1 = save_p (i + 1, leaf_function);
5541 machine_mode mode;
5542 int regno;
5544 if (reg0 && reg1)
5546 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5547 regno = i;
5549 else if (reg0)
5551 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5552 regno = i;
5554 else if (reg1)
5556 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5557 regno = i + 1;
5558 offset += 4;
5560 else
5562 if (action_false == SORR_ADVANCE)
5563 offset += 8;
5564 continue;
5567 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5568 if (action_true == SORR_SAVE)
5570 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5571 RTX_FRAME_RELATED_P (insn) = 1;
5572 if (mode == DImode)
5574 rtx set1, set2;
5575 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5576 offset));
5577 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5578 RTX_FRAME_RELATED_P (set1) = 1;
5579 mem
5580 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5581 offset + 4));
5582 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5583 RTX_FRAME_RELATED_P (set2) = 1;
5584 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5585 gen_rtx_PARALLEL (VOIDmode,
5586 gen_rtvec (2, set1, set2)));
5589 else /* action_true == SORR_RESTORE */
5590 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5592 /* Bump the offset and round down to a double-word
5593 boundary in case we already bumped by 4. */
5594 offset = ROUND_DOWN (offset + 8, 8);
5598 return offset;
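/* A hedged illustration of the pairing logic above for the 32-bit path:
   if both %f4 and %f5 (regnos 36 and 37) need saving, they are handled
   as a single DFmode store; if only %f5 does, the offset is first
   bumped by 4 so the SFmode slot lands in the second half of the pair,
   and the final ROUND_DOWN keeps the running offset double-word
   aligned either way.  */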
5601 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5603 static rtx
5604 emit_adjust_base_to_offset (rtx base, int offset)
5606 /* ??? This might be optimized a little as %g1 might already have a
5607 value close enough that a single add insn will do. */
5608 /* ??? Although, all of this is probably only a temporary fix because
5609 if %g1 can hold a function result, then sparc_expand_epilogue will
5610 lose (the result will be clobbered). */
5611 rtx new_base = gen_rtx_REG (Pmode, 1);
5612 emit_move_insn (new_base, GEN_INT (offset));
5613 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5614 return new_base;
5617 /* Emit code to save/restore call-saved global and FP registers. */
5619 static void
5620 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5622 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5624 base = emit_adjust_base_to_offset (base, offset);
5625 offset = 0;
5628 offset
5629 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5630 save_global_or_fp_reg_p, action, SORR_NONE);
5631 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5632 save_global_or_fp_reg_p, action, SORR_NONE);
5635 /* Emit code to save/restore call-saved local and in registers. */
5637 static void
5638 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5640 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5642 base = emit_adjust_base_to_offset (base, offset);
5643 offset = 0;
5646 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5647 save_local_or_in_reg_p, action, SORR_ADVANCE);
5650 /* Emit a window_save insn. */
5652 static rtx_insn *
5653 emit_window_save (rtx increment)
5655 rtx_insn *insn = emit_insn (gen_window_save (increment));
5656 RTX_FRAME_RELATED_P (insn) = 1;
5658 /* The incoming return address (%o7) is saved in %i7. */
5659 add_reg_note (insn, REG_CFA_REGISTER,
5660 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5661 gen_rtx_REG (Pmode,
5662 INCOMING_RETURN_ADDR_REGNUM)));
5664 /* The window save event. */
5665 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5667 /* The CFA is %fp, the hard frame pointer. */
5668 add_reg_note (insn, REG_CFA_DEF_CFA,
5669 plus_constant (Pmode, hard_frame_pointer_rtx,
5670 INCOMING_FRAME_SP_OFFSET));
5672 return insn;
5675 /* Generate an increment for the stack pointer. */
5677 static rtx
5678 gen_stack_pointer_inc (rtx increment)
5680 return gen_rtx_SET (stack_pointer_rtx,
5681 gen_rtx_PLUS (Pmode,
5682 stack_pointer_rtx,
5683 increment));
5686 /* Expand the function prologue. The prologue is responsible for reserving
5687 storage for the frame, saving the call-saved registers and loading the
5688 GOT register if needed. */
5690 void
5691 sparc_expand_prologue (void)
5693 HOST_WIDE_INT size;
5694 rtx_insn *insn;
5696 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5697 on the final value of the flag means deferring the prologue/epilogue
5698 expansion until just before the second scheduling pass, which is too
5699 late to emit multiple epilogues or return insns.
5701 Of course we are making the assumption that the value of the flag
5702 will not change between now and its final value. Of the three parts
5703 of the formula, only the last one can reasonably vary. Let's take a
5704 closer look, after assuming that the first two ones are set to true
5705 (otherwise the last value is effectively silenced).
5707 If only_leaf_regs_used returns false, the global predicate will also
5708 be false so the actual frame size calculated below will be positive.
5709 As a consequence, the save_register_window insn will be emitted in
5710 the instruction stream; now this insn explicitly references %fp
5711 which is not a leaf register so only_leaf_regs_used will always
5712 return false subsequently.
5714 If only_leaf_regs_used returns true, we hope that the subsequent
5715 optimization passes won't cause non-leaf registers to pop up. For
5716 example, the regrename pass has special provisions to not rename to
5717 non-leaf registers in a leaf function. */
5718 sparc_leaf_function_p
5719 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5721 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5723 if (flag_stack_usage_info)
5724 current_function_static_stack_size = size;
5726 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5728 if (crtl->is_leaf && !cfun->calls_alloca)
5730 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5731 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5732 size - STACK_CHECK_PROTECT);
5734 else if (size > 0)
5735 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5738 if (size == 0)
5739 ; /* do nothing. */
5740 else if (sparc_leaf_function_p)
5742 rtx size_int_rtx = GEN_INT (-size);
5744 if (size <= 4096)
5745 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5746 else if (size <= 8192)
5748 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5749 RTX_FRAME_RELATED_P (insn) = 1;
5751 /* %sp is still the CFA register. */
5752 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5754 else
5756 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5757 emit_move_insn (size_rtx, size_int_rtx);
5758 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5759 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5760 gen_stack_pointer_inc (size_int_rtx));
5763 RTX_FRAME_RELATED_P (insn) = 1;
5765 else
5767 rtx size_int_rtx = GEN_INT (-size);
5769 if (size <= 4096)
5770 emit_window_save (size_int_rtx);
5771 else if (size <= 8192)
5773 emit_window_save (GEN_INT (-4096));
5775 /* %sp is not the CFA register anymore. */
5776 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5778 /* Make sure no %fp-based store is issued until after the frame is
5779 established. The offset between the frame pointer and the stack
5780 pointer is calculated relative to the value of the stack pointer
5781 at the end of the function prologue, and moving instructions that
5782 access the stack via the frame pointer between the instructions
5783 that decrement the stack pointer could result in accessing the
5784 register window save area, which is volatile. */
5785 emit_insn (gen_frame_blockage ());
5787 else
5789 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5790 emit_move_insn (size_rtx, size_int_rtx);
5791 emit_window_save (size_rtx);
5795 if (sparc_leaf_function_p)
5797 sparc_frame_base_reg = stack_pointer_rtx;
5798 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5800 else
5802 sparc_frame_base_reg = hard_frame_pointer_rtx;
5803 sparc_frame_base_offset = SPARC_STACK_BIAS;
5806 if (sparc_n_global_fp_regs > 0)
5807 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5808 sparc_frame_base_offset
5809 - sparc_apparent_frame_size,
5810 SORR_SAVE);
5812 /* Load the GOT register if needed. */
5813 if (crtl->uses_pic_offset_table)
5814 load_got_register ();
5816 /* Advertise that the data calculated just above are now valid. */
5817 sparc_prologue_data_valid_p = true;
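/* A minimal illustrative sketch (hypothetical helper, not used by the
   port): the prologue above splits the stack decrement into at most two
   immediate adds because SPARC add/sub immediates are 13-bit signed
   values in [-4096, 4095].  */

static inline int
sparc_example_stack_dec_insns (HOST_WIDE_INT size)
{
  if (size == 0)
    return 0;	/* no adjustment needed */
  else if (size <= 4096)
    return 1;	/* single "add %sp, -size, %sp" */
  else if (size <= 8192)
    return 2;	/* "add %sp, -4096, %sp" then "add %sp, 4096 - size, %sp" */
  else
    return 3;	/* roughly: load -size into %g1, then "add %sp, %g1, %sp" */
}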
5820 /* Expand the function prologue. The prologue is responsible for reserving
5821 storage for the frame, saving the call-saved registers and loading the
5822 GOT register if needed. */
5824 void
5825 sparc_flat_expand_prologue (void)
5827 HOST_WIDE_INT size;
5828 rtx_insn *insn;
5830 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5832 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5834 if (flag_stack_usage_info)
5835 current_function_static_stack_size = size;
5837 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5839 if (crtl->is_leaf && !cfun->calls_alloca)
5841 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5842 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5843 size - STACK_CHECK_PROTECT);
5845 else if (size > 0)
5846 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5849 if (sparc_save_local_in_regs_p)
5850 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5851 SORR_SAVE);
5853 if (size == 0)
5854 ; /* do nothing. */
5855 else
5857 rtx size_int_rtx, size_rtx;
5859 size_rtx = size_int_rtx = GEN_INT (-size);
5861 /* We establish the frame (i.e. decrement the stack pointer) first, even
5862 if we use a frame pointer, because we cannot clobber any call-saved
5863 registers, including the frame pointer, if we haven't created a new
5864 register save area, for the sake of compatibility with the ABI. */
5865 if (size <= 4096)
5866 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5867 else if (size <= 8192 && !frame_pointer_needed)
5869 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5870 RTX_FRAME_RELATED_P (insn) = 1;
5871 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5873 else
5875 size_rtx = gen_rtx_REG (Pmode, 1);
5876 emit_move_insn (size_rtx, size_int_rtx);
5877 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5878 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5879 gen_stack_pointer_inc (size_int_rtx));
5881 RTX_FRAME_RELATED_P (insn) = 1;
5883 /* Ensure nothing is scheduled until after the frame is established. */
5884 emit_insn (gen_blockage ());
5886 if (frame_pointer_needed)
5888 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5889 gen_rtx_MINUS (Pmode,
5890 stack_pointer_rtx,
5891 size_rtx)));
5892 RTX_FRAME_RELATED_P (insn) = 1;
5894 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5895 gen_rtx_SET (hard_frame_pointer_rtx,
5896 plus_constant (Pmode, stack_pointer_rtx,
5897 size)));
5900 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5902 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5903 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5905 insn = emit_move_insn (i7, o7);
5906 RTX_FRAME_RELATED_P (insn) = 1;
5908 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5910 /* Prevent this instruction from ever being considered dead,
5911 even if this function has no epilogue. */
5912 emit_use (i7);
5916 if (frame_pointer_needed)
5918 sparc_frame_base_reg = hard_frame_pointer_rtx;
5919 sparc_frame_base_offset = SPARC_STACK_BIAS;
5921 else
5923 sparc_frame_base_reg = stack_pointer_rtx;
5924 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5927 if (sparc_n_global_fp_regs > 0)
5928 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5929 sparc_frame_base_offset
5930 - sparc_apparent_frame_size,
5931 SORR_SAVE);
5933 /* Load the GOT register if needed. */
5934 if (crtl->uses_pic_offset_table)
5935 load_got_register ();
5937 /* Advertise that the data calculated just above are now valid. */
5938 sparc_prologue_data_valid_p = true;
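/* Note for the reader: in the flat prologue above, SIZE_RTX holds -SIZE,
   so the (minus %sp size_rtx) expression sets %fp to %sp + SIZE, i.e. to
   the value the stack pointer had on entry to the function.  */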
5941 /* This function generates the assembly code for function entry, which boils
5942 down to emitting the necessary .register directives. */
5944 static void
5945 sparc_asm_function_prologue (FILE *file)
5947 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5948 if (!TARGET_FLAT)
5949 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5951 sparc_output_scratch_registers (file);
5954 /* Expand the function epilogue, either normal or part of a sibcall.
5955 We emit all the instructions except the return or the call. */
5957 void
5958 sparc_expand_epilogue (bool for_eh)
5960 HOST_WIDE_INT size = sparc_frame_size;
5962 if (cfun->calls_alloca)
5963 emit_insn (gen_frame_blockage ());
5965 if (sparc_n_global_fp_regs > 0)
5966 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5967 sparc_frame_base_offset
5968 - sparc_apparent_frame_size,
5969 SORR_RESTORE);
5971 if (size == 0 || for_eh)
5972 ; /* do nothing. */
5973 else if (sparc_leaf_function_p)
5975 if (size <= 4096)
5976 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5977 else if (size <= 8192)
5979 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5980 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5982 else
5984 rtx reg = gen_rtx_REG (Pmode, 1);
5985 emit_move_insn (reg, GEN_INT (size));
5986 emit_insn (gen_stack_pointer_inc (reg));
5991 /* Expand the function epilogue, either normal or part of a sibcall.
5992 We emit all the instructions except the return or the call. */
5994 void
5995 sparc_flat_expand_epilogue (bool for_eh)
5997 HOST_WIDE_INT size = sparc_frame_size;
5999 if (sparc_n_global_fp_regs > 0)
6000 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6001 sparc_frame_base_offset
6002 - sparc_apparent_frame_size,
6003 SORR_RESTORE);
6005 /* If we have a frame pointer, we'll need both to restore it before the
6006 frame is destroyed and to use its current value in destroying the frame.
6007 Since we don't have an atomic way to do that in the flat window model,
6008 we save the current value into a temporary register (%g1). */
6009 if (frame_pointer_needed && !for_eh)
6010 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6012 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6013 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6014 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6016 if (sparc_save_local_in_regs_p)
6017 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6018 sparc_frame_base_offset,
6019 SORR_RESTORE);
6021 if (size == 0 || for_eh)
6022 ; /* do nothing. */
6023 else if (frame_pointer_needed)
6025 /* Make sure the frame is destroyed after everything else is done. */
6026 emit_insn (gen_blockage ());
6028 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6030 else
6032 /* Likewise. */
6033 emit_insn (gen_blockage ());
6035 if (size <= 4096)
6036 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6037 else if (size <= 8192)
6039 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6040 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6042 else
6044 rtx reg = gen_rtx_REG (Pmode, 1);
6045 emit_move_insn (reg, GEN_INT (size));
6046 emit_insn (gen_stack_pointer_inc (reg));
6051 /* Return true if it is appropriate to emit `return' instructions in the
6052 body of a function. */
6054 bool
6055 sparc_can_use_return_insn_p (void)
6057 return sparc_prologue_data_valid_p
6058 && sparc_n_global_fp_regs == 0
6059 && TARGET_FLAT
6060 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6061 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6064 /* This function generates the assembly code for function exit. */
6066 static void
6067 sparc_asm_function_epilogue (FILE *file)
6069 /* If the last two instructions of a function are "call foo; dslot;"
6070 the return address might point to the first instruction in the next
6071 function and we have to output a dummy nop for the sake of sane
6072 backtraces in such cases. This is pointless for sibling calls since
6073 the return address is explicitly adjusted. */
6075 rtx_insn *insn = get_last_insn ();
6077 rtx last_real_insn = prev_real_insn (insn);
6078 if (last_real_insn
6079 && NONJUMP_INSN_P (last_real_insn)
6080 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6081 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6083 if (last_real_insn
6084 && CALL_P (last_real_insn)
6085 && !SIBLING_CALL_P (last_real_insn))
6086 fputs("\tnop\n", file);
6088 sparc_output_deferred_case_vectors ();
6091 /* Output a 'restore' instruction. */
6093 static void
6094 output_restore (rtx pat)
6096 rtx operands[3];
6098 if (! pat)
6100 fputs ("\t restore\n", asm_out_file);
6101 return;
6104 gcc_assert (GET_CODE (pat) == SET);
6106 operands[0] = SET_DEST (pat);
6107 pat = SET_SRC (pat);
6109 switch (GET_CODE (pat))
6111 case PLUS:
6112 operands[1] = XEXP (pat, 0);
6113 operands[2] = XEXP (pat, 1);
6114 output_asm_insn (" restore %r1, %2, %Y0", operands);
6115 break;
6116 case LO_SUM:
6117 operands[1] = XEXP (pat, 0);
6118 operands[2] = XEXP (pat, 1);
6119 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6120 break;
6121 case ASHIFT:
6122 operands[1] = XEXP (pat, 0);
6123 gcc_assert (XEXP (pat, 1) == const1_rtx);
6124 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6125 break;
6126 default:
6127 operands[1] = pat;
6128 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6129 break;
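/* Illustrative mapping for the templates above (register names are
   hypothetical): a delay-slot insn (set Rd (plus Rs1 Rs2)) is folded into
   "restore Rs1, Rs2, Rd", a doubling (set Rd (ashift Rs 1)) becomes
   "restore Rs, Rs, Rd", and a plain copy (set Rd Rs) becomes
   "restore %g0, Rs, Rd".  */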
6133 /* Output a return. */
6135 const char *
6136 output_return (rtx_insn *insn)
6138 if (crtl->calls_eh_return)
6140 /* If the function uses __builtin_eh_return, the eh_return
6141 machinery occupies the delay slot. */
6142 gcc_assert (!final_sequence);
6144 if (flag_delayed_branch)
6146 if (!TARGET_FLAT && TARGET_V9)
6147 fputs ("\treturn\t%i7+8\n", asm_out_file);
6148 else
6150 if (!TARGET_FLAT)
6151 fputs ("\trestore\n", asm_out_file);
6153 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6156 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6158 else
6160 if (!TARGET_FLAT)
6161 fputs ("\trestore\n", asm_out_file);
6163 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6164 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6167 else if (sparc_leaf_function_p || TARGET_FLAT)
6169 /* This is a leaf or flat function so we don't have to bother restoring
6170 the register window, which frees us from dealing with the convoluted
6171 semantics of restore/return. We simply output the jump to the
6172 return address and the insn in the delay slot (if any). */
6174 return "jmp\t%%o7+%)%#";
6176 else
6178 /* This is a regular function so we have to restore the register window.
6179 We may have a pending insn for the delay slot, which will be either
6180 combined with the 'restore' instruction or put in the delay slot of
6181 the 'return' instruction. */
6183 if (final_sequence)
6185 rtx delay, pat;
6187 delay = NEXT_INSN (insn);
6188 gcc_assert (delay);
6190 pat = PATTERN (delay);
6192 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6194 epilogue_renumber (&pat, 0);
6195 return "return\t%%i7+%)%#";
6197 else
6199 output_asm_insn ("jmp\t%%i7+%)", NULL);
6200 output_restore (pat);
6201 PATTERN (delay) = gen_blockage ();
6202 INSN_CODE (delay) = -1;
6205 else
6207 /* The delay slot is empty. */
6208 if (TARGET_V9)
6209 return "return\t%%i7+%)\n\t nop";
6210 else if (flag_delayed_branch)
6211 return "jmp\t%%i7+%)\n\t restore";
6212 else
6213 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6217 return "";
6220 /* Output a sibling call. */
6222 const char *
6223 output_sibcall (rtx_insn *insn, rtx call_operand)
6225 rtx operands[1];
6227 gcc_assert (flag_delayed_branch);
6229 operands[0] = call_operand;
6231 if (sparc_leaf_function_p || TARGET_FLAT)
6233 /* This is a leaf or flat function so we don't have to bother restoring
6234 the register window. We simply output the jump to the function and
6235 the insn in the delay slot (if any). */
6237 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6239 if (final_sequence)
6240 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6241 operands);
6242 else
6243 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6244 it into a branch if possible. */
6245 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6246 operands);
6248 else
6250 /* This is a regular function so we have to restore the register window.
6251 We may have a pending insn for the delay slot, which will be combined
6252 with the 'restore' instruction. */
6254 output_asm_insn ("call\t%a0, 0", operands);
6256 if (final_sequence)
6258 rtx_insn *delay = NEXT_INSN (insn);
6259 gcc_assert (delay);
6261 output_restore (PATTERN (delay));
6263 PATTERN (delay) = gen_blockage ();
6264 INSN_CODE (delay) = -1;
6266 else
6267 output_restore (NULL_RTX);
6270 return "";
6273 /* Functions for handling argument passing.
6275 For 32-bit, the first 6 args are normally in registers and the rest are
6276 pushed. Any arg that starts within the first 6 words is at least
6277 partially passed in a register unless its data type forbids it.
6279 For 64-bit, the argument registers are laid out as an array of 16 elements
6280 and arguments are added sequentially. The first 6 int args and up to the
6281 first 16 fp args (depending on size) are passed in regs.
6283 Slot Stack Integral Float Float in structure Double Long Double
6284 ---- ----- -------- ----- ------------------ ------ -----------
6285 15 [SP+248] %f31 %f30,%f31 %d30
6286 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6287 13 [SP+232] %f27 %f26,%f27 %d26
6288 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6289 11 [SP+216] %f23 %f22,%f23 %d22
6290 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6291 9 [SP+200] %f19 %f18,%f19 %d18
6292 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6293 7 [SP+184] %f15 %f14,%f15 %d14
6294 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6295 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6296 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6297 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6298 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6299 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6300 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6302 Here SP is %sp if -mno-stack-bias, and %sp+stack_bias otherwise.
6304 Integral arguments are always passed as 64-bit quantities appropriately
6305 extended.
6307 Passing of floating point values is handled as follows.
6308 If a prototype is in scope:
6309 If the value is a named argument (i.e. not part of the `...' of a
6310 stdarg function), then the value is passed in the appropriate
6311 fp reg.
6312 If the value is part of the `...' and is passed in one of the first 6
6313 slots then the value is passed in the appropriate int reg.
6314 If the value is part of the `...' and is not passed in one of the first 6
6315 slots then the value is passed in memory.
6316 If a prototype is not in scope:
6317 If the value is one of the first 6 arguments the value is passed in the
6318 appropriate integer reg and the appropriate fp reg.
6319 If the value is not one of the first 6 arguments the value is passed in
6320 the appropriate fp reg and in memory.
6323 Summary of the calling conventions implemented by GCC on the SPARC:
6325 32-bit ABI:
6326 size argument return value
6328 small integer <4 int. reg. int. reg.
6329 word 4 int. reg. int. reg.
6330 double word 8 int. reg. int. reg.
6332 _Complex small integer <8 int. reg. int. reg.
6333 _Complex word 8 int. reg. int. reg.
6334 _Complex double word 16 memory int. reg.
6336 vector integer <=8 int. reg. FP reg.
6337 vector integer >8 memory memory
6339 float 4 int. reg. FP reg.
6340 double 8 int. reg. FP reg.
6341 long double 16 memory memory
6343 _Complex float 8 memory FP reg.
6344 _Complex double 16 memory FP reg.
6345 _Complex long double 32 memory FP reg.
6347 vector float any memory memory
6349 aggregate any memory memory
6353 64-bit ABI:
6354 size argument return value
6356 small integer <8 int. reg. int. reg.
6357 word 8 int. reg. int. reg.
6358 double word 16 int. reg. int. reg.
6360 _Complex small integer <16 int. reg. int. reg.
6361 _Complex word 16 int. reg. int. reg.
6362 _Complex double word 32 memory int. reg.
6364 vector integer <=16 FP reg. FP reg.
6365 vector integer 16<s<=32 memory FP reg.
6366 vector integer >32 memory memory
6368 float 4 FP reg. FP reg.
6369 double 8 FP reg. FP reg.
6370 long double 16 FP reg. FP reg.
6372 _Complex float 8 FP reg. FP reg.
6373 _Complex double 16 FP reg. FP reg.
6374 _Complex long double 32 memory FP reg.
6376 vector float <=16 FP reg. FP reg.
6377 vector float 16<s<=32 memory FP reg.
6378 vector float >32 memory memory
6380 aggregate <=16 reg. reg.
6381 aggregate 16<s<=32 memory reg.
6382 aggregate >32 memory memory
6386 Note #1: complex floating-point types follow the extended SPARC ABIs as
6387 implemented by the Sun compiler.
6389 Note #2: integral vector types follow the scalar floating-point types
6390 conventions to match what is implemented by the Sun VIS SDK.
6392 Note #3: floating-point vector types follow the aggregate types
6393 conventions. */
6396 /* Maximum number of int regs for args. */
6397 #define SPARC_INT_ARG_MAX 6
6398 /* Maximum number of fp regs for args. */
6399 #define SPARC_FP_ARG_MAX 16
6400 /* Number of words (partially) occupied for a given size in units. */
6401 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
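/* Worked example: with 8-byte words (TARGET_ARCH64), CEIL_NWORDS (1)
   through CEIL_NWORDS (8) yield 1 slot, CEIL_NWORDS (9) through
   CEIL_NWORDS (16) yield 2 slots, and so on.  */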
6403 /* Handle the INIT_CUMULATIVE_ARGS macro.
6404 Initialize a variable CUM of type CUMULATIVE_ARGS
6405 for a call to a function whose data type is FNTYPE.
6406 For a library call, FNTYPE is 0. */
6408 void
6409 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6411 cum->words = 0;
6412 cum->prototype_p = fntype && prototype_p (fntype);
6413 cum->libcall_p = !fntype;
6416 /* Handle promotion of pointer and integer arguments. */
6418 static machine_mode
6419 sparc_promote_function_mode (const_tree type, machine_mode mode,
6420 int *punsignedp, const_tree, int)
6422 if (type && POINTER_TYPE_P (type))
6424 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6425 return Pmode;
6428 /* Integral arguments are passed as full words, as per the ABI. */
6429 if (GET_MODE_CLASS (mode) == MODE_INT
6430 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6431 return word_mode;
6433 return mode;
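/* Example of the promotion above (sketch): an 'unsigned short' argument
   is widened to a full word (SImode for the 32-bit ABI, DImode for the
   64-bit ABI), and a pointer is extended to Pmode with the signedness
   given by POINTERS_EXTEND_UNSIGNED.  */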
6436 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6438 static bool
6439 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6441 return TARGET_ARCH64 ? true : false;
6444 /* Traverse the record TYPE recursively and call FUNC on its fields.
6445 NAMED is true if this is for a named parameter. DATA is passed
6446 to FUNC for each field. OFFSET is the starting position and
6447 PACKED is true if we are inside a packed record. */
6449 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6450 static void
6451 traverse_record_type (const_tree type, bool named, T *data,
6452 HOST_WIDE_INT offset = 0, bool packed = false)
6454 /* The ABI obviously doesn't specify how packed structures are passed.
6455 These are passed in integer regs if possible, otherwise memory. */
6456 if (!packed)
6457 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6458 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6460 packed = true;
6461 break;
6464 /* Walk the real fields, but skip those with no size or a zero size.
6465 ??? Fields with variable offset are handled as having zero offset. */
6466 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6467 if (TREE_CODE (field) == FIELD_DECL)
6469 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6470 continue;
6472 HOST_WIDE_INT bitpos = offset;
6473 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6474 bitpos += int_bit_position (field);
6476 tree field_type = TREE_TYPE (field);
6477 if (TREE_CODE (field_type) == RECORD_TYPE)
6478 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6479 packed);
6480 else
6482 const bool fp_type
6483 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6484 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6485 data);
6490 /* Handle recursive register classifying for structure layout. */
6492 typedef struct
6494 bool fp_regs; /* true if field eligible for FP registers. */
6495 bool fp_regs_in_first_word; /* true if such field in first word. */
6496 } classify_data_t;
6498 /* A subroutine of function_arg_slotno. Classify the field. */
6500 inline void
6501 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6502 classify_data_t *data)
6504 if (fp)
6506 data->fp_regs = true;
6507 if (bitpos < BITS_PER_WORD)
6508 data->fp_regs_in_first_word = true;
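/* Example (illustrative): for

     struct s { float f; long l; };

   the traversal above sets fp_regs, because there is an FP-typed field,
   and fp_regs_in_first_word, because the float occupies bits 0..31 of
   the record -- assuming a named parameter, no packing and TARGET_FPU.  */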
6512 /* Compute the slot number to pass an argument in.
6513 Return the slot number or -1 if passing on the stack.
6515 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6516 the preceding args and about the function being called.
6517 MODE is the argument's machine mode.
6518 TYPE is the data type of the argument (as a tree).
6519 This is null for libcalls where that information may
6520 not be available.
6521 NAMED is nonzero if this argument is a named parameter
6522 (otherwise it is an extra parameter matching an ellipsis).
6523 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6524 *PREGNO records the register number to use if scalar type.
6525 *PPADDING records the amount of padding needed in words. */
6527 static int
6528 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6529 const_tree type, bool named, bool incoming,
6530 int *pregno, int *ppadding)
6532 int regbase = (incoming
6533 ? SPARC_INCOMING_INT_ARG_FIRST
6534 : SPARC_OUTGOING_INT_ARG_FIRST);
6535 int slotno = cum->words;
6536 enum mode_class mclass;
6537 int regno;
6539 *ppadding = 0;
6541 if (type && TREE_ADDRESSABLE (type))
6542 return -1;
6544 if (TARGET_ARCH32
6545 && mode == BLKmode
6546 && type
6547 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6548 return -1;
6550 /* For SPARC64, objects requiring 16-byte alignment get it. */
6551 if (TARGET_ARCH64
6552 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6553 && (slotno & 1) != 0)
6554 slotno++, *ppadding = 1;
6556 mclass = GET_MODE_CLASS (mode);
6557 if (type && TREE_CODE (type) == VECTOR_TYPE)
6559 /* Vector types deserve special treatment because they are
6560 polymorphic wrt their mode, depending upon whether VIS
6561 instructions are enabled. */
6562 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6564 /* The SPARC port defines no floating-point vector modes. */
6565 gcc_assert (mode == BLKmode);
6567 else
6569 /* Integral vector types should either have a vector
6570 mode or an integral mode, because we are guaranteed
6571 by pass_by_reference that their size is not greater
6572 than 16 bytes and TImode is 16-byte wide. */
6573 gcc_assert (mode != BLKmode);
6575 /* Vector integers are handled like floats according to
6576 the Sun VIS SDK. */
6577 mclass = MODE_FLOAT;
6581 switch (mclass)
6583 case MODE_FLOAT:
6584 case MODE_COMPLEX_FLOAT:
6585 case MODE_VECTOR_INT:
6586 if (TARGET_ARCH64 && TARGET_FPU && named)
6588 /* If all arg slots are filled, then must pass on stack. */
6589 if (slotno >= SPARC_FP_ARG_MAX)
6590 return -1;
6592 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6593 /* Arguments filling only one single FP register are
6594 right-justified in the outer double FP register. */
6595 if (GET_MODE_SIZE (mode) <= 4)
6596 regno++;
6597 break;
6599 /* fallthrough */
6601 case MODE_INT:
6602 case MODE_COMPLEX_INT:
6603 /* If all arg slots are filled, then must pass on stack. */
6604 if (slotno >= SPARC_INT_ARG_MAX)
6605 return -1;
6607 regno = regbase + slotno;
6608 break;
6610 case MODE_RANDOM:
6611 if (mode == VOIDmode)
6612 /* MODE is VOIDmode when generating the actual call. */
6613 return -1;
6615 gcc_assert (mode == BLKmode);
6617 if (TARGET_ARCH32
6618 || !type
6619 || (TREE_CODE (type) != RECORD_TYPE
6620 && TREE_CODE (type) != VECTOR_TYPE))
6622 /* If all arg slots are filled, then must pass on stack. */
6623 if (slotno >= SPARC_INT_ARG_MAX)
6624 return -1;
6626 regno = regbase + slotno;
6628 else /* TARGET_ARCH64 && type */
6630 /* If all arg slots are filled, then must pass on stack. */
6631 if (slotno >= SPARC_FP_ARG_MAX)
6632 return -1;
6634 if (TREE_CODE (type) == RECORD_TYPE)
6636 classify_data_t data = { false, false };
6637 traverse_record_type<classify_data_t, classify_registers>
6638 (type, named, &data);
6640 if (data.fp_regs)
6642 /* If all FP slots are filled except for the last one and
6643 there is no FP field in the first word, then must pass
6644 on stack. */
6645 if (slotno >= SPARC_FP_ARG_MAX - 1
6646 && !data.fp_regs_in_first_word)
6647 return -1;
6649 else
6651 /* If all int slots are filled, then must pass on stack. */
6652 if (slotno >= SPARC_INT_ARG_MAX)
6653 return -1;
6657 /* PREGNO isn't set since both int and FP regs can be used. */
6658 return slotno;
6660 break;
6662 default :
6663 gcc_unreachable ();
6666 *pregno = regno;
6667 return slotno;
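/* Worked example (64-bit, TARGET_FPU): a named 'double' landing in slot 2
   takes the MODE_FLOAT path above and gets regno SPARC_FP_ARG_FIRST + 4,
   that is %d4, in agreement with the argument passing table further up.  */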
6670 /* Handle recursive register counting/assigning for structure layout. */
6672 typedef struct
6674 int slotno; /* slot number of the argument. */
6675 int regbase; /* regno of the base register. */
6676 int intoffset; /* offset of the first pending integer field. */
6677 int nregs; /* number of words passed in registers. */
6678 bool stack; /* true if part of the argument is on the stack. */
6679 rtx ret; /* return expression being built. */
6680 } assign_data_t;
6682 /* A subroutine of function_arg_record_value. Compute the number of integer
6683 registers to be assigned between PARMS->intoffset and BITPOS. Return
6684 true if at least one integer register is assigned or false otherwise. */
6686 static bool
6687 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6689 if (data->intoffset < 0)
6690 return false;
6692 const int intoffset = data->intoffset;
6693 data->intoffset = -1;
6695 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6696 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6697 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6698 int nregs = (endbit - startbit) / BITS_PER_WORD;
6700 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6702 nregs = SPARC_INT_ARG_MAX - this_slotno;
6704 /* We need to pass this field (partly) on the stack. */
6705 data->stack = 1;
6708 if (nregs <= 0)
6709 return false;
6711 *pnregs = nregs;
6712 return true;
6715 /* A subroutine of function_arg_record_value. Compute the number and the mode
6716 of the FP registers to be assigned for FIELD. Return true if at least one
6717 FP register is assigned or false otherwise. */
6719 static bool
6720 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6721 assign_data_t *data,
6722 int *pnregs, machine_mode *pmode)
6724 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6725 machine_mode mode = DECL_MODE (field);
6726 int nregs, nslots;
6728 /* Slots are counted as words while regs are counted as having the size of
6729 the (inner) mode. */
6730 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6732 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6733 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6735 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6737 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6738 nregs = 2;
6740 else
6741 nregs = 1;
6743 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6745 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6747 nslots = SPARC_FP_ARG_MAX - this_slotno;
6748 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6750 /* We need to pass this field (partly) on the stack. */
6751 data->stack = 1;
6753 if (nregs <= 0)
6754 return false;
6757 *pnregs = nregs;
6758 *pmode = mode;
6759 return true;
6762 /* A subroutine of function_arg_record_value. Count the number of registers
6763 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6765 inline void
6766 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6767 assign_data_t *data)
6769 if (fp)
6771 int nregs;
6772 machine_mode mode;
6774 if (compute_int_layout (bitpos, data, &nregs))
6775 data->nregs += nregs;
6777 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6778 data->nregs += nregs;
6780 else
6782 if (data->intoffset < 0)
6783 data->intoffset = bitpos;
6787 /* A subroutine of function_arg_record_value. Assign the bits of the
6788 structure between PARMS->intoffset and BITPOS to integer registers. */
6790 static void
6791 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6793 int intoffset = data->intoffset;
6794 machine_mode mode;
6795 int nregs;
6797 if (!compute_int_layout (bitpos, data, &nregs))
6798 return;
6800 /* If this is the trailing part of a word, only load that much into
6801 the register. Otherwise load the whole register. Note that in
6802 the latter case we may pick up unwanted bits. It's not a problem
6803 at the moment, but we may wish to revisit this. */
6804 if (intoffset % BITS_PER_WORD != 0)
6805 mode = smallest_int_mode_for_size (BITS_PER_WORD
6806 - intoffset % BITS_PER_WORD);
6807 else
6808 mode = word_mode;
6810 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6811 unsigned int regno = data->regbase + this_slotno;
6812 intoffset /= BITS_PER_UNIT;
6816 rtx reg = gen_rtx_REG (mode, regno);
6817 XVECEXP (data->ret, 0, data->stack + data->nregs)
6818 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6819 data->nregs += 1;
6820 mode = word_mode;
6821 regno += 1;
6822 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6824 while (--nregs > 0);
6827 /* A subroutine of function_arg_record_value. Assign FIELD at position
6828 BITPOS to FP registers. */
6830 static void
6831 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6832 assign_data_t *data)
6834 int nregs;
6835 machine_mode mode;
6837 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6838 return;
6840 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6841 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6842 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6843 regno++;
6844 int pos = bitpos / BITS_PER_UNIT;
6848 rtx reg = gen_rtx_REG (mode, regno);
6849 XVECEXP (data->ret, 0, data->stack + data->nregs)
6850 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6851 data->nregs += 1;
6852 regno += GET_MODE_SIZE (mode) / 4;
6853 pos += GET_MODE_SIZE (mode);
6855 while (--nregs > 0);
6858 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6859 the structure between PARMS->intoffset and BITPOS to registers. */
6861 inline void
6862 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6863 assign_data_t *data)
6865 if (fp)
6867 assign_int_registers (bitpos, data);
6869 assign_fp_registers (field, bitpos, data);
6871 else
6873 if (data->intoffset < 0)
6874 data->intoffset = bitpos;
6878 /* Used by function_arg and sparc_function_value_1 to implement the complex
6879 conventions of the 64-bit ABI for passing and returning structures.
6880 Return an expression valid as a return value for the FUNCTION_ARG
6881 and TARGET_FUNCTION_VALUE.
6883 TYPE is the data type of the argument (as a tree).
6884 This is null for libcalls where that information may
6885 not be available.
6886 MODE is the argument's machine mode.
6887 SLOTNO is the index number of the argument's slot in the parameter array.
6888 NAMED is true if this argument is a named parameter
6889 (otherwise it is an extra parameter matching an ellipsis).
6890 REGBASE is the regno of the base register for the parameter array. */
6892 static rtx
6893 function_arg_record_value (const_tree type, machine_mode mode,
6894 int slotno, bool named, int regbase)
6896 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6897 assign_data_t data;
6898 int nregs;
6900 data.slotno = slotno;
6901 data.regbase = regbase;
6903 /* Count how many registers we need. */
6904 data.nregs = 0;
6905 data.intoffset = 0;
6906 data.stack = false;
6907 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6909 /* Take into account pending integer fields. */
6910 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6911 data.nregs += nregs;
6913 /* Allocate the vector and handle some annoying special cases. */
6914 nregs = data.nregs;
6916 if (nregs == 0)
6918 /* ??? Empty structure has no value? Duh? */
6919 if (typesize <= 0)
6921 /* Though there's nothing really to store, return a word register
6922 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6923 leads to breakage due to the fact that there are zero bytes to
6924 load. */
6925 return gen_rtx_REG (mode, regbase);
6928 /* ??? C++ has structures with no fields, and yet a size. Give up
6929 for now and pass everything back in integer registers. */
6930 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6931 if (nregs + slotno > SPARC_INT_ARG_MAX)
6932 nregs = SPARC_INT_ARG_MAX - slotno;
6935 gcc_assert (nregs > 0);
6937 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6939 /* If at least one field must be passed on the stack, generate
6940 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6941 also be passed on the stack. We can't do much better because the
6942 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6943 of structures for which the fields passed exclusively in registers
6944 are not at the beginning of the structure. */
6945 if (data.stack)
6946 XVECEXP (data.ret, 0, 0)
6947 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6949 /* Assign the registers. */
6950 data.nregs = 0;
6951 data.intoffset = 0;
6952 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6954 /* Assign pending integer fields. */
6955 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6957 gcc_assert (data.nregs == nregs);
6959 return data.ret;
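/* Worked example (hedged): under the 64-bit ABI, passing

     struct { double d; long l; };

   as the first argument yields the PARALLEL

     [(expr_list (reg:DF %f0) (const_int 0))
      (expr_list (reg:DI %o1) (const_int 8))]

   i.e. the FP field travels in %d0 and the integer field in the second
   integer argument register, at byte offset 8 in the record.  */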
6962 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6963 of the 64-bit ABI for passing and returning unions.
6964 Return an expression valid as a return value for the FUNCTION_ARG
6965 and TARGET_FUNCTION_VALUE.
6967 SIZE is the size in bytes of the union.
6968 MODE is the argument's machine mode.
6969 REGNO is the hard register the union will be passed in. */
6971 static rtx
6972 function_arg_union_value (int size, machine_mode mode, int slotno,
6973 int regno)
6975 int nwords = CEIL_NWORDS (size), i;
6976 rtx regs;
6978 /* See comment in previous function for empty structures. */
6979 if (nwords == 0)
6980 return gen_rtx_REG (mode, regno);
6982 if (slotno == SPARC_INT_ARG_MAX - 1)
6983 nwords = 1;
6985 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6987 for (i = 0; i < nwords; i++)
6989 /* Unions are passed left-justified. */
6990 XVECEXP (regs, 0, i)
6991 = gen_rtx_EXPR_LIST (VOIDmode,
6992 gen_rtx_REG (word_mode, regno),
6993 GEN_INT (UNITS_PER_WORD * i));
6994 regno++;
6997 return regs;
7000 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7001 for passing and returning BLKmode vectors.
7002 Return an expression valid as a return value for the FUNCTION_ARG
7003 and TARGET_FUNCTION_VALUE.
7005 SIZE is the size in bytes of the vector.
7006 REGNO is the FP hard register the vector will be passed in. */
7008 static rtx
7009 function_arg_vector_value (int size, int regno)
7011 const int nregs = MAX (1, size / 8);
7012 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7014 if (size < 8)
7015 XVECEXP (regs, 0, 0)
7016 = gen_rtx_EXPR_LIST (VOIDmode,
7017 gen_rtx_REG (SImode, regno),
7018 const0_rtx);
7019 else
7020 for (int i = 0; i < nregs; i++)
7021 XVECEXP (regs, 0, i)
7022 = gen_rtx_EXPR_LIST (VOIDmode,
7023 gen_rtx_REG (DImode, regno + 2*i),
7024 GEN_INT (i*8));
7026 return regs;
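/* Worked example: a 16-byte BLKmode vector with REGNO = %f0 comes back as

     (parallel [(expr_list (reg:DI %f0) (const_int 0))
                (expr_list (reg:DI %f2) (const_int 8))])

   i.e. the two 8-byte halves live in %d0 and %d2.  */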
7029 /* Determine where to put an argument to a function.
7030 Value is zero to push the argument on the stack,
7031 or a hard register in which to store the argument.
7033 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7034 the preceding args and about the function being called.
7035 MODE is the argument's machine mode.
7036 TYPE is the data type of the argument (as a tree).
7037 This is null for libcalls where that information may
7038 not be available.
7039 NAMED is true if this argument is a named parameter
7040 (otherwise it is an extra parameter matching an ellipsis).
7041 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7042 TARGET_FUNCTION_INCOMING_ARG. */
7044 static rtx
7045 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7046 const_tree type, bool named, bool incoming)
7048 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7050 int regbase = (incoming
7051 ? SPARC_INCOMING_INT_ARG_FIRST
7052 : SPARC_OUTGOING_INT_ARG_FIRST);
7053 int slotno, regno, padding;
7054 enum mode_class mclass = GET_MODE_CLASS (mode);
7056 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7057 &regno, &padding);
7058 if (slotno == -1)
7059 return 0;
7061 /* Vector types deserve special treatment because they are polymorphic wrt
7062 their mode, depending upon whether VIS instructions are enabled. */
7063 if (type && TREE_CODE (type) == VECTOR_TYPE)
7065 HOST_WIDE_INT size = int_size_in_bytes (type);
7066 gcc_assert ((TARGET_ARCH32 && size <= 8)
7067 || (TARGET_ARCH64 && size <= 16));
7069 if (mode == BLKmode)
7070 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7072 mclass = MODE_FLOAT;
7075 if (TARGET_ARCH32)
7076 return gen_rtx_REG (mode, regno);
7078 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7079 and are promoted to registers if possible. */
7080 if (type && TREE_CODE (type) == RECORD_TYPE)
7082 HOST_WIDE_INT size = int_size_in_bytes (type);
7083 gcc_assert (size <= 16);
7085 return function_arg_record_value (type, mode, slotno, named, regbase);
7088 /* Unions up to 16 bytes in size are passed in integer registers. */
7089 else if (type && TREE_CODE (type) == UNION_TYPE)
7091 HOST_WIDE_INT size = int_size_in_bytes (type);
7092 gcc_assert (size <= 16);
7094 return function_arg_union_value (size, mode, slotno, regno);
7097 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7098 but also have the slot allocated for them.
7099 If no prototype is in scope fp values in register slots get passed
7100 in two places, either fp regs and int regs or fp regs and memory. */
7101 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7102 && SPARC_FP_REG_P (regno))
7104 rtx reg = gen_rtx_REG (mode, regno);
7105 if (cum->prototype_p || cum->libcall_p)
7106 return reg;
7107 else
7109 rtx v0, v1;
7111 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7113 int intreg;
7115 /* On incoming, we don't need to know that the value
7116 is passed in both %f0 and %i0; that knowledge confuses other parts of
7117 the compiler and causes needless spillage even in the simplest cases. */
7118 if (incoming)
7119 return reg;
7121 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7122 + (regno - SPARC_FP_ARG_FIRST) / 2);
7124 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7125 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7126 const0_rtx);
7127 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7129 else
7131 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7132 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7133 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7138 /* All other aggregate types are passed in an integer register in a mode
7139 corresponding to the size of the type. */
7140 else if (type && AGGREGATE_TYPE_P (type))
7142 HOST_WIDE_INT size = int_size_in_bytes (type);
7143 gcc_assert (size <= 16);
7145 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7148 return gen_rtx_REG (mode, regno);
7151 /* Handle the TARGET_FUNCTION_ARG target hook. */
7153 static rtx
7154 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7155 const_tree type, bool named)
7157 return sparc_function_arg_1 (cum, mode, type, named, false);
7160 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7162 static rtx
7163 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7164 const_tree type, bool named)
7166 return sparc_function_arg_1 (cum, mode, type, named, true);
7169 /* For sparc64, objects requiring 16-byte alignment are passed with that alignment. */
7171 static unsigned int
7172 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7174 return ((TARGET_ARCH64
7175 && (GET_MODE_ALIGNMENT (mode) == 128
7176 || (type && TYPE_ALIGN (type) == 128)))
7177 ? 128
7178 : PARM_BOUNDARY);
7181 /* For an arg passed partly in registers and partly in memory,
7182 this is the number of bytes of registers used.
7183 For args passed entirely in registers or entirely in memory, zero.
7185 Any arg that starts in the first 6 regs but won't entirely fit in them
7186 needs partial registers on v8. On v9, structures with integer
7187 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7188 values that begin in the last fp reg [where "last fp reg" varies with the
7189 mode] will be split between that reg and memory. */
7191 static int
7192 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7193 tree type, bool named)
7195 int slotno, regno, padding;
7197 /* We pass false for incoming here, it doesn't matter. */
7198 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7199 false, &regno, &padding);
7201 if (slotno == -1)
7202 return 0;
7204 if (TARGET_ARCH32)
7206 if ((slotno + (mode == BLKmode
7207 ? CEIL_NWORDS (int_size_in_bytes (type))
7208 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7209 > SPARC_INT_ARG_MAX)
7210 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7212 else
7214 /* We are guaranteed by pass_by_reference that the size of the
7215 argument is not greater than 16 bytes, so we only need to return
7216 one word if the argument is partially passed in registers. */
7218 if (type && AGGREGATE_TYPE_P (type))
7220 int size = int_size_in_bytes (type);
7222 if (size > UNITS_PER_WORD
7223 && (slotno == SPARC_INT_ARG_MAX - 1
7224 || slotno == SPARC_FP_ARG_MAX - 1))
7225 return UNITS_PER_WORD;
7227 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7228 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7229 && ! (TARGET_FPU && named)))
7231 /* The complex types are passed as packed types. */
7232 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7233 && slotno == SPARC_INT_ARG_MAX - 1)
7234 return UNITS_PER_WORD;
7236 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7238 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7239 > SPARC_FP_ARG_MAX)
7240 return UNITS_PER_WORD;
7244 return 0;
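/* Worked example (32-bit): an outgoing DImode argument starting in slot 5
   needs CEIL_NWORDS (8) = 2 slots, but only slot 5 is a register slot, so
   the code above returns (6 - 5) * 4 = 4 bytes in registers (%o5); the
   other half of the argument goes on the stack.  */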
7247 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7248 Specify whether to pass the argument by reference. */
7250 static bool
7251 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7252 machine_mode mode, const_tree type,
7253 bool named ATTRIBUTE_UNUSED)
7255 if (TARGET_ARCH32)
7256 /* Original SPARC 32-bit ABI says that structures and unions,
7257 and quad-precision floats are passed by reference. For Pascal,
7258 also pass arrays by reference. All other base types are passed
7259 in registers.
7261 Extended ABI (as implemented by the Sun compiler) says that all
7262 complex floats are passed by reference. Pass complex integers
7263 in registers up to 8 bytes. More generally, enforce the 2-word
7264 cap for passing arguments in registers.
7266 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7267 integers are passed like floats of the same size, that is in
7268 registers up to 8 bytes. Pass all vector floats by reference
7269 like structure and unions. */
7270 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7271 || mode == SCmode
7272 /* Catch CDImode, TFmode, DCmode and TCmode. */
7273 || GET_MODE_SIZE (mode) > 8
7274 || (type
7275 && TREE_CODE (type) == VECTOR_TYPE
7276 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7277 else
7278 /* Original SPARC 64-bit ABI says that structures and unions
7279 smaller than 16 bytes are passed in registers, as well as
7280 all other base types.
7282 Extended ABI (as implemented by the Sun compiler) says that
7283 complex floats are passed in registers up to 16 bytes. Pass
7284 all complex integers in registers up to 16 bytes. More generally,
7285 enforce the 2-word cap for passing arguments in registers.
7287 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7288 integers are passed like floats of the same size, that is in
7289 registers (up to 16 bytes). Pass all vector floats like structure
7290 and unions. */
7291 return ((type
7292 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7293 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7294 /* Catch CTImode and TCmode. */
7295 || GET_MODE_SIZE (mode) > 16);
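/* Examples of the rules above: for the 32-bit ABI, 'long double' (TFmode,
   16 bytes), '_Complex double' (DCmode, 16 bytes) and '_Complex float'
   (SCmode) are all passed by reference; for the 64-bit ABI, each of them
   fits under the 16-byte cap and is passed by value.  */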
7298 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7299 Update the data in CUM to advance over an argument
7300 of mode MODE and data type TYPE.
7301 TYPE is null for libcalls where that information may not be available. */
7303 static void
7304 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7305 const_tree type, bool named)
7307 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7308 int regno, padding;
7310 /* We pass false for incoming here, it doesn't matter. */
7311 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7313 /* If argument requires leading padding, add it. */
7314 cum->words += padding;
7316 if (TARGET_ARCH32)
7317 cum->words += (mode == BLKmode
7318 ? CEIL_NWORDS (int_size_in_bytes (type))
7319 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7320 else
7322 if (type && AGGREGATE_TYPE_P (type))
7324 int size = int_size_in_bytes (type);
7326 if (size <= 8)
7327 ++cum->words;
7328 else if (size <= 16)
7329 cum->words += 2;
7330 else /* passed by reference */
7331 ++cum->words;
7333 else
7334 cum->words += (mode == BLKmode
7335 ? CEIL_NWORDS (int_size_in_bytes (type))
7336 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7340 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7341 are always stored left-justified in their argument slot. */
7343 static pad_direction
7344 sparc_function_arg_padding (machine_mode mode, const_tree type)
7346 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7347 return PAD_UPWARD;
7349 /* Fall back to the default. */
7350 return default_function_arg_padding (mode, type);
7353 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7354 Specify whether to return the return value in memory. */
7356 static bool
7357 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7359 if (TARGET_ARCH32)
7360 /* Original SPARC 32-bit ABI says that structures and unions,
7361 and quad-precision floats are returned in memory. All other
7362 base types are returned in registers.
7364 Extended ABI (as implemented by the Sun compiler) says that
7365 all complex floats are returned in registers (8 FP registers
7366 at most for '_Complex long double'). Return all complex integers
7367 in registers (4 at most for '_Complex long long').
7369 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7370 integers are returned like floats of the same size, that is in
7371 registers up to 8 bytes and in memory otherwise. Return all
7372 vector floats in memory like structure and unions; note that
7373 they always have BLKmode like the latter. */
7374 return (TYPE_MODE (type) == BLKmode
7375 || TYPE_MODE (type) == TFmode
7376 || (TREE_CODE (type) == VECTOR_TYPE
7377 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7378 else
7379 /* Original SPARC 64-bit ABI says that structures and unions
7380 smaller than 32 bytes are returned in registers, as well as
7381 all other base types.
7383 Extended ABI (as implemented by the Sun compiler) says that all
7384 complex floats are returned in registers (8 FP registers at most
7385 for '_Complex long double'). Return all complex integers in
7386 registers (4 at most for '_Complex TItype').
7388 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7389 integers are returned like floats of the same size, that is in
7390 registers. Return all vector floats like structure and unions;
7391 note that they always have BLKmode like the latter. */
7392 return (TYPE_MODE (type) == BLKmode
7393 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7396 /* Handle the TARGET_STRUCT_VALUE target hook.
7397 Return where to find the structure return value address. */
7399 static rtx
7400 sparc_struct_value_rtx (tree fndecl, int incoming)
7402 if (TARGET_ARCH64)
7403 return 0;
7404 else
7406 rtx mem;
7408 if (incoming)
7409 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7410 STRUCT_VALUE_OFFSET));
7411 else
7412 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7413 STRUCT_VALUE_OFFSET));
7415 /* Only follow the SPARC ABI for fixed-size structure returns.
7416 Variable size structure returns are handled per the normal
7417 procedures in GCC. This is enabled by -mstd-struct-return. */
7418 if (incoming == 2
7419 && sparc_std_struct_return
7420 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7421 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7423 /* We must check and adjust the return address, as it is optional
7424 as to whether the return object is really provided. */
7425 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7426 rtx scratch = gen_reg_rtx (SImode);
7427 rtx_code_label *endlab = gen_label_rtx ();
7429 /* Calculate the return object size. */
7430 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7431 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7432 /* Construct a temporary return value. */
7433 rtx temp_val
7434 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7436 /* Implement SPARC 32-bit psABI callee return struct checking:
7438 Fetch the instruction where we will return to and see if
7439 it's an unimp instruction (the most significant 10 bits
7440 will be zero). */
7441 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7442 plus_constant (Pmode,
7443 ret_reg, 8)));
7444 /* Assume the size is valid and pre-adjust. */
7445 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7446 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7447 0, endlab);
7448 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7449 /* Write the address of the memory pointed to by temp_val into
7450 the memory pointed to by mem. */
7451 emit_move_insn (mem, XEXP (temp_val, 0));
7452 emit_label (endlab);
7455 return mem;
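/* Hedged sketch of the 32-bit psABI convention checked above: a caller
   that really provides a struct return area follows the call with an
   "unimp" word whose low 12 bits encode the object size, e.g.

       call  foo
        nop
       unimp 8          ! 8-byte object returned in memory

   The callee peeks at the word at %i7 + 8; if it matches the expected
   size, %i7 is left incremented by 4 so that the eventual return to
   %i7 + 8 skips the unimp word; otherwise the increment is undone and
   the address of a local temporary is substituted for the missing
   return slot.  */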
7459 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7460 For v9, function return values are subject to the same rules as arguments,
7461 except that up to 32 bytes may be returned in registers. */
7463 static rtx
7464 sparc_function_value_1 (const_tree type, machine_mode mode,
7465 bool outgoing)
7467 /* Beware that the two values are swapped here wrt function_arg. */
7468 int regbase = (outgoing
7469 ? SPARC_INCOMING_INT_ARG_FIRST
7470 : SPARC_OUTGOING_INT_ARG_FIRST);
7471 enum mode_class mclass = GET_MODE_CLASS (mode);
7472 int regno;
7474 /* Vector types deserve special treatment because they are polymorphic wrt
7475 their mode, depending upon whether VIS instructions are enabled. */
7476 if (type && TREE_CODE (type) == VECTOR_TYPE)
7478 HOST_WIDE_INT size = int_size_in_bytes (type);
7479 gcc_assert ((TARGET_ARCH32 && size <= 8)
7480 || (TARGET_ARCH64 && size <= 32));
7482 if (mode == BLKmode)
7483 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7485 mclass = MODE_FLOAT;
7488 if (TARGET_ARCH64 && type)
7490 /* Structures up to 32 bytes in size are returned in registers. */
7491 if (TREE_CODE (type) == RECORD_TYPE)
7493 HOST_WIDE_INT size = int_size_in_bytes (type);
7494 gcc_assert (size <= 32);
7496 return function_arg_record_value (type, mode, 0, 1, regbase);
7499 /* Unions up to 32 bytes in size are returned in integer registers. */
7500 else if (TREE_CODE (type) == UNION_TYPE)
7502 HOST_WIDE_INT size = int_size_in_bytes (type);
7503 gcc_assert (size <= 32);
7505 return function_arg_union_value (size, mode, 0, regbase);
7508 /* Objects that require it are returned in FP registers. */
7509 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7512 /* All other aggregate types are returned in an integer register in a
7513 mode corresponding to the size of the type. */
7514 else if (AGGREGATE_TYPE_P (type))
7516 /* All other aggregate types are passed in an integer register
7517 in a mode corresponding to the size of the type. */
7518 HOST_WIDE_INT size = int_size_in_bytes (type);
7519 gcc_assert (size <= 32);
7521 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7523 /* ??? We probably should have made the same ABI change in
7524 3.4.0 as the one we made for unions. The latter was
7525 required by the SCD though, while the former is not
7526 specified, so we favored compatibility and efficiency.
7528 Now we're stuck for aggregates larger than 16 bytes,
7529 because OImode vanished in the meantime. Let's not
7530 try to be unduly clever, and simply follow the ABI
7531 for unions in that case. */
7532 if (mode == BLKmode)
7533 return function_arg_union_value (size, mode, 0, regbase);
7534 else
7535 mclass = MODE_INT;
7538 /* We should only have pointer and integer types at this point. This
7539 must match sparc_promote_function_mode. */
7540 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7541 mode = word_mode;
7544 /* We should only have pointer and integer types at this point, except with
7545 -freg-struct-return. This must match sparc_promote_function_mode. */
7546 else if (TARGET_ARCH32
7547 && !(type && AGGREGATE_TYPE_P (type))
7548 && mclass == MODE_INT
7549 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7550 mode = word_mode;
7552 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7553 regno = SPARC_FP_ARG_FIRST;
7554 else
7555 regno = regbase;
7557 return gen_rtx_REG (mode, regno);
7560 /* Handle TARGET_FUNCTION_VALUE.
7561 On the SPARC, the value is found in the first "output" register, but the
7562 called function leaves it in the first "input" register. */
7564 static rtx
7565 sparc_function_value (const_tree valtype,
7566 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7567 bool outgoing)
7569 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7572 /* Handle TARGET_LIBCALL_VALUE. */
7574 static rtx
7575 sparc_libcall_value (machine_mode mode,
7576 const_rtx fun ATTRIBUTE_UNUSED)
7578 return sparc_function_value_1 (NULL_TREE, mode, false);
7581 /* Handle FUNCTION_VALUE_REGNO_P.
7582 On the SPARC, the first "output" reg is used for integer values, and the
7583 first floating point register is used for floating point values. */
7585 static bool
7586 sparc_function_value_regno_p (const unsigned int regno)
7588 return (regno == 8 || (TARGET_FPU && regno == 32));
7591 /* Do what is necessary for `va_start'. We look at the current function
7592 to determine if stdarg or varargs is used and return the address of
7593 the first unnamed parameter. */
7595 static rtx
7596 sparc_builtin_saveregs (void)
7598 int first_reg = crtl->args.info.words;
7599 rtx address;
7600 int regno;
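  /* A sketch of the effect (assuming SPARC_INT_ARG_MAX is 6 as on this
     port): with three named argument words, the loop below spills the
     incoming registers for words 3..5 into their reserved stack slots,
     and the address of the word-3 slot is returned as the start of the
     unnamed arguments.  */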
7602 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7603 emit_move_insn (gen_rtx_MEM (word_mode,
7604 gen_rtx_PLUS (Pmode,
7605 frame_pointer_rtx,
7606 GEN_INT (FIRST_PARM_OFFSET (0)
7607 + (UNITS_PER_WORD
7608 * regno)))),
7609 gen_rtx_REG (word_mode,
7610 SPARC_INCOMING_INT_ARG_FIRST + regno));
7612 address = gen_rtx_PLUS (Pmode,
7613 frame_pointer_rtx,
7614 GEN_INT (FIRST_PARM_OFFSET (0)
7615 + UNITS_PER_WORD * first_reg));
7617 return address;
7620 /* Implement `va_start' for stdarg. */
7622 static void
7623 sparc_va_start (tree valist, rtx nextarg)
7625 nextarg = expand_builtin_saveregs ();
7626 std_expand_builtin_va_start (valist, nextarg);
7629 /* Implement `va_arg' for stdarg. */
7631 static tree
7632 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7633 gimple_seq *post_p)
7635 HOST_WIDE_INT size, rsize, align;
7636 tree addr, incr;
7637 bool indirect;
7638 tree ptrtype = build_pointer_type (type);
7640 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7642 indirect = true;
7643 size = rsize = UNITS_PER_WORD;
7644 align = 0;
7646 else
7648 indirect = false;
7649 size = int_size_in_bytes (type);
7650 rsize = ROUND_UP (size, UNITS_PER_WORD);
7651 align = 0;
7653 if (TARGET_ARCH64)
7655 /* For SPARC64, objects requiring 16-byte alignment get it. */
7656 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7657 align = 2 * UNITS_PER_WORD;
7659 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7660 are left-justified in their slots. */
7661 if (AGGREGATE_TYPE_P (type))
7663 if (size == 0)
7664 size = rsize = UNITS_PER_WORD;
7665 else
7666 size = rsize;
7671 incr = valist;
7672 if (align)
7674 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7675 incr = fold_convert (sizetype, incr);
7676 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7677 size_int (-align));
7678 incr = fold_convert (ptr_type_node, incr);
7681 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7682 addr = incr;
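  /* On this big-endian target a small scalar is right-justified in its
     word-size slot; e.g. (a sketch) a 4-byte int in an 8-byte ARCH64
     slot lives at slot address + 4, hence the adjustment below.  */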
7684 if (BYTES_BIG_ENDIAN && size < rsize)
7685 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7687 if (indirect)
7689 addr = fold_convert (build_pointer_type (ptrtype), addr);
7690 addr = build_va_arg_indirect_ref (addr);
7693 /* If the address isn't aligned properly for the type, we need a temporary.
7694 FIXME: This is inefficient, usually we can do this in registers. */
7695 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7697 tree tmp = create_tmp_var (type, "va_arg_tmp");
7698 tree dest_addr = build_fold_addr_expr (tmp);
7699 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7700 3, dest_addr, addr, size_int (rsize));
7701 TREE_ADDRESSABLE (tmp) = 1;
7702 gimplify_and_add (copy, pre_p);
7703 addr = dest_addr;
7706 else
7707 addr = fold_convert (ptrtype, addr);
7709 incr = fold_build_pointer_plus_hwi (incr, rsize);
7710 gimplify_assign (valist, incr, post_p);
7712 return build_va_arg_indirect_ref (addr);
7715 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7716 Specify whether the vector mode is supported by the hardware. */
7718 static bool
7719 sparc_vector_mode_supported_p (machine_mode mode)
7721 return TARGET_VIS && VECTOR_MODE_P (mode);
7724 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7726 static machine_mode
7727 sparc_preferred_simd_mode (scalar_mode mode)
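  /* VIS works on 8-byte vectors held in the FP registers, so each
     preferred mode below is 64 bits wide: 2x32, 4x16 or 8x8.  */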
7729 if (TARGET_VIS)
7730 switch (mode)
7732 case E_SImode:
7733 return V2SImode;
7734 case E_HImode:
7735 return V4HImode;
7736 case E_QImode:
7737 return V8QImode;
7739 default:;
7742 return word_mode;
7745 /* Return the string to output an unconditional branch to LABEL, which is
7746 the operand number of the label.
7748 DEST is the destination insn (i.e. the label), INSN is the source. */
7750 const char *
7751 output_ubranch (rtx dest, rtx_insn *insn)
7753 static char string[64];
7754 bool v9_form = false;
7755 int delta;
7756 char *p;
7758 /* Even if we are trying to use cbcond for this, evaluate
7759 whether we can use V9 branches as our backup plan. */
7761 delta = 5000000;
7762 if (INSN_ADDRESSES_SET_P ())
7763 delta = (INSN_ADDRESSES (INSN_UID (dest))
7764 - INSN_ADDRESSES (INSN_UID (insn)));
7766 /* Leave some instructions for "slop". */
7767 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7768 v9_form = true;
7770 if (TARGET_CBCOND)
7772 bool emit_nop = emit_cbcond_nop (insn);
7773 bool far = false;
7774 const char *rval;
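      /* As the comment in output_cbcond notes, cbcond only reaches
         +-2KB; the +-500 byte cutoff below leaves generous slop.  */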
7776 if (delta < -500 || delta > 500)
7777 far = true;
7779 if (far)
7781 if (v9_form)
7782 rval = "ba,a,pt\t%%xcc, %l0";
7783 else
7784 rval = "b,a\t%l0";
7786 else
7788 if (emit_nop)
7789 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7790 else
7791 rval = "cwbe\t%%g0, %%g0, %l0";
7793 return rval;
7796 if (v9_form)
7797 strcpy (string, "ba%*,pt\t%%xcc, ");
7798 else
7799 strcpy (string, "b%*\t");
7801 p = strchr (string, '\0');
7802 *p++ = '%';
7803 *p++ = 'l';
7804 *p++ = '0';
7805 *p++ = '%';
7806 *p++ = '(';
7807 *p = '\0';
7809 return string;
7812 /* Return the string to output a conditional branch to LABEL, which is
7813 the operand number of the label. OP is the conditional expression.
7814 XEXP (OP, 0) is assumed to be a condition code register (integer or
7815 floating point) and its mode specifies what kind of comparison we made.
7817 DEST is the destination insn (i.e. the label), INSN is the source.
7819 REVERSED is nonzero if we should reverse the sense of the comparison.
7821 ANNUL is nonzero if we should generate an annulling branch. */
7823 const char *
7824 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7825 rtx_insn *insn)
7827 static char string[64];
7828 enum rtx_code code = GET_CODE (op);
7829 rtx cc_reg = XEXP (op, 0);
7830 machine_mode mode = GET_MODE (cc_reg);
7831 const char *labelno, *branch;
7832 int spaces = 8, far;
7833 char *p;
7835 /* v9 branches are limited to +-1MB. If it is too far away,
7836 change
7838 bne,pt %xcc, .LC30
7842 be,pn %xcc, .+12
7844 ba .LC30
7848 fbne,a,pn %fcc2, .LC29
7852 fbe,pt %fcc2, .+16
7854 ba .LC29 */
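  /* get_attr_length counts instruction words on this port (an
     assumption recorded here), so a length of 3 or more means branch
     shortening already chose the far sequence sketched above.  */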
7856 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7857 if (reversed ^ far)
7859 /* Reversal of FP compares takes care -- an ordered compare
7860 becomes an unordered compare and vice versa. */
7861 if (mode == CCFPmode || mode == CCFPEmode)
7862 code = reverse_condition_maybe_unordered (code);
7863 else
7864 code = reverse_condition (code);
7867 /* Start by writing the branch condition. */
7868 if (mode == CCFPmode || mode == CCFPEmode)
7870 switch (code)
7872 case NE:
7873 branch = "fbne";
7874 break;
7875 case EQ:
7876 branch = "fbe";
7877 break;
7878 case GE:
7879 branch = "fbge";
7880 break;
7881 case GT:
7882 branch = "fbg";
7883 break;
7884 case LE:
7885 branch = "fble";
7886 break;
7887 case LT:
7888 branch = "fbl";
7889 break;
7890 case UNORDERED:
7891 branch = "fbu";
7892 break;
7893 case ORDERED:
7894 branch = "fbo";
7895 break;
7896 case UNGT:
7897 branch = "fbug";
7898 break;
7899 case UNLT:
7900 branch = "fbul";
7901 break;
7902 case UNEQ:
7903 branch = "fbue";
7904 break;
7905 case UNGE:
7906 branch = "fbuge";
7907 break;
7908 case UNLE:
7909 branch = "fbule";
7910 break;
7911 case LTGT:
7912 branch = "fblg";
7913 break;
7914 default:
7915 gcc_unreachable ();
7918 /* ??? !v9: FP branches cannot be preceded by another floating point
7919 insn. Because there is currently no concept of pre-delay slots,
7920 we can fix this only by always emitting a nop before a floating
7921 point branch. */
7923 string[0] = '\0';
7924 if (! TARGET_V9)
7925 strcpy (string, "nop\n\t");
7926 strcat (string, branch);
7928 else
7930 switch (code)
7932 case NE:
7933 if (mode == CCVmode || mode == CCXVmode)
7934 branch = "bvs";
7935 else
7936 branch = "bne";
7937 break;
7938 case EQ:
7939 if (mode == CCVmode || mode == CCXVmode)
7940 branch = "bvc";
7941 else
7942 branch = "be";
7943 break;
7944 case GE:
7945 if (mode == CCNZmode || mode == CCXNZmode)
7946 branch = "bpos";
7947 else
7948 branch = "bge";
7949 break;
7950 case GT:
7951 branch = "bg";
7952 break;
7953 case LE:
7954 branch = "ble";
7955 break;
7956 case LT:
7957 if (mode == CCNZmode || mode == CCXNZmode)
7958 branch = "bneg";
7959 else
7960 branch = "bl";
7961 break;
7962 case GEU:
7963 branch = "bgeu";
7964 break;
7965 case GTU:
7966 branch = "bgu";
7967 break;
7968 case LEU:
7969 branch = "bleu";
7970 break;
7971 case LTU:
7972 branch = "blu";
7973 break;
7974 default:
7975 gcc_unreachable ();
7977 strcpy (string, branch);
7979 spaces -= strlen (branch);
7980 p = strchr (string, '\0');
7982 /* Now add the annulling, the label, and a possible noop. */
7983 if (annul && ! far)
7985 strcpy (p, ",a");
7986 p += 2;
7987 spaces -= 2;
7990 if (TARGET_V9)
7992 rtx note;
7993 int v8 = 0;
7995 if (! far && insn && INSN_ADDRESSES_SET_P ())
7997 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7998 - INSN_ADDRESSES (INSN_UID (insn)));
7999 /* Leave some instructions for "slop". */
8000 if (delta < -260000 || delta >= 260000)
8001 v8 = 1;
8004 switch (mode)
8006 case E_CCmode:
8007 case E_CCNZmode:
8008 case E_CCCmode:
8009 case E_CCVmode:
8010 labelno = "%%icc, ";
8011 if (v8)
8012 labelno = "";
8013 break;
8014 case E_CCXmode:
8015 case E_CCXNZmode:
8016 case E_CCXCmode:
8017 case E_CCXVmode:
8018 labelno = "%%xcc, ";
8019 gcc_assert (!v8);
8020 break;
8021 case E_CCFPmode:
8022 case E_CCFPEmode:
8024 static char v9_fcc_labelno[] = "%%fccX, ";
8025 /* Set the char indicating the number of the fcc reg to use. */
8026 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8027 labelno = v9_fcc_labelno;
8028 if (v8)
8030 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8031 labelno = "";
8034 break;
8035 default:
8036 gcc_unreachable ();
8039 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8041 strcpy (p,
8042 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8043 >= profile_probability::even ()) ^ far)
8044 ? ",pt" : ",pn");
8045 p += 3;
8046 spaces -= 3;
8049 else
8050 labelno = "";
8052 if (spaces > 0)
8053 *p++ = '\t';
8054 else
8055 *p++ = ' ';
8056 strcpy (p, labelno);
8057 p = strchr (p, '\0');
8058 if (far)
8060 strcpy (p, ".+12\n\t nop\n\tb\t");
8061 /* Skip the next insn if requested or
8062 if we know that it will be a nop. */
8063 if (annul || ! final_sequence)
8064 p[3] = '6';
8065 p += 14;
8067 *p++ = '%';
8068 *p++ = 'l';
8069 *p++ = label + '0';
8070 *p++ = '%';
8071 *p++ = '#';
8072 *p = '\0';
8074 return string;
8077 /* Emit a library call comparison between floating point X and Y.
8078 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8079 Return the new operator to be used in the comparison sequence.
8081 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8082 values as arguments instead of the TFmode registers themselves,
8083 that's why we cannot call emit_float_lib_cmp. */
8086 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8088 const char *qpfunc;
8089 rtx slot0, slot1, result, tem, tem2, libfunc;
8090 machine_mode mode;
8091 enum rtx_code new_comparison;
8093 switch (comparison)
8095 case EQ:
8096 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8097 break;
8099 case NE:
8100 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8101 break;
8103 case GT:
8104 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8105 break;
8107 case GE:
8108 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8109 break;
8111 case LT:
8112 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8113 break;
8115 case LE:
8116 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8117 break;
8119 case ORDERED:
8120 case UNORDERED:
8121 case UNGT:
8122 case UNLT:
8123 case UNEQ:
8124 case UNGE:
8125 case UNLE:
8126 case LTGT:
8127 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8128 break;
8130 default:
8131 gcc_unreachable ();
8134 if (TARGET_ARCH64)
8136 if (MEM_P (x))
8138 tree expr = MEM_EXPR (x);
8139 if (expr)
8140 mark_addressable (expr);
8141 slot0 = x;
8143 else
8145 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8146 emit_move_insn (slot0, x);
8149 if (MEM_P (y))
8151 tree expr = MEM_EXPR (y);
8152 if (expr)
8153 mark_addressable (expr);
8154 slot1 = y;
8156 else
8158 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8159 emit_move_insn (slot1, y);
8162 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8163 emit_library_call (libfunc, LCT_NORMAL,
8164 DImode,
8165 XEXP (slot0, 0), Pmode,
8166 XEXP (slot1, 0), Pmode);
8167 mode = DImode;
8169 else
8171 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8172 emit_library_call (libfunc, LCT_NORMAL,
8173 SImode,
8174 x, TFmode, y, TFmode);
8175 mode = SImode;
8179 /* Immediately move the result of the libcall into a pseudo
8180 register so reload doesn't clobber the value if it needs
8181 the return register for a spill reg. */
8182 result = gen_reg_rtx (mode);
8183 emit_move_insn (result, hard_libcall_value (mode, libfunc));
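  /* The _Q_cmp/_Qp_cmp routines used for the unordered comparisons
     return 0 for equal, 1 for less, 2 for greater and 3 for unordered
     (per the SPARC software conventions); the decoding below relies on
     that encoding, e.g. UNGT <=> result > 1 and
     UNLT <=> (result & 1) != 0.  */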
8185 switch (comparison)
8187 default:
8188 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8189 case ORDERED:
8190 case UNORDERED:
8191 new_comparison = (comparison == UNORDERED ? EQ : NE);
8192 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8193 case UNGT:
8194 case UNGE:
8195 new_comparison = (comparison == UNGT ? GT : NE);
8196 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8197 case UNLE:
8198 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8199 case UNLT:
8200 tem = gen_reg_rtx (mode);
8201 if (TARGET_ARCH32)
8202 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8203 else
8204 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8205 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8206 case UNEQ:
8207 case LTGT:
8208 tem = gen_reg_rtx (mode);
8209 if (TARGET_ARCH32)
8210 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8211 else
8212 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8213 tem2 = gen_reg_rtx (mode);
8214 if (TARGET_ARCH32)
8215 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8216 else
8217 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8218 new_comparison = (comparison == UNEQ ? EQ : NE);
8219 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8222 gcc_unreachable ();
8225 /* Generate an unsigned DImode to FP conversion. This is the same code
8226 optabs would emit if we didn't have TFmode patterns. */
8228 void
8229 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8231 rtx i0, i1, f0, in, out;
8233 out = operands[0];
8234 in = force_reg (DImode, operands[1]);
8235 rtx_code_label *neglab = gen_label_rtx ();
8236 rtx_code_label *donelab = gen_label_rtx ();
8237 i0 = gen_reg_rtx (DImode);
8238 i1 = gen_reg_rtx (DImode);
8239 f0 = gen_reg_rtx (mode);
8241 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8243 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8244 emit_jump_insn (gen_jump (donelab));
8245 emit_barrier ();
8247 emit_label (neglab);
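  /* The input has its sign bit set, so a direct signed conversion is
     impossible: halve the value, folding the low bit back in so the
     final rounding is preserved, convert, then double the result.  */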
8249 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8250 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8251 emit_insn (gen_iordi3 (i0, i0, i1));
8252 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8253 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8255 emit_label (donelab);
8258 /* Generate an FP to unsigned DImode conversion. This is the same code
8259 optabs would emit if we didn't have TFmode patterns. */
8261 void
8262 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8264 rtx i0, i1, f0, in, out, limit;
8266 out = operands[0];
8267 in = force_reg (mode, operands[1]);
8268 rtx_code_label *neglab = gen_label_rtx ();
8269 rtx_code_label *donelab = gen_label_rtx ();
8270 i0 = gen_reg_rtx (DImode);
8271 i1 = gen_reg_rtx (DImode);
8272 limit = gen_reg_rtx (mode);
8273 f0 = gen_reg_rtx (mode);
8275 emit_move_insn (limit,
8276 const_double_from_real_value (
8277 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8278 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8280 emit_insn (gen_rtx_SET (out,
8281 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8282 emit_jump_insn (gen_jump (donelab));
8283 emit_barrier ();
8285 emit_label (neglab);
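  /* The value is >= 2^63, out of signed range: subtract 2^63 first,
     do a signed conversion, then set the sign bit of the integer
     result back by XORing with 1 << 63.  */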
8287 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8288 emit_insn (gen_rtx_SET (i0,
8289 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8290 emit_insn (gen_movdi (i1, const1_rtx));
8291 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8292 emit_insn (gen_xordi3 (out, i0, i1));
8294 emit_label (donelab);
8297 /* Return the string to output a compare and branch instruction to DEST.
8298 DEST is the destination insn (i.e. the label), INSN is the source,
8299 and OP is the conditional expression. */
8301 const char *
8302 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8304 machine_mode mode = GET_MODE (XEXP (op, 0));
8305 enum rtx_code code = GET_CODE (op);
8306 const char *cond_str, *tmpl;
8307 int far, emit_nop, len;
8308 static char string[64];
8309 char size_char;
8311 /* Compare and Branch is limited to +-2KB. If it is too far away,
8312 change
8314 cxbne X, Y, .LC30
8318 cxbe X, Y, .+16
8320 ba,pt xcc, .LC30
8321 nop */
8323 len = get_attr_length (insn);
8325 far = len == 4;
8326 emit_nop = len == 2;
8328 if (far)
8329 code = reverse_condition (code);
8331 size_char = ((mode == SImode) ? 'w' : 'x');
8333 switch (code)
8335 case NE:
8336 cond_str = "ne";
8337 break;
8339 case EQ:
8340 cond_str = "e";
8341 break;
8343 case GE:
8344 cond_str = "ge";
8345 break;
8347 case GT:
8348 cond_str = "g";
8349 break;
8351 case LE:
8352 cond_str = "le";
8353 break;
8355 case LT:
8356 cond_str = "l";
8357 break;
8359 case GEU:
8360 cond_str = "cc";
8361 break;
8363 case GTU:
8364 cond_str = "gu";
8365 break;
8367 case LEU:
8368 cond_str = "leu";
8369 break;
8371 case LTU:
8372 cond_str = "cs";
8373 break;
8375 default:
8376 gcc_unreachable ();
8379 if (far)
8381 int veryfar = 1, delta;
8383 if (INSN_ADDRESSES_SET_P ())
8385 delta = (INSN_ADDRESSES (INSN_UID (dest))
8386 - INSN_ADDRESSES (INSN_UID (insn)));
8387 /* Leave some instructions for "slop". */
8388 if (delta >= -260000 && delta < 260000)
8389 veryfar = 0;
8392 if (veryfar)
8393 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8394 else
8395 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8397 else
8399 if (emit_nop)
8400 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8401 else
8402 tmpl = "c%cb%s\t%%1, %%2, %%3";
8405 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8407 return string;
8410 /* Return the string to output a conditional branch to LABEL, testing
8411 register REG. LABEL is the operand number of the label; REG is the
8412 operand number of the reg. OP is the conditional expression. The mode
8413 of REG says what kind of comparison we made.
8415 DEST is the destination insn (i.e. the label), INSN is the source.
8417 REVERSED is nonzero if we should reverse the sense of the comparison.
8419 ANNUL is nonzero if we should generate an annulling branch. */
8421 const char *
8422 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8423 int annul, rtx_insn *insn)
8425 static char string[64];
8426 enum rtx_code code = GET_CODE (op);
8427 machine_mode mode = GET_MODE (XEXP (op, 0));
8428 rtx note;
8429 int far;
8430 char *p;
8432 /* Branches on a register are limited to +-128KB. If it is too far away,
8433 change
8435 brnz,pt %g1, .LC30
8439 brz,pn %g1, .+12
8441 ba,pt %xcc, .LC30
8445 brgez,a,pn %o1, .LC29
8449 brlz,pt %o1, .+16
8451 ba,pt %xcc, .LC29 */
8453 far = get_attr_length (insn) >= 3;
8455 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8456 if (reversed ^ far)
8457 code = reverse_condition (code);
8459 /* Only 64-bit versions of these instructions exist. */
8460 gcc_assert (mode == DImode);
8462 /* Start by writing the branch condition. */
8464 switch (code)
8466 case NE:
8467 strcpy (string, "brnz");
8468 break;
8470 case EQ:
8471 strcpy (string, "brz");
8472 break;
8474 case GE:
8475 strcpy (string, "brgez");
8476 break;
8478 case LT:
8479 strcpy (string, "brlz");
8480 break;
8482 case LE:
8483 strcpy (string, "brlez");
8484 break;
8486 case GT:
8487 strcpy (string, "brgz");
8488 break;
8490 default:
8491 gcc_unreachable ();
8494 p = strchr (string, '\0');
8496 /* Now add the annulling, reg, label, and nop. */
8497 if (annul && ! far)
8499 strcpy (p, ",a");
8500 p += 2;
8503 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8505 strcpy (p,
8506 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8507 >= profile_probability::even ()) ^ far)
8508 ? ",pt" : ",pn");
8509 p += 3;
8512 *p = p < string + 8 ? '\t' : ' ';
8513 p++;
8514 *p++ = '%';
8515 *p++ = '0' + reg;
8516 *p++ = ',';
8517 *p++ = ' ';
8518 if (far)
8520 int veryfar = 1, delta;
8522 if (INSN_ADDRESSES_SET_P ())
8524 delta = (INSN_ADDRESSES (INSN_UID (dest))
8525 - INSN_ADDRESSES (INSN_UID (insn)));
8526 /* Leave some instructions for "slop". */
8527 if (delta >= -260000 && delta < 260000)
8528 veryfar = 0;
8531 strcpy (p, ".+12\n\t nop\n\t");
8532 /* Skip the next insn if requested or
8533 if we know that it will be a nop. */
8534 if (annul || ! final_sequence)
8535 p[3] = '6';
8536 p += 12;
8537 if (veryfar)
8539 strcpy (p, "b\t");
8540 p += 2;
8542 else
8544 strcpy (p, "ba,pt\t%%xcc, ");
8545 p += 13;
8548 *p++ = '%';
8549 *p++ = 'l';
8550 *p++ = '0' + label;
8551 *p++ = '%';
8552 *p++ = '#';
8553 *p = '\0';
8555 return string;
8558 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8559 Such instructions cannot be used in the delay slot of return insn on v9.
8560 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts. */
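/* A note on why the renaming is safe: the delay-slot insn of the V9
   return executes after the register window has shifted back to the
   caller, so the callee's %i registers must be named by their
   caller-side %o equivalents.  */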
8563 static int
8564 epilogue_renumber (register rtx *where, int test)
8566 register const char *fmt;
8567 register int i;
8568 register enum rtx_code code;
8570 if (*where == 0)
8571 return 0;
8573 code = GET_CODE (*where);
8575 switch (code)
8577 case REG:
8578 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8579 return 1;
8580 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8581 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8582 /* fallthrough */
8583 case SCRATCH:
8584 case CC0:
8585 case PC:
8586 case CONST_INT:
8587 case CONST_WIDE_INT:
8588 case CONST_DOUBLE:
8589 return 0;
8591 /* Do not replace the frame pointer with the stack pointer because
8592 it can cause the delayed instruction to load below the stack.
8593 This occurs when instructions like:
8595 (set (reg/i:SI 24 %i0)
8596 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8597 (const_int -20 [0xffffffec])) 0))
8599 are in the return delayed slot. */
8600 case PLUS:
8601 if (GET_CODE (XEXP (*where, 0)) == REG
8602 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8603 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8604 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8605 return 1;
8606 break;
8608 case MEM:
8609 if (SPARC_STACK_BIAS
8610 && GET_CODE (XEXP (*where, 0)) == REG
8611 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8612 return 1;
8613 break;
8615 default:
8616 break;
8619 fmt = GET_RTX_FORMAT (code);
8621 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8623 if (fmt[i] == 'E')
8625 register int j;
8626 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8627 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8628 return 1;
8630 else if (fmt[i] == 'e'
8631 && epilogue_renumber (&(XEXP (*where, i)), test))
8632 return 1;
8634 return 0;
8637 /* Leaf functions and non-leaf functions have different needs. */
8639 static const int
8640 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8642 static const int
8643 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8645 static const int *const reg_alloc_orders[] = {
8646 reg_leaf_alloc_order,
8647 reg_nonleaf_alloc_order};
8649 void
8650 order_regs_for_local_alloc (void)
8652 static int last_order_nonleaf = 1;
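  /* Hard register 15 is %o7, which every call clobbers with the return
     address, so its liveness is a handy leaf-vs-non-leaf test.  */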
8654 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8656 last_order_nonleaf = !last_order_nonleaf;
8657 memcpy ((char *) reg_alloc_order,
8658 (const char *) reg_alloc_orders[last_order_nonleaf],
8659 FIRST_PSEUDO_REGISTER * sizeof (int));
8663 /* Return 1 if REG and MEM are legitimate enough to allow the various
8664 MEM<-->REG splits to be run. */
8667 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8669 /* Punt if we are here by mistake. */
8670 gcc_assert (reload_completed);
8672 /* We must have an offsettable memory reference. */
8673 if (!offsettable_memref_p (mem))
8674 return 0;
8676 /* If we have legitimate args for ldd/std, we do not want
8677 the split to happen. */
8678 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8679 return 0;
8681 /* Success. */
8682 return 1;
8685 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8687 void
8688 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8690 rtx high_part = gen_highpart (mode, dest);
8691 rtx low_part = gen_lowpart (mode, dest);
8692 rtx word0 = adjust_address (src, mode, 0);
8693 rtx word1 = adjust_address (src, mode, 4);
8695 if (reg_overlap_mentioned_p (high_part, word1))
8697 emit_move_insn_1 (low_part, word1);
8698 emit_move_insn_1 (high_part, word0);
8700 else
8702 emit_move_insn_1 (high_part, word0);
8703 emit_move_insn_1 (low_part, word1);
8707 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8709 void
8710 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8712 rtx word0 = adjust_address (dest, mode, 0);
8713 rtx word1 = adjust_address (dest, mode, 4);
8714 rtx high_part = gen_highpart (mode, src);
8715 rtx low_part = gen_lowpart (mode, src);
8717 emit_move_insn_1 (word0, high_part);
8718 emit_move_insn_1 (word1, low_part);
8721 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8724 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8726 /* Punt if we are here by mistake. */
8727 gcc_assert (reload_completed);
8729 if (GET_CODE (reg1) == SUBREG)
8730 reg1 = SUBREG_REG (reg1);
8731 if (GET_CODE (reg1) != REG)
8732 return 0;
8733 const int regno1 = REGNO (reg1);
8735 if (GET_CODE (reg2) == SUBREG)
8736 reg2 = SUBREG_REG (reg2);
8737 if (GET_CODE (reg2) != REG)
8738 return 0;
8739 const int regno2 = REGNO (reg2);
8741 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8742 return 1;
8744 if (TARGET_VIS3)
8746 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8747 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8748 return 1;
8751 return 0;
8754 /* Split a REG <--> REG move into a pair of moves in MODE. */
8756 void
8757 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8759 rtx dest1 = gen_highpart (mode, dest);
8760 rtx dest2 = gen_lowpart (mode, dest);
8761 rtx src1 = gen_highpart (mode, src);
8762 rtx src2 = gen_lowpart (mode, src);
8764 /* Now emit using the real source and destination we found, swapping
8765 the order if we detect overlap. */
8766 if (reg_overlap_mentioned_p (dest1, src2))
8768 emit_move_insn_1 (dest2, src2);
8769 emit_move_insn_1 (dest1, src1);
8771 else
8773 emit_move_insn_1 (dest1, src1);
8774 emit_move_insn_1 (dest2, src2);
8778 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8779 This makes them candidates for using ldd and std insns.
8781 Note reg1 and reg2 *must* be hard registers. */
8784 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8786 /* We might have been passed a SUBREG. */
8787 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8788 return 0;
8790 if (REGNO (reg1) % 2 != 0)
8791 return 0;
8793 /* Integer ldd is deprecated in SPARC V9. */
8794 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8795 return 0;
8797 return (REGNO (reg1) == REGNO (reg2) - 1);
8800 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8801 an ldd or std insn.
8803 This can only happen when addr1 and addr2, the addresses in mem1
8804 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8805 addr1 must also be aligned on a 64-bit boundary.
8807 Also, if dependent_reg_rtx is not null, it should not be used to
8808 compute the address for mem1, i.e. we cannot optimize a sequence
8809 like:
8810 ld [%o0], %o0
8811 ld [%o0 + 4], %o1
8813 ldd [%o0], %o0
8814 nor:
8815 ld [%g3 + 4], %g3
8816 ld [%g3], %g2
8818 ldd [%g3], %g2
8820 But, note that the transformation from:
8821 ld [%g2 + 4], %g3
8822 ld [%g2], %g2
8824 ldd [%g2], %g2
8825 is perfectly fine. Thus, the peephole2 patterns always pass us
8826 the destination register of the first load, never the second one.
8828 For stores we don't have a similar problem, so dependent_reg_rtx is
8829 NULL_RTX. */
8832 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8834 rtx addr1, addr2;
8835 unsigned int reg1;
8836 HOST_WIDE_INT offset1;
8838 /* The mems cannot be volatile. */
8839 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8840 return 0;
8842 /* MEM1 should be aligned on a 64-bit boundary. */
8843 if (MEM_ALIGN (mem1) < 64)
8844 return 0;
8846 addr1 = XEXP (mem1, 0);
8847 addr2 = XEXP (mem2, 0);
8849 /* Extract a register number and offset (if used) from the first addr. */
8850 if (GET_CODE (addr1) == PLUS)
8852 /* If not a REG, return zero. */
8853 if (GET_CODE (XEXP (addr1, 0)) != REG)
8854 return 0;
8855 else
8857 reg1 = REGNO (XEXP (addr1, 0));
8858 /* The offset must be constant! */
8859 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8860 return 0;
8861 offset1 = INTVAL (XEXP (addr1, 1));
8864 else if (GET_CODE (addr1) != REG)
8865 return 0;
8866 else
8868 reg1 = REGNO (addr1);
8869 /* This was a simple (mem (reg)) expression. Offset is 0. */
8870 offset1 = 0;
8873 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
8874 if (GET_CODE (addr2) != PLUS)
8875 return 0;
8877 if (GET_CODE (XEXP (addr2, 0)) != REG
8878 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8879 return 0;
8881 if (reg1 != REGNO (XEXP (addr2, 0)))
8882 return 0;
8884 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8885 return 0;
8887 /* The first offset must be evenly divisible by 8 to ensure the
8888 address is 64-bit aligned. */
8889 if (offset1 % 8 != 0)
8890 return 0;
8892 /* The offset for the second addr must be 4 more than the first addr. */
8893 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8894 return 0;
8896 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8897 instructions. */
8898 return 1;
8901 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8904 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8906 rtx x = widen_memory_access (mem1, mode, 0);
8907 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8908 return x;
8911 /* Return 1 if reg is a pseudo, or is the first register in
8912 a hard register pair. This makes it suitable for use in
8913 ldd and std insns. */
8916 register_ok_for_ldd (rtx reg)
8918 /* We might have been passed a SUBREG. */
8919 if (!REG_P (reg))
8920 return 0;
8922 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8923 return (REGNO (reg) % 2 == 0);
8925 return 1;
8928 /* Return 1 if OP, a MEM, has an address which is known to be
8929 aligned to an 8-byte boundary. */
8932 memory_ok_for_ldd (rtx op)
8934 /* In 64-bit mode, we assume that the address is word-aligned. */
8935 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8936 return 0;
8938 if (! can_create_pseudo_p ()
8939 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8940 return 0;
8942 return 1;
8945 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8947 static bool
8948 sparc_print_operand_punct_valid_p (unsigned char code)
8950 if (code == '#'
8951 || code == '*'
8952 || code == '('
8953 || code == ')'
8954 || code == '_'
8955 || code == '&')
8956 return true;
8958 return false;
8961 /* Implement TARGET_PRINT_OPERAND.
8962 Print operand X (an rtx) in assembler syntax to file FILE.
8963 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8964 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8966 static void
8967 sparc_print_operand (FILE *file, rtx x, int code)
8969 const char *s;
8971 switch (code)
8973 case '#':
8974 /* Output an insn in a delay slot. */
8975 if (final_sequence)
8976 sparc_indent_opcode = 1;
8977 else
8978 fputs ("\n\t nop", file);
8979 return;
8980 case '*':
8981 /* Output an annul flag if there's nothing for the delay slot and we
8982 are optimizing. This is always used with '(' below.
8983 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8984 this is a dbx bug. So, we only do this when optimizing.
8985 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8986 Always emit a nop in case the next instruction is a branch. */
8987 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8988 fputs (",a", file);
8989 return;
8990 case '(':
8991 /* Output a 'nop' if there's nothing for the delay slot and we are
8992 not optimizing. This is always used with '*' above. */
8993 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8994 fputs ("\n\t nop", file);
8995 else if (final_sequence)
8996 sparc_indent_opcode = 1;
8997 return;
8998 case ')':
8999 /* Output the right displacement from the saved PC on function return.
9000 The caller may have placed an "unimp" insn immediately after the call
9001 so we have to account for it. This insn is used in the 32-bit ABI
9002 when calling a function that returns a non zero-sized structure. The
9003 64-bit ABI doesn't have it. Be careful to have this test be the same
9004 as that for the call. The exception is when sparc_std_struct_return
9005 is enabled, the psABI is followed exactly and the adjustment is made
9006 by the code in sparc_struct_value_rtx. The call emitted is the same
9007 when sparc_std_struct_return is enabled. */
9008 if (!TARGET_ARCH64
9009 && cfun->returns_struct
9010 && !sparc_std_struct_return
9011 && DECL_SIZE (DECL_RESULT (current_function_decl))
9012 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9013 == INTEGER_CST
9014 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9015 fputs ("12", file);
9016 else
9017 fputc ('8', file);
9018 return;
9019 case '_':
9020 /* Output the Embedded Medium/Anywhere code model base register. */
9021 fputs (EMBMEDANY_BASE_REG, file);
9022 return;
9023 case '&':
9024 /* Print some local dynamic TLS name. */
9025 if (const char *name = get_some_local_dynamic_name ())
9026 assemble_name (file, name);
9027 else
9028 output_operand_lossage ("'%%&' used without any "
9029 "local dynamic TLS references");
9030 return;
9032 case 'Y':
9033 /* Adjust the operand to take into account a RESTORE operation. */
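      /* After a RESTORE the current %i0-%i7 become the caller's
         %o0-%o7; %i0 is hard reg 24 and %o0 is hard reg 8, hence the
         REGNO (x) - 16 mapping below.  */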
9034 if (GET_CODE (x) == CONST_INT)
9035 break;
9036 else if (GET_CODE (x) != REG)
9037 output_operand_lossage ("invalid %%Y operand");
9038 else if (REGNO (x) < 8)
9039 fputs (reg_names[REGNO (x)], file);
9040 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9041 fputs (reg_names[REGNO (x)-16], file);
9042 else
9043 output_operand_lossage ("invalid %%Y operand");
9044 return;
9045 case 'L':
9046 /* Print out the low order register name of a register pair. */
9047 if (WORDS_BIG_ENDIAN)
9048 fputs (reg_names[REGNO (x)+1], file);
9049 else
9050 fputs (reg_names[REGNO (x)], file);
9051 return;
9052 case 'H':
9053 /* Print out the high order register name of a register pair. */
9054 if (WORDS_BIG_ENDIAN)
9055 fputs (reg_names[REGNO (x)], file);
9056 else
9057 fputs (reg_names[REGNO (x)+1], file);
9058 return;
9059 case 'R':
9060 /* Print out the second register name of a register pair or quad.
9061 I.e., R (%o0) => %o1. */
9062 fputs (reg_names[REGNO (x)+1], file);
9063 return;
9064 case 'S':
9065 /* Print out the third register name of a register quad.
9066 I.e., S (%o0) => %o2. */
9067 fputs (reg_names[REGNO (x)+2], file);
9068 return;
9069 case 'T':
9070 /* Print out the fourth register name of a register quad.
9071 I.e., T (%o0) => %o3. */
9072 fputs (reg_names[REGNO (x)+3], file);
9073 return;
9074 case 'x':
9075 /* Print a condition code register. */
9076 if (REGNO (x) == SPARC_ICC_REG)
9078 switch (GET_MODE (x))
9080 case E_CCmode:
9081 case E_CCNZmode:
9082 case E_CCCmode:
9083 case E_CCVmode:
9084 s = "%icc";
9085 break;
9086 case E_CCXmode:
9087 case E_CCXNZmode:
9088 case E_CCXCmode:
9089 case E_CCXVmode:
9090 s = "%xcc";
9091 break;
9092 default:
9093 gcc_unreachable ();
9095 fputs (s, file);
9097 else
9098 /* %fccN register */
9099 fputs (reg_names[REGNO (x)], file);
9100 return;
9101 case 'm':
9102 /* Print the operand's address only. */
9103 output_address (GET_MODE (x), XEXP (x, 0));
9104 return;
9105 case 'r':
9106 /* In this case we need a register. Use %g0 if the
9107 operand is const0_rtx. */
9108 if (x == const0_rtx
9109 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9111 fputs ("%g0", file);
9112 return;
9114 else
9115 break;
9117 case 'A':
9118 switch (GET_CODE (x))
9120 case IOR:
9121 s = "or";
9122 break;
9123 case AND:
9124 s = "and";
9125 break;
9126 case XOR:
9127 s = "xor";
9128 break;
9129 default:
9130 output_operand_lossage ("invalid %%A operand");
9131 s = "";
9132 break;
9134 fputs (s, file);
9135 return;
9137 case 'B':
9138 switch (GET_CODE (x))
9140 case IOR:
9141 s = "orn";
9142 break;
9143 case AND:
9144 s = "andn";
9145 break;
9146 case XOR:
9147 s = "xnor";
9148 break;
9149 default:
9150 output_operand_lossage ("invalid %%B operand");
9151 s = "";
9152 break;
9154 fputs (s, file);
9155 return;
9157 /* This is used by the conditional move instructions. */
9158 case 'C':
9160 machine_mode mode = GET_MODE (XEXP (x, 0));
9161 switch (GET_CODE (x))
9163 case NE:
9164 if (mode == CCVmode || mode == CCXVmode)
9165 s = "vs";
9166 else
9167 s = "ne";
9168 break;
9169 case EQ:
9170 if (mode == CCVmode || mode == CCXVmode)
9171 s = "vc";
9172 else
9173 s = "e";
9174 break;
9175 case GE:
9176 if (mode == CCNZmode || mode == CCXNZmode)
9177 s = "pos";
9178 else
9179 s = "ge";
9180 break;
9181 case GT:
9182 s = "g";
9183 break;
9184 case LE:
9185 s = "le";
9186 break;
9187 case LT:
9188 if (mode == CCNZmode || mode == CCXNZmode)
9189 s = "neg";
9190 else
9191 s = "l";
9192 break;
9193 case GEU:
9194 s = "geu";
9195 break;
9196 case GTU:
9197 s = "gu";
9198 break;
9199 case LEU:
9200 s = "leu";
9201 break;
9202 case LTU:
9203 s = "lu";
9204 break;
9205 case LTGT:
9206 s = "lg";
9207 break;
9208 case UNORDERED:
9209 s = "u";
9210 break;
9211 case ORDERED:
9212 s = "o";
9213 break;
9214 case UNLT:
9215 s = "ul";
9216 break;
9217 case UNLE:
9218 s = "ule";
9219 break;
9220 case UNGT:
9221 s = "ug";
9222 break;
9223 case UNGE:
9224 s = "uge"
9225 ; break;
9226 case UNEQ:
9227 s = "ue";
9228 break;
9229 default:
9230 output_operand_lossage ("invalid %%C operand");
9231 s = "";
9232 break;
9234 fputs (s, file);
9235 return;
9238 /* This is used by the movr instruction pattern. */
9239 case 'D':
9241 switch (GET_CODE (x))
9243 case NE:
9244 s = "ne";
9245 break;
9246 case EQ:
9247 s = "e";
9248 break;
9249 case GE:
9250 s = "gez";
9251 break;
9252 case LT:
9253 s = "lz";
9254 break;
9255 case LE:
9256 s = "lez";
9257 break;
9258 case GT:
9259 s = "gz";
9260 break;
9261 default:
9262 output_operand_lossage ("invalid %%D operand");
9263 s = "";
9264 break;
9266 fputs (s, file);
9267 return;
9270 case 'b':
9272 /* Print a sign-extended character. */
9273 int i = trunc_int_for_mode (INTVAL (x), QImode);
9274 fprintf (file, "%d", i);
9275 return;
9278 case 'f':
9279 /* Operand must be a MEM; write its address. */
9280 if (GET_CODE (x) != MEM)
9281 output_operand_lossage ("invalid %%f operand");
9282 output_address (GET_MODE (x), XEXP (x, 0));
9283 return;
9285 case 's':
9287 /* Print a sign-extended 32-bit value. */
9288 HOST_WIDE_INT i;
9289 if (GET_CODE(x) == CONST_INT)
9290 i = INTVAL (x);
9291 else
9293 output_operand_lossage ("invalid %%s operand");
9294 return;
9296 i = trunc_int_for_mode (i, SImode);
9297 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9298 return;
9301 case 0:
9302 /* Do nothing special. */
9303 break;
9305 default:
9306 /* Undocumented flag. */
9307 output_operand_lossage ("invalid operand output code");
9310 if (GET_CODE (x) == REG)
9311 fputs (reg_names[REGNO (x)], file);
9312 else if (GET_CODE (x) == MEM)
9314 fputc ('[', file);
9315 /* Poor Sun assembler doesn't understand absolute addressing. */
9316 if (CONSTANT_P (XEXP (x, 0)))
9317 fputs ("%g0+", file);
9318 output_address (GET_MODE (x), XEXP (x, 0));
9319 fputc (']', file);
9321 else if (GET_CODE (x) == HIGH)
9323 fputs ("%hi(", file);
9324 output_addr_const (file, XEXP (x, 0));
9325 fputc (')', file);
9327 else if (GET_CODE (x) == LO_SUM)
9329 sparc_print_operand (file, XEXP (x, 0), 0);
9330 if (TARGET_CM_MEDMID)
9331 fputs ("+%l44(", file);
9332 else
9333 fputs ("+%lo(", file);
9334 output_addr_const (file, XEXP (x, 1));
9335 fputc (')', file);
9337 else if (GET_CODE (x) == CONST_DOUBLE)
9338 output_operand_lossage ("floating-point constant not a valid immediate operand");
9339 else
9340 output_addr_const (file, x);
9343 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9345 static void
9346 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9348 register rtx base, index = 0;
9349 int offset = 0;
9350 register rtx addr = x;
9352 if (REG_P (addr))
9353 fputs (reg_names[REGNO (addr)], file);
9354 else if (GET_CODE (addr) == PLUS)
9356 if (CONST_INT_P (XEXP (addr, 0)))
9357 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9358 else if (CONST_INT_P (XEXP (addr, 1)))
9359 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9360 else
9361 base = XEXP (addr, 0), index = XEXP (addr, 1);
9362 if (GET_CODE (base) == LO_SUM)
9364 gcc_assert (USE_AS_OFFSETABLE_LO10
9365 && TARGET_ARCH64
9366 && ! TARGET_CM_MEDMID);
9367 output_operand (XEXP (base, 0), 0);
9368 fputs ("+%lo(", file);
9369 output_address (VOIDmode, XEXP (base, 1));
9370 fprintf (file, ")+%d", offset);
9372 else
9374 fputs (reg_names[REGNO (base)], file);
9375 if (index == 0)
9376 fprintf (file, "%+d", offset);
9377 else if (REG_P (index))
9378 fprintf (file, "+%s", reg_names[REGNO (index)]);
9379 else if (GET_CODE (index) == SYMBOL_REF
9380 || GET_CODE (index) == LABEL_REF
9381 || GET_CODE (index) == CONST)
9382 fputc ('+', file), output_addr_const (file, index);
9383 else gcc_unreachable ();
9386 else if (GET_CODE (addr) == MINUS
9387 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9389 output_addr_const (file, XEXP (addr, 0));
9390 fputs ("-(", file);
9391 output_addr_const (file, XEXP (addr, 1));
9392 fputs ("-.)", file);
9394 else if (GET_CODE (addr) == LO_SUM)
9396 output_operand (XEXP (addr, 0), 0);
9397 if (TARGET_CM_MEDMID)
9398 fputs ("+%l44(", file);
9399 else
9400 fputs ("+%lo(", file);
9401 output_address (VOIDmode, XEXP (addr, 1));
9402 fputc (')', file);
9404 else if (flag_pic
9405 && GET_CODE (addr) == CONST
9406 && GET_CODE (XEXP (addr, 0)) == MINUS
9407 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9408 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9409 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9411 addr = XEXP (addr, 0);
9412 output_addr_const (file, XEXP (addr, 0));
9413 /* Group the args of the second CONST in parenthesis. */
9414 fputs ("-(", file);
9415 /* Skip past the second CONST--it does nothing for us. */
9416 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9417 /* Close the parenthesis. */
9418 fputc (')', file);
9420 else
9422 output_addr_const (file, addr);
9426 /* Target hook for assembling integer objects. The sparc version has
9427 special handling for aligned DI-mode objects. */
9429 static bool
9430 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9432 /* ??? We only output .xword's for symbols and only then in environments
9433 where the assembler can handle them. */
9434 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9436 if (TARGET_V9)
9438 assemble_integer_with_op ("\t.xword\t", x);
9439 return true;
9441 else
9443 assemble_aligned_integer (4, const0_rtx);
9444 assemble_aligned_integer (4, x);
9445 return true;
9448 return default_assemble_integer (x, size, aligned_p);
9451 /* Return the value of a code used in the .proc pseudo-op that says
9452 what kind of result this function returns. For non-C types, we pick
9453 the closest C type. */
9455 #ifndef SHORT_TYPE_SIZE
9456 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9457 #endif
9459 #ifndef INT_TYPE_SIZE
9460 #define INT_TYPE_SIZE BITS_PER_WORD
9461 #endif
9463 #ifndef LONG_TYPE_SIZE
9464 #define LONG_TYPE_SIZE BITS_PER_WORD
9465 #endif
9467 #ifndef LONG_LONG_TYPE_SIZE
9468 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9469 #endif
9471 #ifndef FLOAT_TYPE_SIZE
9472 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9473 #endif
9475 #ifndef DOUBLE_TYPE_SIZE
9476 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9477 #endif
9479 #ifndef LONG_DOUBLE_TYPE_SIZE
9480 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9481 #endif
9483 unsigned long
9484 sparc_type_code (register tree type)
9486 register unsigned long qualifiers = 0;
9487 register unsigned shift;
9489 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9490 setting more, since some assemblers will give an error for this. Also,
9491 we must be careful to avoid shifts of 32 bits or more to avoid getting
9492 unpredictable results. */
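  /* A worked example (added here as an illustration): for the C type
     "int *", the first iteration sees POINTER_TYPE and sets 1 << 6,
     then the INTEGER_TYPE of precision INT_TYPE_SIZE returns
     0x40 | 4 = 0x44.  */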
9494 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9496 switch (TREE_CODE (type))
9498 case ERROR_MARK:
9499 return qualifiers;
9501 case ARRAY_TYPE:
9502 qualifiers |= (3 << shift);
9503 break;
9505 case FUNCTION_TYPE:
9506 case METHOD_TYPE:
9507 qualifiers |= (2 << shift);
9508 break;
9510 case POINTER_TYPE:
9511 case REFERENCE_TYPE:
9512 case OFFSET_TYPE:
9513 qualifiers |= (1 << shift);
9514 break;
9516 case RECORD_TYPE:
9517 return (qualifiers | 8);
9519 case UNION_TYPE:
9520 case QUAL_UNION_TYPE:
9521 return (qualifiers | 9);
9523 case ENUMERAL_TYPE:
9524 return (qualifiers | 10);
9526 case VOID_TYPE:
9527 return (qualifiers | 16);
9529 case INTEGER_TYPE:
9530 /* If this is a range type, consider it to be the underlying
9531 type. */
9532 if (TREE_TYPE (type) != 0)
9533 break;
9535 /* Carefully distinguish all the standard types of C,
9536 without messing up if the language is not C. We do this by
9537 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9538 look at both the names and the above fields, but that's redundant.
9539 Any type whose size is between two C types will be considered
9540 to be the wider of the two types. Also, we do not have a
9541 special code to use for "long long", so anything wider than
9542 long is treated the same. Note that we can't distinguish
9543 between "int" and "long" in this code if they are the same
9544 size, but that's fine, since neither can the assembler. */
9546 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9547 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9549 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9550 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9552 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9553 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9555 else
9556 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9558 case REAL_TYPE:
9559 /* If this is a range type, consider it to be the underlying
9560 type. */
9561 if (TREE_TYPE (type) != 0)
9562 break;
9564 /* Carefully distinguish all the standard types of C,
9565 without messing up if the language is not C. */
9567 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9568 return (qualifiers | 6);
9570 else
9571 return (qualifiers | 7);
9573 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9574 /* ??? We need to distinguish between double and float complex types,
9575 but I don't know how yet because I can't reach this code from
9576 existing front-ends. */
9577 return (qualifiers | 7); /* Who knows? */
9579 case VECTOR_TYPE:
9580 case BOOLEAN_TYPE: /* Boolean truth value type. */
9581 case LANG_TYPE:
9582 case NULLPTR_TYPE:
9583 return qualifiers;
9585 default:
9586 gcc_unreachable (); /* Not a type! */
9590 return qualifiers;
9593 /* Nested function support. */
9595 /* Emit RTL insns to initialize the variable parts of a trampoline.
9596 FNADDR is an RTX for the address of the function's pure code.
9597 CXT is an RTX for the static chain value for the function.
9599 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9600 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9601 (to store insns). This is a bit excessive. Perhaps a different
9602 mechanism would be better here.
9604 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9606 static void
9607 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9609 /* SPARC 32-bit trampoline:
9611 sethi %hi(fn), %g1
9612 sethi %hi(static), %g2
9613 jmp %g1+%lo(fn)
9614 or %g2, %lo(static), %g2
9616 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9617 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
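
     Decoded, as an added annotation of the constants used below:

	0x03000000	sethi	%hi(0), %g1
	0x05000000	sethi	%hi(0), %g2
	0x81c06000	jmpl	%g1 + %lo(0), %g0
	0x8410a000	or	%g2, %lo(0), %g2

     The high 22 bits of fnaddr/cxt (the >> 10 shifts) go into the
     sethi words and the low 10 bits (the & 0x3ff masks) into the
     jmpl/or words.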
9620 emit_move_insn
9621 (adjust_address (m_tramp, SImode, 0),
9622 expand_binop (SImode, ior_optab,
9623 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9624 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9625 NULL_RTX, 1, OPTAB_DIRECT));
9627 emit_move_insn
9628 (adjust_address (m_tramp, SImode, 4),
9629 expand_binop (SImode, ior_optab,
9630 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9631 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9632 NULL_RTX, 1, OPTAB_DIRECT));
9634 emit_move_insn
9635 (adjust_address (m_tramp, SImode, 8),
9636 expand_binop (SImode, ior_optab,
9637 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9638 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9639 NULL_RTX, 1, OPTAB_DIRECT));
9641 emit_move_insn
9642 (adjust_address (m_tramp, SImode, 12),
9643 expand_binop (SImode, ior_optab,
9644 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9645 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9646 NULL_RTX, 1, OPTAB_DIRECT));
9648 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9649 aligned on a 16 byte boundary so one flush clears it all. */
9650 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9651 if (sparc_cpu != PROCESSOR_ULTRASPARC
9652 && sparc_cpu != PROCESSOR_ULTRASPARC3
9653 && sparc_cpu != PROCESSOR_NIAGARA
9654 && sparc_cpu != PROCESSOR_NIAGARA2
9655 && sparc_cpu != PROCESSOR_NIAGARA3
9656 && sparc_cpu != PROCESSOR_NIAGARA4
9657 && sparc_cpu != PROCESSOR_NIAGARA7
9658 && sparc_cpu != PROCESSOR_M8)
9659 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9661 /* Call __enable_execute_stack after writing onto the stack to make sure
9662 the stack address is accessible. */
9663 #ifdef HAVE_ENABLE_EXECUTE_STACK
9664 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9665 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9666 #endif
9670 /* The 64-bit version is simpler because it makes more sense to load the
9671 values as "immediate" data out of the trampoline. It's also easier since
9672 we can read the PC without clobbering a register. */
9674 static void
9675 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9677 /* SPARC 64-bit trampoline:
9679 rd %pc, %g1
9680 ldx [%g1+24], %g5
9681 jmp %g5
9682 ldx [%g1+16], %g5
9683 +16 bytes data
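
     Decoded, as an added annotation of the constants used below:

	0x83414000	rd	%pc, %g1
	0xca586018	ldx	[%g1 + 24], %g5
	0x81c14000	jmp	%g5
	0xca586010	ldx	[%g1 + 16], %g5

     so the function address and static chain are fetched PC-relative
     from the data words at offsets 24 and 16.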
9686 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9687 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9688 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9689 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9690 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9691 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9692 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9693 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9694 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9695 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9696 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9698 if (sparc_cpu != PROCESSOR_ULTRASPARC
9699 && sparc_cpu != PROCESSOR_ULTRASPARC3
9700 && sparc_cpu != PROCESSOR_NIAGARA
9701 && sparc_cpu != PROCESSOR_NIAGARA2
9702 && sparc_cpu != PROCESSOR_NIAGARA3
9703 && sparc_cpu != PROCESSOR_NIAGARA4
9704 && sparc_cpu != PROCESSOR_NIAGARA7
9705 && sparc_cpu != PROCESSOR_M8)
9706 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9708 /* Call __enable_execute_stack after writing onto the stack to make sure
9709 the stack address is accessible. */
9710 #ifdef HAVE_ENABLE_EXECUTE_STACK
9711 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9712 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9713 #endif
9716 /* Worker for TARGET_TRAMPOLINE_INIT. */
9718 static void
9719 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9721 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9722 cxt = force_reg (Pmode, cxt);
9723 if (TARGET_ARCH64)
9724 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9725 else
9726 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9729 /* Adjust the cost of a scheduling dependency. Return the new cost of
9730 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9732 static int
9733 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9734 int cost)
9736 enum attr_type insn_type;
9738 if (recog_memoized (insn) < 0)
9739 return cost;
9741 insn_type = get_attr_type (insn);
9743 if (dep_type == 0)
9745 /* Data dependency; DEP_INSN writes a register that INSN reads some
9746 cycles later. */
9748 /* If a load, then the dependence must be on the memory address;
9749 add an extra "cycle". Note that the cost could be two cycles
9750 if the reg was written late in an instruction group; we cannot
9751 tell here. */
9752 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9753 return cost + 3;
9755 /* Get the delay only if the address of the store is the dependence. */
9756 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9758 rtx pat = PATTERN(insn);
9759 rtx dep_pat = PATTERN (dep_insn);
9761 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9762 return cost; /* This should not happen! */
9764 /* The dependency between the two instructions was on the data that
9765 is being stored. Assume that this implies that the address of the
9766 store is not dependent. */
9767 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9768 return cost;
9770 return cost + 3; /* An approximation. */
9773 /* A shift instruction cannot receive its data from an instruction
9774 in the same cycle; add a one cycle penalty. */
9775 if (insn_type == TYPE_SHIFT)
9776 return cost + 3; /* Split before cascade into shift. */
9778 else
9780 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9781 INSN writes some cycles later. */
9783 /* These are only significant for the fpu unit; writing a fp reg before
9784 the fpu has finished with it stalls the processor. */
9786 /* Reusing an integer register causes no problems. */
9787 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9788 return 0;
9791 return cost;
9794 static int
9795 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9796 int cost)
9798 enum attr_type insn_type, dep_type;
9799 rtx pat = PATTERN(insn);
9800 rtx dep_pat = PATTERN (dep_insn);
9802 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9803 return cost;
9805 insn_type = get_attr_type (insn);
9806 dep_type = get_attr_type (dep_insn);
9808 switch (dtype)
9810 case 0:
9811 /* Data dependency; DEP_INSN writes a register that INSN reads some
9812 cycles later. */
9814 switch (insn_type)
9816 case TYPE_STORE:
9817 case TYPE_FPSTORE:
9818 /* Get the delay iff the address of the store is the dependence. */
9819 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9820 return cost;
9822 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9823 return cost;
9824 return cost + 3;
9826 case TYPE_LOAD:
9827 case TYPE_SLOAD:
9828 case TYPE_FPLOAD:
9829 /* If a load, then the dependence must be on the memory address. If
9830 the addresses aren't equal, then it might be a false dependency. */
9831 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9833 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9834 || GET_CODE (SET_DEST (dep_pat)) != MEM
9835 || GET_CODE (SET_SRC (pat)) != MEM
9836 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9837 XEXP (SET_SRC (pat), 0)))
9838 return cost + 2;
9840 return cost + 8;
9842 break;
9844 case TYPE_BRANCH:
9845 /* Compare to branch latency is 0. There is no benefit from
9846 separating compare and branch. */
9847 if (dep_type == TYPE_COMPARE)
9848 return 0;
9849 /* Floating point compare to branch latency is less than
9850 compare to conditional move. */
9851 if (dep_type == TYPE_FPCMP)
9852 return cost - 1;
9853 break;
9854 default:
9855 break;
9857 break;
9859 case REG_DEP_ANTI:
9860 /* Anti-dependencies only penalize the fpu unit. */
9861 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9862 return 0;
9863 break;
9865 default:
9866 break;
9869 return cost;
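/* Illustrative reading of the load-after-store case above: when the store
   at DEP_INSN and the dependent load use the same address rtx, the load
   really waits on the stored data and pays cost + 8; when the addresses
   differ, the dependence is likely a false one and only the small
   cost + 2 penalty is kept.  */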
9872 static int
9873 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9874 unsigned int)
9876 switch (sparc_cpu)
9878 case PROCESSOR_SUPERSPARC:
9879 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9880 break;
9881 case PROCESSOR_HYPERSPARC:
9882 case PROCESSOR_SPARCLITE86X:
9883 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9884 break;
9885 default:
9886 break;
9888 return cost;
9891 static void
9892 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9893 int sched_verbose ATTRIBUTE_UNUSED,
9894 int max_ready ATTRIBUTE_UNUSED)
9897 static int
9898 sparc_use_sched_lookahead (void)
9900 if (sparc_cpu == PROCESSOR_NIAGARA
9901 || sparc_cpu == PROCESSOR_NIAGARA2
9902 || sparc_cpu == PROCESSOR_NIAGARA3)
9903 return 0;
9904 if (sparc_cpu == PROCESSOR_NIAGARA4
9905 || sparc_cpu == PROCESSOR_NIAGARA7
9906 || sparc_cpu == PROCESSOR_M8)
9907 return 2;
9908 if (sparc_cpu == PROCESSOR_ULTRASPARC
9909 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9910 return 4;
9911 if ((1 << sparc_cpu) &
9912 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9913 (1 << PROCESSOR_SPARCLITE86X)))
9914 return 3;
9915 return 0;
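/* Illustrative note: the final test above is a compact set-membership
   check.  (1 << sparc_cpu) produces a one-hot mask for the current CPU,
   and ANDing it against the OR of the three one-hot processor masks is
   nonzero exactly when sparc_cpu is one of SUPERSPARC, HYPERSPARC or
   SPARCLITE86X, i.e. the same result as three equality comparisons.  */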
9918 static int
9919 sparc_issue_rate (void)
9921 switch (sparc_cpu)
9923 case PROCESSOR_NIAGARA:
9924 case PROCESSOR_NIAGARA2:
9925 case PROCESSOR_NIAGARA3:
9926 default:
9927 return 1;
9928 case PROCESSOR_NIAGARA4:
9929 case PROCESSOR_NIAGARA7:
9930 case PROCESSOR_V9:
9931 /* Assume V9 processors are capable of at least dual-issue. */
9932 return 2;
9933 case PROCESSOR_SUPERSPARC:
9934 return 3;
9935 case PROCESSOR_HYPERSPARC:
9936 case PROCESSOR_SPARCLITE86X:
9937 return 2;
9938 case PROCESSOR_ULTRASPARC:
9939 case PROCESSOR_ULTRASPARC3:
9940 case PROCESSOR_M8:
9941 return 4;
9945 static int
9946 set_extends (rtx_insn *insn)
9948 register rtx pat = PATTERN (insn);
9950 switch (GET_CODE (SET_SRC (pat)))
9952 /* Load and some shift instructions zero extend. */
9953 case MEM:
9954 case ZERO_EXTEND:
9955 /* sethi clears the high bits */
9956 case HIGH:
9957 /* LO_SUM is used with sethi; sethi clears the high
9958 bits and the values used with lo_sum are positive. */
9959 case LO_SUM:
9960 /* Store flag stores 0 or 1 */
9961 case LT: case LTU:
9962 case GT: case GTU:
9963 case LE: case LEU:
9964 case GE: case GEU:
9965 case EQ:
9966 case NE:
9967 return 1;
9968 case AND:
9970 rtx op0 = XEXP (SET_SRC (pat), 0);
9971 rtx op1 = XEXP (SET_SRC (pat), 1);
9972 if (GET_CODE (op1) == CONST_INT)
9973 return INTVAL (op1) >= 0;
9974 if (GET_CODE (op0) != REG)
9975 return 0;
9976 if (sparc_check_64 (op0, insn) == 1)
9977 return 1;
9978 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9980 case IOR:
9981 case XOR:
9983 rtx op0 = XEXP (SET_SRC (pat), 0);
9984 rtx op1 = XEXP (SET_SRC (pat), 1);
9985 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9986 return 0;
9987 if (GET_CODE (op1) == CONST_INT)
9988 return INTVAL (op1) >= 0;
9989 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9991 case LSHIFTRT:
9992 return GET_MODE (SET_SRC (pat)) == SImode;
9993 /* Positive integers leave the high bits zero. */
9994 case CONST_INT:
9995 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9996 case ASHIFTRT:
9997 case SIGN_EXTEND:
9998 return - (GET_MODE (SET_SRC (pat)) == SImode);
9999 case REG:
10000 return sparc_check_64 (SET_SRC (pat), insn);
10001 default:
10002 return 0;
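/* Worked example for the AND case above (illustrative only): for
   "set %o0 = %o1 & 0xff", op1 is a non-negative CONST_INT, so the
   result's upper 32 bits are guaranteed zero no matter what %o1 held,
   and set_extends returns 1.  With a negative mask the function
   conservatively returns 0, and with a register mask it falls back to
   asking sparc_check_64 about the operands.  */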
10006 /* We _ought_ to have only one kind per function, but... */
10007 static GTY(()) rtx sparc_addr_diff_list;
10008 static GTY(()) rtx sparc_addr_list;
10010 void
10011 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10013 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10014 if (diff)
10015 sparc_addr_diff_list
10016 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10017 else
10018 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10021 static void
10022 sparc_output_addr_vec (rtx vec)
10024 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10025 int idx, vlen = XVECLEN (body, 0);
10027 #ifdef ASM_OUTPUT_ADDR_VEC_START
10028 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10029 #endif
10031 #ifdef ASM_OUTPUT_CASE_LABEL
10032 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10033 NEXT_INSN (lab));
10034 #else
10035 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10036 #endif
10038 for (idx = 0; idx < vlen; idx++)
10040 ASM_OUTPUT_ADDR_VEC_ELT
10041 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10044 #ifdef ASM_OUTPUT_ADDR_VEC_END
10045 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10046 #endif
10049 static void
10050 sparc_output_addr_diff_vec (rtx vec)
10052 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10053 rtx base = XEXP (XEXP (body, 0), 0);
10054 int idx, vlen = XVECLEN (body, 1);
10056 #ifdef ASM_OUTPUT_ADDR_VEC_START
10057 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10058 #endif
10060 #ifdef ASM_OUTPUT_CASE_LABEL
10061 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10062 NEXT_INSN (lab));
10063 #else
10064 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10065 #endif
10067 for (idx = 0; idx < vlen; idx++)
10069 ASM_OUTPUT_ADDR_DIFF_ELT
10070 (asm_out_file,
10071 body,
10072 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10073 CODE_LABEL_NUMBER (base));
10076 #ifdef ASM_OUTPUT_ADDR_VEC_END
10077 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10078 #endif
10081 static void
10082 sparc_output_deferred_case_vectors (void)
10084 rtx t;
10085 int align;
10087 if (sparc_addr_list == NULL_RTX
10088 && sparc_addr_diff_list == NULL_RTX)
10089 return;
10091 /* Align to cache line in the function's code section. */
10092 switch_to_section (current_function_section ());
10094 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10095 if (align > 0)
10096 ASM_OUTPUT_ALIGN (asm_out_file, align);
10098 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10099 sparc_output_addr_vec (XEXP (t, 0));
10100 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10101 sparc_output_addr_diff_vec (XEXP (t, 0));
10103 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10106 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10107 unknown. Return 1 if the high bits are zero, -1 if the register is
10108 sign extended. */
10109 int
10110 sparc_check_64 (rtx x, rtx_insn *insn)
10112 /* If a register is set only once it is safe to ignore insns this
10113 code does not know how to handle. The loop will either recognize
10114 the single set and return the correct value or fail to recognize
10115 it and return 0. */
10116 int set_once = 0;
10117 rtx y = x;
10119 gcc_assert (GET_CODE (x) == REG);
10121 if (GET_MODE (x) == DImode)
10122 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10124 if (flag_expensive_optimizations
10125 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10126 set_once = 1;
10128 if (insn == 0)
10130 if (set_once)
10131 insn = get_last_insn_anywhere ();
10132 else
10133 return 0;
10136 while ((insn = PREV_INSN (insn)))
10138 switch (GET_CODE (insn))
10140 case JUMP_INSN:
10141 case NOTE:
10142 break;
10143 case CODE_LABEL:
10144 case CALL_INSN:
10145 default:
10146 if (! set_once)
10147 return 0;
10148 break;
10149 case INSN:
10151 rtx pat = PATTERN (insn);
10152 if (GET_CODE (pat) != SET)
10153 return 0;
10154 if (rtx_equal_p (x, SET_DEST (pat)))
10155 return set_extends (insn);
10156 if (y && rtx_equal_p (y, SET_DEST (pat)))
10157 return set_extends (insn);
10158 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10159 return 0;
10163 return 0;
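/* Illustrative trace: the loop walks backwards from INSN; if it finds a
   single SET of X (or of its low SImode word Y) it defers to set_extends
   to classify that producer, while an intervening CALL_INSN or CODE_LABEL
   makes the answer 0 ("unknown") unless dataflow proved the register is
   set exactly once.  */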
10166 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10167 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10169 const char *
10170 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10172 static char asm_code[60];
10174 /* The scratch register is only required when the destination
10175 register is not a 64-bit global or out register. */
10176 if (which_alternative != 2)
10177 operands[3] = operands[0];
10179 /* We can only shift by constants <= 63. */
10180 if (GET_CODE (operands[2]) == CONST_INT)
10181 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10183 if (GET_CODE (operands[1]) == CONST_INT)
10185 output_asm_insn ("mov\t%1, %3", operands);
10187 else
10189 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10190 if (sparc_check_64 (operands[1], insn) <= 0)
10191 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10192 output_asm_insn ("or\t%L1, %3, %3", operands);
10195 strcpy (asm_code, opcode);
10197 if (which_alternative != 2)
10198 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10199 else
10200 return
10201 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
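/* Illustrative output (assumed operands, not from the original source):
   for OPCODE "sllx" with a variable 64-bit input split across %H1/%L1
   and the scratch-register alternative selected, the code above emits
   roughly:

	sllx	%H1, 32, %3	! glue the two halves into the scratch
	srl	%L1, 0, %L1	! clear the high bits unless known zero
	or	%L1, %3, %3
	sllx	%3, %2, %3	! the shift itself
	srlx	%3, 32, %H0	! split the result back into two words
	mov	%3, %L0  */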
10204 /* Output rtl to increment the profiler label LABELNO
10205 for profiling a function entry. */
10207 void
10208 sparc_profile_hook (int labelno)
10210 char buf[32];
10211 rtx lab, fun;
10213 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10214 if (NO_PROFILE_COUNTERS)
10216 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10218 else
10220 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10221 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10222 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
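/* Illustrative call sequence (assumptions marked): with profile counters
   enabled and LABELNO 1, ASM_GENERATE_INTERNAL_LABEL produces an internal
   label name such as ".LP1", and a library call to whatever function
   MCOUNT_FUNCTION names on the target is emitted with the label address
   as argument, so the profiler can attribute hits to this function.  */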
10226 #ifdef TARGET_SOLARIS
10227 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10229 static void
10230 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10231 tree decl ATTRIBUTE_UNUSED)
10233 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10235 solaris_elf_asm_comdat_section (name, flags, decl);
10236 return;
10239 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10241 if (!(flags & SECTION_DEBUG))
10242 fputs (",#alloc", asm_out_file);
10243 if (flags & SECTION_WRITE)
10244 fputs (",#write", asm_out_file);
10245 if (flags & SECTION_TLS)
10246 fputs (",#tls", asm_out_file);
10247 if (flags & SECTION_CODE)
10248 fputs (",#execinstr", asm_out_file);
10250 if (flags & SECTION_NOTYPE)
10252 else if (flags & SECTION_BSS)
10253 fputs (",#nobits", asm_out_file);
10254 else
10255 fputs (",#progbits", asm_out_file);
10257 fputc ('\n', asm_out_file);
10259 #endif /* TARGET_SOLARIS */
10261 /* We do not allow indirect calls to be optimized into sibling calls.
10263 We cannot use sibling calls when delayed branches are disabled
10264 because they will likely require the call delay slot to be filled.
10266 Also, on SPARC 32-bit we cannot emit a sibling call when the
10267 current function returns a structure. This is because the "unimp
10268 after call" convention would cause the callee to return to the
10269 wrong place. The generic code already disallows cases where the
10270 function being called returns a structure.
10272 It may seem strange how this last case could occur. Usually there
10273 is code after the call which jumps to epilogue code which dumps the
10274 return value into the struct return area. That ought to invalidate
10275 the sibling call, right? Well, in the C++ case we can end up passing
10276 the pointer to the struct return area to a constructor (which returns
10277 void) and then nothing else happens. Such a sibling call would look
10278 valid without the added check here.
10280 VxWorks PIC PLT entries require the global pointer to be initialized
10281 on entry. We therefore can't emit sibling calls to them. */
10282 static bool
10283 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10285 return (decl
10286 && flag_delayed_branch
10287 && (TARGET_ARCH64 || ! cfun->returns_struct)
10288 && !(TARGET_VXWORKS_RTP
10289 && flag_pic
10290 && !targetm.binds_local_p (decl)));
10293 /* libfunc renaming. */
10295 static void
10296 sparc_init_libfuncs (void)
10298 if (TARGET_ARCH32)
10300 /* Use the subroutines that Sun's library provides for integer
10301 multiply and divide. The `*' prevents an underscore from
10302 being prepended by the compiler. .umul is a little faster
10303 than .mul. */
10304 set_optab_libfunc (smul_optab, SImode, "*.umul");
10305 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10306 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10307 set_optab_libfunc (smod_optab, SImode, "*.rem");
10308 set_optab_libfunc (umod_optab, SImode, "*.urem");
10310 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
10311 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10312 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10313 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10314 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10315 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10317 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10318 is because with soft-float, the SFmode and DFmode sqrt
10319 instructions will be absent, and the compiler will notice and
10320 try to use the TFmode sqrt instruction for calls to the
10321 builtin function sqrt, but this fails. */
10322 if (TARGET_FPU)
10323 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10325 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10326 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10327 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10328 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10329 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10330 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10332 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10333 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10334 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10335 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10337 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10338 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10339 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10340 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10342 if (DITF_CONVERSION_LIBFUNCS)
10344 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10345 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10346 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10347 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10350 if (SUN_CONVERSION_LIBFUNCS)
10352 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10353 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10354 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10355 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10358 if (TARGET_ARCH64)
10360 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10361 do not exist in the library. Make sure the compiler does not
10362 emit calls to them by accident. (It should always use the
10363 hardware instructions.) */
10364 set_optab_libfunc (smul_optab, SImode, 0);
10365 set_optab_libfunc (sdiv_optab, SImode, 0);
10366 set_optab_libfunc (udiv_optab, SImode, 0);
10367 set_optab_libfunc (smod_optab, SImode, 0);
10368 set_optab_libfunc (umod_optab, SImode, 0);
10370 if (SUN_INTEGER_MULTIPLY_64)
10372 set_optab_libfunc (smul_optab, DImode, "__mul64");
10373 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10374 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10375 set_optab_libfunc (smod_optab, DImode, "__rem64");
10376 set_optab_libfunc (umod_optab, DImode, "__urem64");
10379 if (SUN_CONVERSION_LIBFUNCS)
10381 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10382 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10383 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10384 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
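/* Example effect (illustrative): after this hook runs on a 32-bit target,
   a source-level long double multiply such as

	long double f (long double a, long double b) { return a * b; }

   is lowered to a call to _Q_mul rather than inline FP code, per the
   TFmode optab entries registered above.  */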
10389 /* SPARC builtins. */
10390 enum sparc_builtins
10392 /* FPU builtins. */
10393 SPARC_BUILTIN_LDFSR,
10394 SPARC_BUILTIN_STFSR,
10396 /* VIS 1.0 builtins. */
10397 SPARC_BUILTIN_FPACK16,
10398 SPARC_BUILTIN_FPACK32,
10399 SPARC_BUILTIN_FPACKFIX,
10400 SPARC_BUILTIN_FEXPAND,
10401 SPARC_BUILTIN_FPMERGE,
10402 SPARC_BUILTIN_FMUL8X16,
10403 SPARC_BUILTIN_FMUL8X16AU,
10404 SPARC_BUILTIN_FMUL8X16AL,
10405 SPARC_BUILTIN_FMUL8SUX16,
10406 SPARC_BUILTIN_FMUL8ULX16,
10407 SPARC_BUILTIN_FMULD8SUX16,
10408 SPARC_BUILTIN_FMULD8ULX16,
10409 SPARC_BUILTIN_FALIGNDATAV4HI,
10410 SPARC_BUILTIN_FALIGNDATAV8QI,
10411 SPARC_BUILTIN_FALIGNDATAV2SI,
10412 SPARC_BUILTIN_FALIGNDATADI,
10413 SPARC_BUILTIN_WRGSR,
10414 SPARC_BUILTIN_RDGSR,
10415 SPARC_BUILTIN_ALIGNADDR,
10416 SPARC_BUILTIN_ALIGNADDRL,
10417 SPARC_BUILTIN_PDIST,
10418 SPARC_BUILTIN_EDGE8,
10419 SPARC_BUILTIN_EDGE8L,
10420 SPARC_BUILTIN_EDGE16,
10421 SPARC_BUILTIN_EDGE16L,
10422 SPARC_BUILTIN_EDGE32,
10423 SPARC_BUILTIN_EDGE32L,
10424 SPARC_BUILTIN_FCMPLE16,
10425 SPARC_BUILTIN_FCMPLE32,
10426 SPARC_BUILTIN_FCMPNE16,
10427 SPARC_BUILTIN_FCMPNE32,
10428 SPARC_BUILTIN_FCMPGT16,
10429 SPARC_BUILTIN_FCMPGT32,
10430 SPARC_BUILTIN_FCMPEQ16,
10431 SPARC_BUILTIN_FCMPEQ32,
10432 SPARC_BUILTIN_FPADD16,
10433 SPARC_BUILTIN_FPADD16S,
10434 SPARC_BUILTIN_FPADD32,
10435 SPARC_BUILTIN_FPADD32S,
10436 SPARC_BUILTIN_FPSUB16,
10437 SPARC_BUILTIN_FPSUB16S,
10438 SPARC_BUILTIN_FPSUB32,
10439 SPARC_BUILTIN_FPSUB32S,
10440 SPARC_BUILTIN_ARRAY8,
10441 SPARC_BUILTIN_ARRAY16,
10442 SPARC_BUILTIN_ARRAY32,
10444 /* VIS 2.0 builtins. */
10445 SPARC_BUILTIN_EDGE8N,
10446 SPARC_BUILTIN_EDGE8LN,
10447 SPARC_BUILTIN_EDGE16N,
10448 SPARC_BUILTIN_EDGE16LN,
10449 SPARC_BUILTIN_EDGE32N,
10450 SPARC_BUILTIN_EDGE32LN,
10451 SPARC_BUILTIN_BMASK,
10452 SPARC_BUILTIN_BSHUFFLEV4HI,
10453 SPARC_BUILTIN_BSHUFFLEV8QI,
10454 SPARC_BUILTIN_BSHUFFLEV2SI,
10455 SPARC_BUILTIN_BSHUFFLEDI,
10457 /* VIS 3.0 builtins. */
10458 SPARC_BUILTIN_CMASK8,
10459 SPARC_BUILTIN_CMASK16,
10460 SPARC_BUILTIN_CMASK32,
10461 SPARC_BUILTIN_FCHKSM16,
10462 SPARC_BUILTIN_FSLL16,
10463 SPARC_BUILTIN_FSLAS16,
10464 SPARC_BUILTIN_FSRL16,
10465 SPARC_BUILTIN_FSRA16,
10466 SPARC_BUILTIN_FSLL32,
10467 SPARC_BUILTIN_FSLAS32,
10468 SPARC_BUILTIN_FSRL32,
10469 SPARC_BUILTIN_FSRA32,
10470 SPARC_BUILTIN_PDISTN,
10471 SPARC_BUILTIN_FMEAN16,
10472 SPARC_BUILTIN_FPADD64,
10473 SPARC_BUILTIN_FPSUB64,
10474 SPARC_BUILTIN_FPADDS16,
10475 SPARC_BUILTIN_FPADDS16S,
10476 SPARC_BUILTIN_FPSUBS16,
10477 SPARC_BUILTIN_FPSUBS16S,
10478 SPARC_BUILTIN_FPADDS32,
10479 SPARC_BUILTIN_FPADDS32S,
10480 SPARC_BUILTIN_FPSUBS32,
10481 SPARC_BUILTIN_FPSUBS32S,
10482 SPARC_BUILTIN_FUCMPLE8,
10483 SPARC_BUILTIN_FUCMPNE8,
10484 SPARC_BUILTIN_FUCMPGT8,
10485 SPARC_BUILTIN_FUCMPEQ8,
10486 SPARC_BUILTIN_FHADDS,
10487 SPARC_BUILTIN_FHADDD,
10488 SPARC_BUILTIN_FHSUBS,
10489 SPARC_BUILTIN_FHSUBD,
10490 SPARC_BUILTIN_FNHADDS,
10491 SPARC_BUILTIN_FNHADDD,
10492 SPARC_BUILTIN_UMULXHI,
10493 SPARC_BUILTIN_XMULX,
10494 SPARC_BUILTIN_XMULXHI,
10496 /* VIS 4.0 builtins. */
10497 SPARC_BUILTIN_FPADD8,
10498 SPARC_BUILTIN_FPADDS8,
10499 SPARC_BUILTIN_FPADDUS8,
10500 SPARC_BUILTIN_FPADDUS16,
10501 SPARC_BUILTIN_FPCMPLE8,
10502 SPARC_BUILTIN_FPCMPGT8,
10503 SPARC_BUILTIN_FPCMPULE16,
10504 SPARC_BUILTIN_FPCMPUGT16,
10505 SPARC_BUILTIN_FPCMPULE32,
10506 SPARC_BUILTIN_FPCMPUGT32,
10507 SPARC_BUILTIN_FPMAX8,
10508 SPARC_BUILTIN_FPMAX16,
10509 SPARC_BUILTIN_FPMAX32,
10510 SPARC_BUILTIN_FPMAXU8,
10511 SPARC_BUILTIN_FPMAXU16,
10512 SPARC_BUILTIN_FPMAXU32,
10513 SPARC_BUILTIN_FPMIN8,
10514 SPARC_BUILTIN_FPMIN16,
10515 SPARC_BUILTIN_FPMIN32,
10516 SPARC_BUILTIN_FPMINU8,
10517 SPARC_BUILTIN_FPMINU16,
10518 SPARC_BUILTIN_FPMINU32,
10519 SPARC_BUILTIN_FPSUB8,
10520 SPARC_BUILTIN_FPSUBS8,
10521 SPARC_BUILTIN_FPSUBUS8,
10522 SPARC_BUILTIN_FPSUBUS16,
10524 /* VIS 4.0B builtins. */
10526 /* Note that all the DICTUNPACK* entries should be kept
10527 contiguous. */
10528 SPARC_BUILTIN_FIRST_DICTUNPACK,
10529 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10530 SPARC_BUILTIN_DICTUNPACK16,
10531 SPARC_BUILTIN_DICTUNPACK32,
10532 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10534 /* Note that all the FPCMP*SHL entries should be kept
10535 contiguous. */
10536 SPARC_BUILTIN_FIRST_FPCMPSHL,
10537 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10538 SPARC_BUILTIN_FPCMPGT8SHL,
10539 SPARC_BUILTIN_FPCMPEQ8SHL,
10540 SPARC_BUILTIN_FPCMPNE8SHL,
10541 SPARC_BUILTIN_FPCMPLE16SHL,
10542 SPARC_BUILTIN_FPCMPGT16SHL,
10543 SPARC_BUILTIN_FPCMPEQ16SHL,
10544 SPARC_BUILTIN_FPCMPNE16SHL,
10545 SPARC_BUILTIN_FPCMPLE32SHL,
10546 SPARC_BUILTIN_FPCMPGT32SHL,
10547 SPARC_BUILTIN_FPCMPEQ32SHL,
10548 SPARC_BUILTIN_FPCMPNE32SHL,
10549 SPARC_BUILTIN_FPCMPULE8SHL,
10550 SPARC_BUILTIN_FPCMPUGT8SHL,
10551 SPARC_BUILTIN_FPCMPULE16SHL,
10552 SPARC_BUILTIN_FPCMPUGT16SHL,
10553 SPARC_BUILTIN_FPCMPULE32SHL,
10554 SPARC_BUILTIN_FPCMPUGT32SHL,
10555 SPARC_BUILTIN_FPCMPDE8SHL,
10556 SPARC_BUILTIN_FPCMPDE16SHL,
10557 SPARC_BUILTIN_FPCMPDE32SHL,
10558 SPARC_BUILTIN_FPCMPUR8SHL,
10559 SPARC_BUILTIN_FPCMPUR16SHL,
10560 SPARC_BUILTIN_FPCMPUR32SHL,
10561 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10563 SPARC_BUILTIN_MAX
10566 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10567 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10569 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10570 The instruction should require a constant operand of some sort. The
10571 function prints an error if OPVAL is not valid. */
10573 static int
10574 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10576 if (GET_CODE (opval) != CONST_INT)
10578 error ("%qs expects a constant argument", insn_data[icode].name);
10579 return false;
10582 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10584 error ("constant argument out of range for %qs", insn_data[icode].name);
10585 return false;
10587 return true;
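/* Usage sketch (illustrative): the expander below calls this for builtins
   that encode an immediate in the instruction, e.g. the shift amount of
   the FPCMP*SHL forms and the selector of the DICTUNPACK forms, so that
   writing __builtin_vis_dictunpack8 (x, i) with a variable I is diagnosed
   instead of silently miscompiling.  */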
10590 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10591 function decl or NULL_TREE if the builtin was not added. */
10593 static tree
10594 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10595 tree type)
10597 tree t
10598 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10600 if (t)
10602 sparc_builtins[code] = t;
10603 sparc_builtins_icode[code] = icode;
10606 return t;
10609 /* Likewise, but also marks the function as "const". */
10611 static tree
10612 def_builtin_const (const char *name, enum insn_code icode,
10613 enum sparc_builtins code, tree type)
10615 tree t = def_builtin (name, icode, code, type);
10617 if (t)
10618 TREE_READONLY (t) = 1;
10620 return t;
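/* Note on the "const" marking (illustrative): setting TREE_READONLY lets
   the middle end treat calls such as __builtin_vis_fexpand as functions
   of their operands alone, so identical calls can be CSEd.  That would be
   wrong for the GSR-dependent builtins like faligndata and bshuffle,
   which are therefore registered with plain def_builtin instead.  */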
10623 /* Implement the TARGET_INIT_BUILTINS target hook.
10624 Create builtin functions for special SPARC instructions. */
10626 static void
10627 sparc_init_builtins (void)
10629 if (TARGET_FPU)
10630 sparc_fpu_init_builtins ();
10632 if (TARGET_VIS)
10633 sparc_vis_init_builtins ();
10636 /* Create builtin functions for FPU instructions. */
10638 static void
10639 sparc_fpu_init_builtins (void)
10641 tree ftype
10642 = build_function_type_list (void_type_node,
10643 build_pointer_type (unsigned_type_node), 0);
10644 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10645 SPARC_BUILTIN_LDFSR, ftype);
10646 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10647 SPARC_BUILTIN_STFSR, ftype);
10650 /* Create builtin functions for VIS instructions. */
10652 static void
10653 sparc_vis_init_builtins (void)
10655 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10656 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10657 tree v4hi = build_vector_type (intHI_type_node, 4);
10658 tree v2hi = build_vector_type (intHI_type_node, 2);
10659 tree v2si = build_vector_type (intSI_type_node, 2);
10660 tree v1si = build_vector_type (intSI_type_node, 1);
10662 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10663 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10664 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10665 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10666 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10667 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10668 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10669 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10670 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10671 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10672 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10673 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10674 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10675 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10676 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10677 v8qi, v8qi,
10678 intDI_type_node, 0);
10679 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10680 v8qi, v8qi, 0);
10681 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10682 v8qi, v8qi, 0);
10683 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10684 intSI_type_node, 0);
10685 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10686 intSI_type_node, 0);
10687 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10688 intDI_type_node, 0);
10689 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10690 intDI_type_node,
10691 intDI_type_node, 0);
10692 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10693 intSI_type_node,
10694 intSI_type_node, 0);
10695 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10696 ptr_type_node,
10697 intSI_type_node, 0);
10698 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10699 ptr_type_node,
10700 intDI_type_node, 0);
10701 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10702 ptr_type_node,
10703 ptr_type_node, 0);
10704 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10705 ptr_type_node,
10706 ptr_type_node, 0);
10707 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10708 v4hi, v4hi, 0);
10709 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10710 v2si, v2si, 0);
10711 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10712 v4hi, v4hi, 0);
10713 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10714 v2si, v2si, 0);
10715 tree void_ftype_di = build_function_type_list (void_type_node,
10716 intDI_type_node, 0);
10717 tree di_ftype_void = build_function_type_list (intDI_type_node,
10718 void_type_node, 0);
10719 tree void_ftype_si = build_function_type_list (void_type_node,
10720 intSI_type_node, 0);
10721 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10722 float_type_node,
10723 float_type_node, 0);
10724 tree df_ftype_df_df = build_function_type_list (double_type_node,
10725 double_type_node,
10726 double_type_node, 0);
10728 /* Packing and expanding vectors. */
10729 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10730 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10731 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10732 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10733 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10734 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10735 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10736 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10737 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10738 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10740 /* Multiplications. */
10741 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10742 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10743 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10744 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10745 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10746 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10747 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10748 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10749 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10750 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10751 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10752 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10753 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10754 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10756 /* Data aligning. */
10757 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10758 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10759 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10760 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10761 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10762 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10763 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10764 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10766 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10767 SPARC_BUILTIN_WRGSR, void_ftype_di);
10768 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10769 SPARC_BUILTIN_RDGSR, di_ftype_void);
10771 if (TARGET_ARCH64)
10773 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10774 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10775 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10776 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10778 else
10780 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10781 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10782 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10783 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10786 /* Pixel distance. */
10787 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10788 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10790 /* Edge handling. */
10791 if (TARGET_ARCH64)
10793 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10794 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10795 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10796 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10797 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10798 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10799 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10800 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10801 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10802 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10803 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10804 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10806 else
10808 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10809 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10810 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10811 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10812 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10813 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10814 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10815 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10816 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10817 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10818 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10819 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10822 /* Pixel compare. */
10823 if (TARGET_ARCH64)
10825 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10826 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10827 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10828 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10829 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10830 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10831 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10832 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10833 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10834 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10835 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10836 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10837 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10838 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10839 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10840 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10842 else
10844 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10845 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10846 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10847 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10848 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10849 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10850 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10851 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10852 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10853 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10854 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10855 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10856 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10857 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10858 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10859 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10862 /* Addition and subtraction. */
10863 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10864 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10865 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10866 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10867 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10868 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10869 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10870 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10871 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10872 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10873 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10874 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10875 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10876 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10877 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10878 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10880 /* Three-dimensional array addressing. */
10881 if (TARGET_ARCH64)
10883 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10884 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10885 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10886 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10887 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10888 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10890 else
10892 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10893 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10894 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10895 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10896 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10897 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10900 if (TARGET_VIS2)
10902 /* Edge handling. */
10903 if (TARGET_ARCH64)
10905 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10906 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10907 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10908 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10909 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10910 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10911 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10912 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10913 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10914 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10915 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10916 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10918 else
10920 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10921 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10922 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10923 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10924 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10925 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10926 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10927 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10928 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10929 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10930 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10931 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10934 /* Byte mask and shuffle. */
10935 if (TARGET_ARCH64)
10936 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10937 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10938 else
10939 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10940 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10941 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10942 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10943 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10944 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10945 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10946 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10947 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10948 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10951 if (TARGET_VIS3)
10953 if (TARGET_ARCH64)
10955 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10956 SPARC_BUILTIN_CMASK8, void_ftype_di);
10957 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10958 SPARC_BUILTIN_CMASK16, void_ftype_di);
10959 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10960 SPARC_BUILTIN_CMASK32, void_ftype_di);
10962 else
10964 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10965 SPARC_BUILTIN_CMASK8, void_ftype_si);
10966 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10967 SPARC_BUILTIN_CMASK16, void_ftype_si);
10968 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10969 SPARC_BUILTIN_CMASK32, void_ftype_si);
10972 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10973 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10975 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10976 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10977 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10978 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10979 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10980 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10981 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10982 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10983 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10984 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10985 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10986 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10987 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10988 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10989 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10990 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10992 if (TARGET_ARCH64)
10993 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10994 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10995 else
10996 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10997 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10999 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11000 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11001 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11002 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11003 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11004 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11006 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11007 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11008 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11009 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11010 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11011 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11012 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11013 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11014 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11015 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11016 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11017 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11018 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11019 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11020 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11021 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11023 if (TARGET_ARCH64)
11025 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11026 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11027 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11028 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11029 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11030 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11031 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11032 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11034 else
11036 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11037 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11038 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11039 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11040 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11041 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11042 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11043 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11046 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11047 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11048 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11049 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11050 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11051 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11052 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11053 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11054 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11055 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11056 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11057 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11059 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11060 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11061 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11062 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11063 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11064 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11067 if (TARGET_VIS4)
11069 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11070 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11071 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11072 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11073 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11074 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11075 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11076 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11079 if (TARGET_ARCH64)
11081 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11082 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11083 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11084 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11085 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11086 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11087 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11088 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11089 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11090 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11091 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11092 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11094 else
11096 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11097 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11098 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11099 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11100 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11101 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11102 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11103 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11104 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11105 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11106 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11107 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11110 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11111 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11112 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11113 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11114 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11115 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11116 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11117 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11118 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11119 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11120 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11121 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11122 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11123 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11124 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11125 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11126 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11127 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11128 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11129 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11130 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11131 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11132 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11133 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11134 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11135 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11136 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11137 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11138 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11139 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11140 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11141 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11144 if (TARGET_VIS4B)
11146 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11147 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11148 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11149 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11150 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11151 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11153 if (TARGET_ARCH64)
11155 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11156 v8qi, v8qi,
11157 intSI_type_node, 0);
11158 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11159 v4hi, v4hi,
11160 intSI_type_node, 0);
11161 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11162 v2si, v2si,
11163 intSI_type_node, 0);
11165 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11166 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11167 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11168 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11169 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11170 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11171 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11172 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11174 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11175 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11176 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11177 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11178 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11179 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11180 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11181 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11183 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11184 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11185 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11186 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11187 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11188 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11189 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11190 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11193 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11194 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11195 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11196 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11198 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11199 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11200 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11201 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11203 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11204 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11205 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11206 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11208 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11209 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11210 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11211 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11212 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11213 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11215 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11216 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11217 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11218 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11219 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11220 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11223 else
11225 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11226 v8qi, v8qi,
11227 intSI_type_node, 0);
11228 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11229 v4hi, v4hi,
11230 intSI_type_node, 0);
11231 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11232 v2si, v2si,
11233 intSI_type_node, 0);
11235 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11236 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11237 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11238 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11239 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11240 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11241 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11242 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11244 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11245 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11246 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11247 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11248 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11249 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11250 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11251 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11253 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11254 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11255 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11256 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11257 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11258 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11259 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11260 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11263 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11264 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11265 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11266 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11268 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11269 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11270 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11271 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11273 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11274 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11275 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11276 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11278 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11279 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11280 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11281 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11282 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11283 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11285 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11286 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11287 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11288 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11289 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11290 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11295 /* Implement TARGET_BUILTIN_DECL hook. */
11297 static tree
11298 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11300 if (code >= SPARC_BUILTIN_MAX)
11301 return error_mark_node;
11303 return sparc_builtins[code];
11306 /* Implement TARGET_EXPAND_BUILTIN hook. */
11308 static rtx
11309 sparc_expand_builtin (tree exp, rtx target,
11310 rtx subtarget ATTRIBUTE_UNUSED,
11311 machine_mode tmode ATTRIBUTE_UNUSED,
11312 int ignore ATTRIBUTE_UNUSED)
11314 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11315 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11316 enum insn_code icode = sparc_builtins_icode[code];
11317 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11318 call_expr_arg_iterator iter;
11319 int arg_count = 0;
11320 rtx pat, op[4];
11321 tree arg;
11323 if (nonvoid)
11325 machine_mode tmode = insn_data[icode].operand[0].mode;
11326 if (!target
11327 || GET_MODE (target) != tmode
11328 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11329 op[0] = gen_reg_rtx (tmode);
11330 else
11331 op[0] = target;
11334 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11336 const struct insn_operand_data *insn_op;
11337 int idx;
11339 if (arg == error_mark_node)
11340 return NULL_RTX;
11342 arg_count++;
11343 idx = arg_count - !nonvoid;
11344 insn_op = &insn_data[icode].operand[idx];
11345 op[arg_count] = expand_normal (arg);
11347 /* Some of the builtins require constant arguments. We check
11348 for this here. */
11349 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11350 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11351 && arg_count == 3)
11352 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11353 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11354 && arg_count == 2))
11356 if (!check_constant_argument (icode, idx, op[arg_count]))
11357 return const0_rtx;
11360 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11362 if (!address_operand (op[arg_count], SImode))
11364 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11365 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11367 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11370 else if (insn_op->mode == V1DImode
11371 && GET_MODE (op[arg_count]) == DImode)
11372 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11374 else if (insn_op->mode == V1SImode
11375 && GET_MODE (op[arg_count]) == SImode)
11376 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11378 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11379 insn_op->mode))
11380 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11383 switch (arg_count)
11385 case 0:
11386 pat = GEN_FCN (icode) (op[0]);
11387 break;
11388 case 1:
11389 if (nonvoid)
11390 pat = GEN_FCN (icode) (op[0], op[1]);
11391 else
11392 pat = GEN_FCN (icode) (op[1]);
11393 break;
11394 case 2:
11395 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11396 break;
11397 case 3:
11398 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11399 break;
11400 default:
11401 gcc_unreachable ();
11404 if (!pat)
11405 return NULL_RTX;
11407 emit_insn (pat);
11409 return (nonvoid ? op[0] : const0_rtx);
11412 /* Return the upper 16 bits of the 8x16 multiplication. */
11414 static int
11415 sparc_vis_mul8x16 (int e8, int e16)
11417 return (e8 * e16 + 128) / 256;
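/* A worked example of the rounding above (illustrative values only):
   with e8 == 100 and e16 == 256, i.e. 1.0 in the implied 8-bit
   fixed-point scale, (100 * 256 + 128) / 256 == 100, so scaling by
   1.0 returns e8 unchanged; with e16 == 384, i.e. 1.5, the result is
   (100 * 384 + 128) / 256 == 150.  */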
11420 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11421 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11423 static void
11424 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
11425 tree inner_type, tree cst0, tree cst1)
11427 unsigned i, num = VECTOR_CST_NELTS (cst0);
11428 int scale;
11430 switch (fncode)
11432 case SPARC_BUILTIN_FMUL8X16:
11433 for (i = 0; i < num; ++i)
11435 int val
11436 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11437 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11438 n_elts[i] = build_int_cst (inner_type, val);
11440 break;
11442 case SPARC_BUILTIN_FMUL8X16AU:
11443 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11445 for (i = 0; i < num; ++i)
11447 int val
11448 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11449 scale);
11450 n_elts[i] = build_int_cst (inner_type, val);
11452 break;
11454 case SPARC_BUILTIN_FMUL8X16AL:
11455 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11457 for (i = 0; i < num; ++i)
11459 int val
11460 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11461 scale);
11462 n_elts[i] = build_int_cst (inner_type, val);
11464 break;
11466 default:
11467 gcc_unreachable ();
11471 /* Implement TARGET_FOLD_BUILTIN hook.
11473 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11474 result of the function call is ignored. NULL_TREE is returned if the
11475 function could not be folded. */
11477 static tree
11478 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11479 tree *args, bool ignore)
11481 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11482 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11483 tree arg0, arg1, arg2;
11485 if (ignore)
11486 switch (code)
11488 case SPARC_BUILTIN_LDFSR:
11489 case SPARC_BUILTIN_STFSR:
11490 case SPARC_BUILTIN_ALIGNADDR:
11491 case SPARC_BUILTIN_WRGSR:
11492 case SPARC_BUILTIN_BMASK:
11493 case SPARC_BUILTIN_CMASK8:
11494 case SPARC_BUILTIN_CMASK16:
11495 case SPARC_BUILTIN_CMASK32:
11496 break;
11498 default:
11499 return build_zero_cst (rtype);
11502 switch (code)
11504 case SPARC_BUILTIN_FEXPAND:
11505 arg0 = args[0];
11506 STRIP_NOPS (arg0);
11508 if (TREE_CODE (arg0) == VECTOR_CST)
11510 tree inner_type = TREE_TYPE (rtype);
11511 tree *n_elts;
11512 unsigned i;
11514 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11515 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11516 n_elts[i] = build_int_cst (inner_type,
11517 TREE_INT_CST_LOW
11518 (VECTOR_CST_ELT (arg0, i)) << 4);
11519 return build_vector (rtype, n_elts);
11521 break;
11523 case SPARC_BUILTIN_FMUL8X16:
11524 case SPARC_BUILTIN_FMUL8X16AU:
11525 case SPARC_BUILTIN_FMUL8X16AL:
11526 arg0 = args[0];
11527 arg1 = args[1];
11528 STRIP_NOPS (arg0);
11529 STRIP_NOPS (arg1);
11531 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11533 tree inner_type = TREE_TYPE (rtype);
11534 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
11535 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
11536 return build_vector (rtype, n_elts);
11538 break;
11540 case SPARC_BUILTIN_FPMERGE:
11541 arg0 = args[0];
11542 arg1 = args[1];
11543 STRIP_NOPS (arg0);
11544 STRIP_NOPS (arg1);
11546 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11548 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
11549 unsigned i;
11550 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11552 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
11553 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
11556 return build_vector (rtype, n_elts);
11558 break;
11560 case SPARC_BUILTIN_PDIST:
11561 case SPARC_BUILTIN_PDISTN:
11562 arg0 = args[0];
11563 arg1 = args[1];
11564 STRIP_NOPS (arg0);
11565 STRIP_NOPS (arg1);
11566 if (code == SPARC_BUILTIN_PDIST)
11568 arg2 = args[2];
11569 STRIP_NOPS (arg2);
11571 else
11572 arg2 = integer_zero_node;
11574 if (TREE_CODE (arg0) == VECTOR_CST
11575 && TREE_CODE (arg1) == VECTOR_CST
11576 && TREE_CODE (arg2) == INTEGER_CST)
11578 bool overflow = false;
11579 widest_int result = wi::to_widest (arg2);
11580 widest_int tmp;
11581 unsigned i;
11583 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11585 tree e0 = VECTOR_CST_ELT (arg0, i);
11586 tree e1 = VECTOR_CST_ELT (arg1, i);
11588 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11590 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11591 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11592 if (wi::neg_p (tmp))
11593 tmp = wi::neg (tmp, &neg2_ovf);
11594 else
11595 neg2_ovf = false;
11596 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11597 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11600 gcc_assert (!overflow);
11602 return wide_int_to_tree (rtype, result);
11605 default:
11606 break;
11609 return NULL_TREE;
11612 /* ??? This duplicates information provided to the compiler by the
11613 ??? scheduler description. Some day, teach genautomata to output
11614 ??? the latencies and then CSE will just use that. */
11616 static bool
11617 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11618 int opno ATTRIBUTE_UNUSED,
11619 int *total, bool speed ATTRIBUTE_UNUSED)
11621 int code = GET_CODE (x);
11622 bool float_mode_p = FLOAT_MODE_P (mode);
11624 switch (code)
11626 case CONST_INT:
11627 if (SMALL_INT (x))
11628 *total = 0;
11629 else
11630 *total = 2;
11631 return true;
11633 case CONST_WIDE_INT:
11634 *total = 0;
11635 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11636 *total += 2;
11637 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11638 *total += 2;
11639 return true;
11641 case HIGH:
11642 *total = 2;
11643 return true;
11645 case CONST:
11646 case LABEL_REF:
11647 case SYMBOL_REF:
11648 *total = 4;
11649 return true;
11651 case CONST_DOUBLE:
11652 *total = 8;
11653 return true;
11655 case MEM:
11656 /* If outer-code was a sign or zero extension, a cost
11657 of COSTS_N_INSNS (1) was already added in. This is
11658 why we are subtracting it back out. */
11659 if (outer_code == ZERO_EXTEND)
11661 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11663 else if (outer_code == SIGN_EXTEND)
11665 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11667 else if (float_mode_p)
11669 *total = sparc_costs->float_load;
11671 else
11673 *total = sparc_costs->int_load;
11676 return true;
11678 case PLUS:
11679 case MINUS:
11680 if (float_mode_p)
11681 *total = sparc_costs->float_plusminus;
11682 else
11683 *total = COSTS_N_INSNS (1);
11684 return false;
11686 case FMA:
11688 rtx sub;
11690 gcc_assert (float_mode_p);
11691 *total = sparc_costs->float_mul;
11693 sub = XEXP (x, 0);
11694 if (GET_CODE (sub) == NEG)
11695 sub = XEXP (sub, 0);
11696 *total += rtx_cost (sub, mode, FMA, 0, speed);
11698 sub = XEXP (x, 2);
11699 if (GET_CODE (sub) == NEG)
11700 sub = XEXP (sub, 0);
11701 *total += rtx_cost (sub, mode, FMA, 2, speed);
11702 return true;
11705 case MULT:
11706 if (float_mode_p)
11707 *total = sparc_costs->float_mul;
11708 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11709 *total = COSTS_N_INSNS (25);
11710 else
11712 int bit_cost;
11714 bit_cost = 0;
11715 if (sparc_costs->int_mul_bit_factor)
11717 int nbits;
11719 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11721 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11722 for (nbits = 0; value != 0; value &= value - 1)
11723 nbits++;
11725 else
11726 nbits = 7;
11728 if (nbits < 3)
11729 nbits = 3;
11730 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11731 bit_cost = COSTS_N_INSNS (bit_cost);
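/* For instance, multiplying by a constant with 11 bits set on a CPU
   whose int_mul_bit_factor is 2 yields a bit_cost of
   COSTS_N_INSNS ((11 - 3) / 2), i.e. 4 insns' worth on top of the
   base multiply cost selected below.  */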
11734 if (mode == DImode || !TARGET_HARD_MUL)
11735 *total = sparc_costs->int_mulX + bit_cost;
11736 else
11737 *total = sparc_costs->int_mul + bit_cost;
11739 return false;
11741 case ASHIFT:
11742 case ASHIFTRT:
11743 case LSHIFTRT:
11744 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11745 return false;
11747 case DIV:
11748 case UDIV:
11749 case MOD:
11750 case UMOD:
11751 if (float_mode_p)
11753 if (mode == DFmode)
11754 *total = sparc_costs->float_div_df;
11755 else
11756 *total = sparc_costs->float_div_sf;
11758 else
11760 if (mode == DImode)
11761 *total = sparc_costs->int_divX;
11762 else
11763 *total = sparc_costs->int_div;
11765 return false;
11767 case NEG:
11768 if (! float_mode_p)
11770 *total = COSTS_N_INSNS (1);
11771 return false;
11773 /* FALLTHRU */
11775 case ABS:
11776 case FLOAT:
11777 case UNSIGNED_FLOAT:
11778 case FIX:
11779 case UNSIGNED_FIX:
11780 case FLOAT_EXTEND:
11781 case FLOAT_TRUNCATE:
11782 *total = sparc_costs->float_move;
11783 return false;
11785 case SQRT:
11786 if (mode == DFmode)
11787 *total = sparc_costs->float_sqrt_df;
11788 else
11789 *total = sparc_costs->float_sqrt_sf;
11790 return false;
11792 case COMPARE:
11793 if (float_mode_p)
11794 *total = sparc_costs->float_cmp;
11795 else
11796 *total = COSTS_N_INSNS (1);
11797 return false;
11799 case IF_THEN_ELSE:
11800 if (float_mode_p)
11801 *total = sparc_costs->float_cmove;
11802 else
11803 *total = sparc_costs->int_cmove;
11804 return false;
11806 case IOR:
11807 /* Handle the NAND vector patterns. */
11808 if (sparc_vector_mode_supported_p (mode)
11809 && GET_CODE (XEXP (x, 0)) == NOT
11810 && GET_CODE (XEXP (x, 1)) == NOT)
11812 *total = COSTS_N_INSNS (1);
11813 return true;
11815 else
11816 return false;
11818 default:
11819 return false;
11823 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11825 static inline bool
11826 general_or_i64_p (reg_class_t rclass)
11828 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11831 /* Implement TARGET_REGISTER_MOVE_COST. */
11833 static int
11834 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11835 reg_class_t from, reg_class_t to)
11837 bool need_memory = false;
11839 /* This helps postreload CSE to eliminate redundant comparisons. */
11840 if (from == NO_REGS || to == NO_REGS)
11841 return 100;
11843 if (from == FPCC_REGS || to == FPCC_REGS)
11844 need_memory = true;
11845 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11846 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11848 if (TARGET_VIS3)
11850 int size = GET_MODE_SIZE (mode);
11851 if (size == 8 || size == 4)
11853 if (! TARGET_ARCH32 || size == 4)
11854 return 4;
11855 else
11856 return 6;
11859 need_memory = true;
11862 if (need_memory)
11864 if (sparc_cpu == PROCESSOR_ULTRASPARC
11865 || sparc_cpu == PROCESSOR_ULTRASPARC3
11866 || sparc_cpu == PROCESSOR_NIAGARA
11867 || sparc_cpu == PROCESSOR_NIAGARA2
11868 || sparc_cpu == PROCESSOR_NIAGARA3
11869 || sparc_cpu == PROCESSOR_NIAGARA4
11870 || sparc_cpu == PROCESSOR_NIAGARA7
11871 || sparc_cpu == PROCESSOR_M8)
11872 return 12;
11874 return 6;
11877 return 2;
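/* By way of example: an SImode move between the integer and FP
   register files costs 4 when VIS3 provides direct move instructions;
   without VIS3 such a move must bounce through memory, costing 12 on
   the UltraSPARC/Niagara family and 6 elsewhere, while a move within
   a single register file costs 2.  */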
11880 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11881 This is achieved by means of a manual dynamic stack space allocation in
11882 the current frame. We make the assumption that SEQ doesn't contain any
11883 function calls, with the possible exception of calls to the GOT helper. */
11885 static void
11886 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11888 /* We must preserve the lowest 16 words for the register save area. */
11889 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11890 /* We really need only 2 words of fresh stack space. */
11891 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11893 rtx slot
11894 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11895 SPARC_STACK_BIAS + offset));
11897 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11898 emit_insn (gen_rtx_SET (slot, reg));
11899 if (reg2)
11900 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11901 reg2));
11902 emit_insn (seq);
11903 if (reg2)
11904 emit_insn (gen_rtx_SET (reg2,
11905 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11906 emit_insn (gen_rtx_SET (reg, slot));
11907 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
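/* To illustrate the layout above on 32-bit (UNITS_PER_WORD == 4,
   SPARC_STACK_BIAS == 0): the code reserves SPARC_STACK_ALIGN (72)
   bytes, saves REG at [%sp + 64] and REG2 at [%sp + 68], emits SEQ,
   then restores both registers and pops the stack space.  */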
11910 /* Output the assembler code for a thunk function. THUNK_DECL is the
11911 declaration for the thunk function itself, FUNCTION is the decl for
11912 the target function. DELTA is an immediate constant offset to be
11913 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11914 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11916 static void
11917 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11918 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11919 tree function)
11921 rtx this_rtx, funexp;
11922 rtx_insn *insn;
11923 unsigned int int_arg_first;
11925 reload_completed = 1;
11926 epilogue_completed = 1;
11928 emit_note (NOTE_INSN_PROLOGUE_END);
11930 if (TARGET_FLAT)
11932 sparc_leaf_function_p = 1;
11934 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11936 else if (flag_delayed_branch)
11938 /* We will emit a regular sibcall below, so we need to instruct
11939 output_sibcall that we are in a leaf function. */
11940 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11942 /* This will cause final.c to invoke leaf_renumber_regs so we
11943 must behave as if we were in a not-yet-leafified function. */
11944 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11946 else
11948 /* We will emit the sibcall manually below, so we will need to
11949 manually spill non-leaf registers. */
11950 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11952 /* We really are in a leaf function. */
11953 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11956 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11957 returns a structure, the structure return pointer is there instead. */
11958 if (TARGET_ARCH64
11959 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11960 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11961 else
11962 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11964 /* Add DELTA. When possible use a plain add, otherwise load it into
11965 a register first. */
11966 if (delta)
11968 rtx delta_rtx = GEN_INT (delta);
11970 if (! SPARC_SIMM13_P (delta))
11972 rtx scratch = gen_rtx_REG (Pmode, 1);
11973 emit_move_insn (scratch, delta_rtx);
11974 delta_rtx = scratch;
11977 /* THIS_RTX += DELTA. */
11978 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11981 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11982 if (vcall_offset)
11984 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11985 rtx scratch = gen_rtx_REG (Pmode, 1);
11987 gcc_assert (vcall_offset < 0);
11989 /* SCRATCH = *THIS_RTX. */
11990 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11992 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11993 may not have any available scratch register at this point. */
11994 if (SPARC_SIMM13_P (vcall_offset))
11996 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11997 else if (! fixed_regs[5]
11998 /* The below sequence is made up of at least 2 insns,
11999 while the default method may need only one. */
12000 && vcall_offset < -8192)
12002 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12003 emit_move_insn (scratch2, vcall_offset_rtx);
12004 vcall_offset_rtx = scratch2;
12006 else
12008 rtx increment = GEN_INT (-4096);
12010 /* VCALL_OFFSET is a negative number whose typical range can be
12011 estimated as -32768..0 in 32-bit mode. In almost all cases
12012 it is therefore cheaper to emit multiple add insns than
12013 spilling and loading the constant into a register (at least
12014 6 insns). */
12015 while (! SPARC_SIMM13_P (vcall_offset))
12017 emit_insn (gen_add2_insn (scratch, increment));
12018 vcall_offset += 4096;
12020 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12023 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12024 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12025 gen_rtx_PLUS (Pmode,
12026 scratch,
12027 vcall_offset_rtx)));
12029 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12030 emit_insn (gen_add2_insn (this_rtx, scratch));
12033 /* Generate a tail call to the target function. */
12034 if (! TREE_USED (function))
12036 assemble_external (function);
12037 TREE_USED (function) = 1;
12039 funexp = XEXP (DECL_RTL (function), 0);
12041 if (flag_delayed_branch)
12043 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12044 insn = emit_call_insn (gen_sibcall (funexp));
12045 SIBLING_CALL_P (insn) = 1;
12047 else
12049 /* The hoops we have to jump through in order to generate a sibcall
12050 without using delay slots... */
12051 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12053 if (flag_pic)
12055 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12056 start_sequence ();
12057 load_got_register (); /* clobbers %o7 */
12058 scratch = sparc_legitimize_pic_address (funexp, scratch);
12059 seq = get_insns ();
12060 end_sequence ();
12061 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12063 else if (TARGET_ARCH32)
12065 emit_insn (gen_rtx_SET (scratch,
12066 gen_rtx_HIGH (SImode, funexp)));
12067 emit_insn (gen_rtx_SET (scratch,
12068 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12070 else /* TARGET_ARCH64 */
12072 switch (sparc_cmodel)
12074 case CM_MEDLOW:
12075 case CM_MEDMID:
12076 /* The destination can serve as a temporary. */
12077 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12078 break;
12080 case CM_MEDANY:
12081 case CM_EMBMEDANY:
12082 /* The destination cannot serve as a temporary. */
12083 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12084 start_sequence ();
12085 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12086 seq = get_insns ();
12087 end_sequence ();
12088 emit_and_preserve (seq, spill_reg, 0);
12089 break;
12091 default:
12092 gcc_unreachable ();
12096 emit_jump_insn (gen_indirect_jump (scratch));
12099 emit_barrier ();
12101 /* Run just enough of rest_of_compilation to get the insns emitted.
12102 There's not really enough bulk here to make other passes such as
12103 instruction scheduling worthwhile. Note that use_thunk calls
12104 assemble_start_function and assemble_end_function. */
12105 insn = get_insns ();
12106 shorten_branches (insn);
12107 final_start_function (insn, file, 1);
12108 final (insn, file, 1);
12109 final_end_function ();
12111 reload_completed = 0;
12112 epilogue_completed = 0;
12115 /* Return true if sparc_output_mi_thunk would be able to output the
12116 assembler code for the thunk function specified by the arguments
12117 it is passed, and false otherwise. */
12118 static bool
12119 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12120 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12121 HOST_WIDE_INT vcall_offset,
12122 const_tree function ATTRIBUTE_UNUSED)
12124 /* Bound the loop used in the default method above. */
12125 return (vcall_offset >= -32768 || ! fixed_regs[5]);
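/* This bound matches the multiple-add fallback in the thunk code
   above: starting from an offset of -32768, at most
   (32768 - 4096) / 4096 == 7 add insns are needed before the residue
   fits in a SIMM13; anything lower would make the loop too long
   unless %g5 is available as a second scratch register.  */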
12128 /* How to allocate a 'struct machine_function'. */
12130 static struct machine_function *
12131 sparc_init_machine_status (void)
12133 return ggc_cleared_alloc<machine_function> ();
12136 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12137 We need to emit DTP-relative relocations. */
12139 static void
12140 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12142 switch (size)
12144 case 4:
12145 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12146 break;
12147 case 8:
12148 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12149 break;
12150 default:
12151 gcc_unreachable ();
12153 output_addr_const (file, x);
12154 fputs (")", file);
12157 /* Do whatever processing is required at the end of a file. */
12159 static void
12160 sparc_file_end (void)
12162 /* If we need to emit the special GOT helper function, do so now. */
12163 if (got_helper_rtx)
12165 const char *name = XSTR (got_helper_rtx, 0);
12166 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12167 #ifdef DWARF2_UNWIND_INFO
12168 bool do_cfi;
12169 #endif
12171 if (USE_HIDDEN_LINKONCE)
12173 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12174 get_identifier (name),
12175 build_function_type_list (void_type_node,
12176 NULL_TREE));
12177 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12178 NULL_TREE, void_type_node);
12179 TREE_PUBLIC (decl) = 1;
12180 TREE_STATIC (decl) = 1;
12181 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12182 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12183 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12184 resolve_unique_section (decl, 0, flag_function_sections);
12185 allocate_struct_function (decl, true);
12186 cfun->is_thunk = 1;
12187 current_function_decl = decl;
12188 init_varasm_status ();
12189 assemble_start_function (decl, name);
12191 else
12193 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12194 switch_to_section (text_section);
12195 if (align > 0)
12196 ASM_OUTPUT_ALIGN (asm_out_file, align);
12197 ASM_OUTPUT_LABEL (asm_out_file, name);
12200 #ifdef DWARF2_UNWIND_INFO
12201 do_cfi = dwarf2out_do_cfi_asm ();
12202 if (do_cfi)
12203 fprintf (asm_out_file, "\t.cfi_startproc\n");
12204 #endif
12205 if (flag_delayed_branch)
12206 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12207 reg_name, reg_name);
12208 else
12209 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12210 reg_name, reg_name);
12211 #ifdef DWARF2_UNWIND_INFO
12212 if (do_cfi)
12213 fprintf (asm_out_file, "\t.cfi_endproc\n");
12214 #endif
12217 if (NEED_INDICATE_EXEC_STACK)
12218 file_end_indicate_exec_stack ();
12220 #ifdef TARGET_SOLARIS
12221 solaris_file_end ();
12222 #endif
12225 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12226 /* Implement TARGET_MANGLE_TYPE. */
12228 static const char *
12229 sparc_mangle_type (const_tree type)
12231 if (TARGET_ARCH32
12232 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12233 && TARGET_LONG_DOUBLE_128)
12234 return "g";
12236 /* For all other types, use normal C++ mangling. */
12237 return NULL;
12239 #endif
12241 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12242 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12243 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12245 void
12246 sparc_emit_membar_for_model (enum memmodel model,
12247 int load_store, int before_after)
12249 /* Bits for the MEMBAR mmask field. */
12250 const int LoadLoad = 1;
12251 const int StoreLoad = 2;
12252 const int LoadStore = 4;
12253 const int StoreStore = 8;
12255 int mm = 0, implied = 0;
12257 switch (sparc_memory_model)
12259 case SMM_SC:
12260 /* Sequential Consistency. All memory transactions are immediately
12261 visible in sequential execution order. No barriers needed. */
12262 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12263 break;
12265 case SMM_TSO:
12266 /* Total Store Ordering: all memory transactions with store semantics
12267 are followed by an implied StoreStore. */
12268 implied |= StoreStore;
12270 /* If we're not looking for a raw barrier (before+after), then atomic
12271 operations get the benefit of being both load and store. */
12272 if (load_store == 3 && before_after == 1)
12273 implied |= StoreLoad;
12274 /* FALLTHRU */
12276 case SMM_PSO:
12277 /* Partial Store Ordering: all memory transactions with load semantics
12278 are followed by an implied LoadLoad | LoadStore. */
12279 implied |= LoadLoad | LoadStore;
12281 /* If we're not looking for a raw barrier (before+after), then atomic
12282 operations get the benefit of being both load and store. */
12283 if (load_store == 3 && before_after == 2)
12284 implied |= StoreLoad | StoreStore;
12285 /* FALLTHRU */
12287 case SMM_RMO:
12288 /* Relaxed Memory Ordering: no implicit bits. */
12289 break;
12291 default:
12292 gcc_unreachable ();
12295 if (before_after & 1)
12297 if (is_mm_release (model) || is_mm_acq_rel (model)
12298 || is_mm_seq_cst (model))
12300 if (load_store & 1)
12301 mm |= LoadLoad | StoreLoad;
12302 if (load_store & 2)
12303 mm |= LoadStore | StoreStore;
12306 if (before_after & 2)
12308 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12309 || is_mm_seq_cst (model))
12311 if (load_store & 1)
12312 mm |= LoadLoad | LoadStore;
12313 if (load_store & 2)
12314 mm |= StoreLoad | StoreStore;
12318 /* Remove the bits implied by the system memory model. */
12319 mm &= ~implied;
12321 /* For raw barriers (before+after), always emit a barrier.
12322 This will become a compile-time barrier if needed. */
12323 if (mm || before_after == 3)
12324 emit_insn (gen_membar (GEN_INT (mm)));
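/* An illustrative call (not taken from this file): for a seq-cst
   store under the default TSO model,

     sparc_emit_membar_for_model (MEMMODEL_SEQ_CST, 2, 2);

   requests StoreLoad | StoreStore after the store; TSO already
   implies StoreStore (as well as LoadLoad | LoadStore), so only
   membar #StoreLoad is emitted, the one ordering TSO does not
   guarantee.  */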
12327 /* Expand code to perform an 8-bit or 16-bit compare-and-swap by doing a
12328 32-bit compare-and-swap on the word containing the byte or half-word. */
12330 static void
12331 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12332 rtx oldval, rtx newval)
12334 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12335 rtx addr = gen_reg_rtx (Pmode);
12336 rtx off = gen_reg_rtx (SImode);
12337 rtx oldv = gen_reg_rtx (SImode);
12338 rtx newv = gen_reg_rtx (SImode);
12339 rtx oldvalue = gen_reg_rtx (SImode);
12340 rtx newvalue = gen_reg_rtx (SImode);
12341 rtx res = gen_reg_rtx (SImode);
12342 rtx resv = gen_reg_rtx (SImode);
12343 rtx memsi, val, mask, cc;
12345 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12347 if (Pmode != SImode)
12348 addr1 = gen_lowpart (SImode, addr1);
12349 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12351 memsi = gen_rtx_MEM (SImode, addr);
12352 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12353 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12355 val = copy_to_reg (memsi);
12357 emit_insn (gen_rtx_SET (off,
12358 gen_rtx_XOR (SImode, off,
12359 GEN_INT (GET_MODE (mem) == QImode
12360 ? 3 : 2))));
12362 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12364 if (GET_MODE (mem) == QImode)
12365 mask = force_reg (SImode, GEN_INT (0xff));
12366 else
12367 mask = force_reg (SImode, GEN_INT (0xffff));
12369 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12371 emit_insn (gen_rtx_SET (val,
12372 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12373 val)));
12375 oldval = gen_lowpart (SImode, oldval);
12376 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12378 newval = gen_lowpart_common (SImode, newval);
12379 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12381 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12383 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12385 rtx_code_label *end_label = gen_label_rtx ();
12386 rtx_code_label *loop_label = gen_label_rtx ();
12387 emit_label (loop_label);
12389 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12391 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12393 emit_move_insn (bool_result, const1_rtx);
12395 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12397 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12399 emit_insn (gen_rtx_SET (resv,
12400 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12401 res)));
12403 emit_move_insn (bool_result, const0_rtx);
12405 cc = gen_compare_reg_1 (NE, resv, val);
12406 emit_insn (gen_rtx_SET (val, resv));
12408 /* Use cbranchcc4 to separate the compare and branch! */
12409 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12410 cc, const0_rtx, loop_label));
12412 emit_label (end_label);
12414 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12416 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12418 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
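/* In outline, the sequence generated above behaves like this C-like
   sketch (illustrative pseudocode, not the literal RTL):

     word = *aligned_mem & ~mask;            -- the surrounding bytes
   loop:
     bool_result = 1;
     old = word | ((oldval << off) & mask);
     new = word | ((newval << off) & mask);
     res = CAS (aligned_mem, old, new);
     if (res == old) goto done;              -- swap succeeded
     bool_result = 0;
     if ((res & ~mask) != word)              -- a neighbouring byte
       { word = res & ~mask; goto loop; }    -- changed: retry
   done:
     result = (res & mask) >> off;  */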
12421 /* Expand code to perform a compare-and-swap. */
12423 void
12424 sparc_expand_compare_and_swap (rtx operands[])
12426 rtx bval, retval, mem, oldval, newval;
12427 machine_mode mode;
12428 enum memmodel model;
12430 bval = operands[0];
12431 retval = operands[1];
12432 mem = operands[2];
12433 oldval = operands[3];
12434 newval = operands[4];
12435 model = (enum memmodel) INTVAL (operands[6]);
12436 mode = GET_MODE (mem);
12438 sparc_emit_membar_for_model (model, 3, 1);
12440 if (reg_overlap_mentioned_p (retval, oldval))
12441 oldval = copy_to_reg (oldval);
12443 if (mode == QImode || mode == HImode)
12444 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12445 else
12447 rtx (*gen) (rtx, rtx, rtx, rtx);
12448 rtx x;
12450 if (mode == SImode)
12451 gen = gen_atomic_compare_and_swapsi_1;
12452 else
12453 gen = gen_atomic_compare_and_swapdi_1;
12454 emit_insn (gen (retval, mem, oldval, newval));
12456 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12457 if (x != bval)
12458 convert_move (bval, x, 1);
12461 sparc_emit_membar_for_model (model, 3, 2);
12464 void
12465 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12467 rtx t_1, t_2, t_3;
12469 sel = gen_lowpart (DImode, sel);
12470 switch (vmode)
12472 case E_V2SImode:
12473 /* inp = xxxxxxxAxxxxxxxB */
12474 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12475 NULL_RTX, 1, OPTAB_DIRECT);
12476 /* t_1 = ....xxxxxxxAxxx. */
12477 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12478 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12479 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12480 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12481 /* sel = .......B */
12482 /* t_1 = ...A.... */
12483 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12484 /* sel = ...A...B */
12485 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12486 /* sel = AAAABBBB * 4 */
12487 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12488 /* sel = { A*4, A*4+1, A*4+2, ... } */
12489 break;
12491 case E_V4HImode:
12492 /* inp = xxxAxxxBxxxCxxxD */
12493 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12494 NULL_RTX, 1, OPTAB_DIRECT);
12495 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12496 NULL_RTX, 1, OPTAB_DIRECT);
12497 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12498 NULL_RTX, 1, OPTAB_DIRECT);
12499 /* t_1 = ..xxxAxxxBxxxCxx */
12500 /* t_2 = ....xxxAxxxBxxxC */
12501 /* t_3 = ......xxxAxxxBxx */
12502 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12503 GEN_INT (0x07),
12504 NULL_RTX, 1, OPTAB_DIRECT);
12505 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12506 GEN_INT (0x0700),
12507 NULL_RTX, 1, OPTAB_DIRECT);
12508 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12509 GEN_INT (0x070000),
12510 NULL_RTX, 1, OPTAB_DIRECT);
12511 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12512 GEN_INT (0x07000000),
12513 NULL_RTX, 1, OPTAB_DIRECT);
12514 /* sel = .......D */
12515 /* t_1 = .....C.. */
12516 /* t_2 = ...B.... */
12517 /* t_3 = .A...... */
12518 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12519 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12520 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12521 /* sel = .A.B.C.D */
12522 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12523 /* sel = AABBCCDD * 2 */
12524 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12525 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12526 break;
12528 case E_V8QImode:
12529 /* input = xAxBxCxDxExFxGxH */
12530 sel = expand_simple_binop (DImode, AND, sel,
12531 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12532 | 0x0f0f0f0f),
12533 NULL_RTX, 1, OPTAB_DIRECT);
12534 /* sel = .A.B.C.D.E.F.G.H */
12535 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12536 NULL_RTX, 1, OPTAB_DIRECT);
12537 /* t_1 = ..A.B.C.D.E.F.G. */
12538 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12539 NULL_RTX, 1, OPTAB_DIRECT);
12540 /* sel = .AABBCCDDEEFFGGH */
12541 sel = expand_simple_binop (DImode, AND, sel,
12542 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12543 | 0xff00ff),
12544 NULL_RTX, 1, OPTAB_DIRECT);
12545 /* sel = ..AB..CD..EF..GH */
12546 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12547 NULL_RTX, 1, OPTAB_DIRECT);
12548 /* t_1 = ....AB..CD..EF.. */
12549 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12550 NULL_RTX, 1, OPTAB_DIRECT);
12551 /* sel = ..ABABCDCDEFEFGH */
12552 sel = expand_simple_binop (DImode, AND, sel,
12553 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12554 NULL_RTX, 1, OPTAB_DIRECT);
12555 /* sel = ....ABCD....EFGH */
12556 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12557 NULL_RTX, 1, OPTAB_DIRECT);
12558 /* t_1 = ........ABCD.... */
12559 sel = gen_lowpart (SImode, sel);
12560 t_1 = gen_lowpart (SImode, t_1);
12561 break;
12563 default:
12564 gcc_unreachable ();
12567 /* Always perform the final addition/merge within the bmask insn. */
12568 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12571 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12573 static bool
12574 sparc_frame_pointer_required (void)
12576 /* If the stack pointer is dynamically modified in the function, it cannot
12577 serve as the frame pointer. */
12578 if (cfun->calls_alloca)
12579 return true;
12581 /* If the function receives nonlocal gotos, it needs to save the frame
12582 pointer in the nonlocal_goto_save_area object. */
12583 if (cfun->has_nonlocal_label)
12584 return true;
12586 /* In flat mode, that's it. */
12587 if (TARGET_FLAT)
12588 return false;
12590 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12591 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12592 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12595 /* The way this is structured, we can't eliminate SFP in favor of SP
12596 if the frame pointer is required: we want to use the SFP->HFP elimination
12597 in that case. But the test in update_eliminables doesn't know we are
12598 assuming below that we only do the former elimination. */
12600 static bool
12601 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12603 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12606 /* Return the hard frame pointer directly to bypass the stack bias. */
12608 static rtx
12609 sparc_builtin_setjmp_frame_value (void)
12611 return hard_frame_pointer_rtx;
12614 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12615 they won't be allocated. */
12617 static void
12618 sparc_conditional_register_usage (void)
12620 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12622 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12623 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12625 /* If the user has passed -f{fixed,call-{used,saved}}-g5, then
12626 honor it. */
12627 if (TARGET_ARCH32 && fixed_regs[5])
12628 fixed_regs[5] = 1;
12629 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12630 fixed_regs[5] = 0;
12631 if (! TARGET_V9)
12633 int regno;
12634 for (regno = SPARC_FIRST_V9_FP_REG;
12635 regno <= SPARC_LAST_V9_FP_REG;
12636 regno++)
12637 fixed_regs[regno] = 1;
12638 /* %fcc0 is used by v8 and v9. */
12639 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12640 regno <= SPARC_LAST_V9_FCC_REG;
12641 regno++)
12642 fixed_regs[regno] = 1;
12644 if (! TARGET_FPU)
12646 int regno;
12647 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12648 fixed_regs[regno] = 1;
12650 /* If the user has passed -f{fixed,call-{used,saved}}-g2, then
12651 honor it. Likewise with g3 and g4. */
12652 if (fixed_regs[2] == 2)
12653 fixed_regs[2] = ! TARGET_APP_REGS;
12654 if (fixed_regs[3] == 2)
12655 fixed_regs[3] = ! TARGET_APP_REGS;
12656 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12657 fixed_regs[4] = ! TARGET_APP_REGS;
12658 else if (TARGET_CM_EMBMEDANY)
12659 fixed_regs[4] = 1;
12660 else if (fixed_regs[4] == 2)
12661 fixed_regs[4] = 0;
12662 if (TARGET_FLAT)
12664 int regno;
12665 /* Disable leaf functions. */
12666 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12667 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12668 leaf_reg_remap [regno] = regno;
12670 if (TARGET_VIS)
12671 global_regs[SPARC_GSR_REG] = 1;
12674 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12676 - We can't load constants into FP registers.
12677 - We can't load FP constants into integer registers when soft-float,
12678 because there is no soft-float pattern with a r/F constraint.
12679 - We can't load FP constants into integer registers for TFmode unless
12680 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12681 - Try and reload integer constants (symbolic or otherwise) back into
12682 registers directly, rather than having them dumped to memory. */
12684 static reg_class_t
12685 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12687 machine_mode mode = GET_MODE (x);
12688 if (CONSTANT_P (x))
12690 if (FP_REG_CLASS_P (rclass)
12691 || rclass == GENERAL_OR_FP_REGS
12692 || rclass == GENERAL_OR_EXTRA_FP_REGS
12693 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12694 || (mode == TFmode && ! const_zero_operand (x, mode)))
12695 return NO_REGS;
12697 if (GET_MODE_CLASS (mode) == MODE_INT)
12698 return GENERAL_REGS;
12700 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12702 if (! FP_REG_CLASS_P (rclass)
12703 || !(const_zero_operand (x, mode)
12704 || const_all_ones_operand (x, mode)))
12705 return NO_REGS;
12709 if (TARGET_VIS3
12710 && ! TARGET_ARCH64
12711 && (rclass == EXTRA_FP_REGS
12712 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12714 int regno = true_regnum (x);
12716 if (SPARC_INT_REG_P (regno))
12717 return (rclass == EXTRA_FP_REGS
12718 ? FP_REGS : GENERAL_OR_FP_REGS);
12721 return rclass;
12724 /* Return true if we use LRA instead of the reload pass. */
12726 static bool
12727 sparc_lra_p (void)
12729 return TARGET_LRA;
12732 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12733 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12735 const char *
12736 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12738 char mulstr[32];
12740 gcc_assert (! TARGET_ARCH64);
12742 if (sparc_check_64 (operands[1], insn) <= 0)
12743 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12744 if (which_alternative == 1)
12745 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12746 if (GET_CODE (operands[2]) == CONST_INT)
12748 if (which_alternative == 1)
12750 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12751 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12752 output_asm_insn (mulstr, operands);
12753 return "srlx\t%L0, 32, %H0";
12755 else
12757 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12758 output_asm_insn ("or\t%L1, %3, %3", operands);
12759 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12760 output_asm_insn (mulstr, operands);
12761 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12762 return "mov\t%3, %L0";
12765 else if (rtx_equal_p (operands[1], operands[2]))
12767 if (which_alternative == 1)
12769 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12770 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12771 output_asm_insn (mulstr, operands);
12772 return "srlx\t%L0, 32, %H0";
12774 else
12776 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12777 output_asm_insn ("or\t%L1, %3, %3", operands);
12778 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12779 output_asm_insn (mulstr, operands);
12780 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12781 return "mov\t%3, %L0";
12784 if (sparc_check_64 (operands[2], insn) <= 0)
12785 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12786 if (which_alternative == 1)
12788 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12789 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12790 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12791 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12792 output_asm_insn (mulstr, operands);
12793 return "srlx\t%L0, 32, %H0";
12795 else
12797 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12798 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12799 output_asm_insn ("or\t%L1, %3, %3", operands);
12800 output_asm_insn ("or\t%L2, %4, %4", operands);
12801 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12802 output_asm_insn (mulstr, operands);
12803 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12804 return "mov\t%3, %L0";
12808 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12809 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
12810 and INNER_MODE are the modes describing TARGET. */
12812 static void
12813 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12814 machine_mode inner_mode)
12816 rtx t1, final_insn, sel;
12817 int bmask;
12819 t1 = gen_reg_rtx (mode);
12821 elt = convert_modes (SImode, inner_mode, elt, true);
12822 emit_move_insn (gen_lowpart(SImode, t1), elt);
12824 switch (mode)
12826 case E_V2SImode:
12827 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12828 bmask = 0x45674567;
12829 break;
12830 case E_V4HImode:
12831 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12832 bmask = 0x67676767;
12833 break;
12834 case E_V8QImode:
12835 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12836 bmask = 0x77777777;
12837 break;
12838 default:
12839 gcc_unreachable ();
12842 sel = force_reg (SImode, GEN_INT (bmask));
12843 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12844 emit_insn (final_insn);
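/* The bmask constants above index bytes of the doubled source
   { t1, t1 }: for V4HImode, 0x67676767 selects bytes 6-7, i.e. the
   16-bit element sitting in the least significant bits of t1, for
   all four output half-words; 0x77777777 likewise replicates byte 7
   for V8QImode, and 0x45674567 the low 32-bit word for V2SImode.  */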
12847 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12848 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
12850 static void
12851 vector_init_fpmerge (rtx target, rtx elt)
12853 rtx t1, t2, t2_low, t3, t3_low;
12855 t1 = gen_reg_rtx (V4QImode);
12856 elt = convert_modes (SImode, QImode, elt, true);
12857 emit_move_insn (gen_lowpart (SImode, t1), elt);
12859 t2 = gen_reg_rtx (V8QImode);
12860 t2_low = gen_lowpart (V4QImode, t2);
12861 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12863 t3 = gen_reg_rtx (V8QImode);
12864 t3_low = gen_lowpart (V4QImode, t3);
12865 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12867 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12870 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12871 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
12873 static void
12874 vector_init_faligndata (rtx target, rtx elt)
12876 rtx t1 = gen_reg_rtx (V4HImode);
12877 int i;
12879 elt = convert_modes (SImode, HImode, elt, true);
12880 emit_move_insn (gen_lowpart (SImode, t1), elt);
12882 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12883 force_reg (SImode, GEN_INT (6)),
12884 const0_rtx));
12886 for (i = 0; i < 4; i++)
12887 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12890 /* Emit code to initialize TARGET to values for individual fields VALS. */
12892 void
12893 sparc_expand_vector_init (rtx target, rtx vals)
12895 const machine_mode mode = GET_MODE (target);
12896 const machine_mode inner_mode = GET_MODE_INNER (mode);
12897 const int n_elts = GET_MODE_NUNITS (mode);
12898 int i, n_var = 0;
12899 bool all_same = true;
12900 rtx mem;
12902 for (i = 0; i < n_elts; i++)
12904 rtx x = XVECEXP (vals, 0, i);
12905 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12906 n_var++;
12908 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12909 all_same = false;
12912 if (n_var == 0)
12914 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12915 return;
12918 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12920 if (GET_MODE_SIZE (inner_mode) == 4)
12922 emit_move_insn (gen_lowpart (SImode, target),
12923 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12924 return;
12926 else if (GET_MODE_SIZE (inner_mode) == 8)
12928 emit_move_insn (gen_lowpart (DImode, target),
12929 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12930 return;
12933 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12934 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12936 emit_move_insn (gen_highpart (word_mode, target),
12937 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12938 emit_move_insn (gen_lowpart (word_mode, target),
12939 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12940 return;
12943 if (all_same && GET_MODE_SIZE (mode) == 8)
12945 if (TARGET_VIS2)
12947 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12948 return;
12950 if (mode == V8QImode)
12952 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12953 return;
12955 if (mode == V4HImode)
12957 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12958 return;
12962 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12963 for (i = 0; i < n_elts; i++)
12964 emit_move_insn (adjust_address_nv (mem, inner_mode,
12965 i * GET_MODE_SIZE (inner_mode)),
12966 XVECEXP (vals, 0, i));
12967 emit_move_insn (target, mem);
12970 /* Implement TARGET_SECONDARY_RELOAD. */
12972 static reg_class_t
12973 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12974 machine_mode mode, secondary_reload_info *sri)
12976 enum reg_class rclass = (enum reg_class) rclass_i;
12978 sri->icode = CODE_FOR_nothing;
12979 sri->extra_cost = 0;
12981 /* We need a temporary when loading/storing a HImode/QImode value
12982 between memory and the FPU registers. This can happen when combine puts
12983 a paradoxical subreg in a float/fix conversion insn. */
12984 if (FP_REG_CLASS_P (rclass)
12985 && (mode == HImode || mode == QImode)
12986 && (GET_CODE (x) == MEM
12987 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12988 && true_regnum (x) == -1)))
12989 return GENERAL_REGS;
12991 /* On 32-bit we need a temporary when loading/storing a DFmode value
12992 between unaligned memory and the upper FPU registers. */
12993 if (TARGET_ARCH32
12994 && rclass == EXTRA_FP_REGS
12995 && mode == DFmode
12996 && GET_CODE (x) == MEM
12997 && ! mem_min_alignment (x, 8))
12998 return FP_REGS;
13000 if (((TARGET_CM_MEDANY
13001 && symbolic_operand (x, mode))
13002 || (TARGET_CM_EMBMEDANY
13003 && text_segment_operand (x, mode)))
13004 && ! flag_pic)
13006 if (in_p)
13007 sri->icode = direct_optab_handler (reload_in_optab, mode);
13008 else
13009 sri->icode = direct_optab_handler (reload_out_optab, mode);
13010 return NO_REGS;
13013 if (TARGET_VIS3 && TARGET_ARCH32)
13015 int regno = true_regnum (x);
13017 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13018 to move 8-byte values in 4-byte pieces. This only works via
13019 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13020 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13021 an FP_REGS intermediate move. */
13022 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13023 || ((general_or_i64_p (rclass)
13024 || rclass == GENERAL_OR_FP_REGS)
13025 && SPARC_FP_REG_P (regno)))
13027 sri->extra_cost = 2;
13028 return FP_REGS;
13032 return NO_REGS;
13035 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13036 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13038 bool
13039 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13041 enum rtx_code rc = GET_CODE (operands[1]);
13042 machine_mode cmp_mode;
13043 rtx cc_reg, dst, cmp;
13045 cmp = operands[1];
13046 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13047 return false;
13049 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13050 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13052 cmp_mode = GET_MODE (XEXP (cmp, 0));
13053 rc = GET_CODE (cmp);
13055 dst = operands[0];
13056 if (! rtx_equal_p (operands[2], dst)
13057 && ! rtx_equal_p (operands[3], dst))
13059 if (reg_overlap_mentioned_p (dst, cmp))
13060 dst = gen_reg_rtx (mode);
13062 emit_move_insn (dst, operands[3]);
13064 else if (operands[2] == dst)
13066 operands[2] = operands[3];
13068 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13069 rc = reverse_condition_maybe_unordered (rc);
13070 else
13071 rc = reverse_condition (rc);
13074 if (XEXP (cmp, 1) == const0_rtx
13075 && GET_CODE (XEXP (cmp, 0)) == REG
13076 && cmp_mode == DImode
13077 && v9_regcmp_p (rc))
13078 cc_reg = XEXP (cmp, 0);
13079 else
13080 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13082 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13084 emit_insn (gen_rtx_SET (dst,
13085 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13087 if (dst != operands[0])
13088 emit_move_insn (operands[0], dst);
13090 return true;
13093 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13094 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13095 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13096 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13097 code to be used for the condition mask. */
13099 void
13100 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13102 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13103 enum rtx_code code = GET_CODE (operands[3]);
13105 mask = gen_reg_rtx (Pmode);
13106 cop0 = operands[4];
13107 cop1 = operands[5];
13108 if (code == LT || code == GE)
13110 rtx t;
13112 code = swap_condition (code);
13113 t = cop0; cop0 = cop1; cop1 = t;
13116 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13118 fcmp = gen_rtx_UNSPEC (Pmode,
13119 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13120 fcode);
13122 cmask = gen_rtx_UNSPEC (DImode,
13123 gen_rtvec (2, mask, gsr),
13124 ccode);
13126 bshuf = gen_rtx_UNSPEC (mode,
13127 gen_rtvec (3, operands[1], operands[2], gsr),
13128 UNSPEC_BSHUFFLE);
13130 emit_insn (gen_rtx_SET (mask, fcmp));
13131 emit_insn (gen_rtx_SET (gsr, cmask));
13133 emit_insn (gen_rtx_SET (operands[0], bshuf));
13136 /* On SPARC, any mode which naturally allocates into the float
13137 registers should return 4 here. */
13139 unsigned int
13140 sparc_regmode_natural_size (machine_mode mode)
13142 int size = UNITS_PER_WORD;
13144 if (TARGET_ARCH64)
13146 enum mode_class mclass = GET_MODE_CLASS (mode);
13148 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13149 size = 4;
13152 return size;
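/* E.g. on 64-bit, DImode naturally moves in 8-byte units (integer
   registers), whereas DFmode and the 8-byte vector modes move in
   4-byte units, matching the 32-bit granularity of the FP register
   file.  */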
13155 /* Implement TARGET_HARD_REGNO_NREGS.
13157 On SPARC, ordinary registers hold 32 bits worth; this means both
13158 integer and floating point registers. On v9, integer regs hold 64
13159 bits worth; floating point regs hold 32 bits worth (this includes the
13160 new fp regs as even the odd ones are included in the hard register
13161 count). */
13163 static unsigned int
13164 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13166 if (regno == SPARC_GSR_REG)
13167 return 1;
13168 if (TARGET_ARCH64)
13170 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13171 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13172 return CEIL (GET_MODE_SIZE (mode), 4);
13174 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
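/* For example, on V9/64-bit a DFmode value (8 bytes) occupies a
   single integer register (CEIL (8, 8) == 1) but a pair of FP
   registers (CEIL (8, 4) == 2); on 32-bit, UNITS_PER_WORD is 4 and
   both register files yield CEIL (8, 4) == 2.  */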
13177 /* Implement TARGET_HARD_REGNO_MODE_OK.
13179 ??? Because of the funny way we pass parameters we should allow certain
13180 ??? types of float/complex values to be in integer registers during
13181 ??? RTL generation. This only matters on arch32. */
13183 static bool
13184 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13186 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13189 /* Implement TARGET_MODES_TIEABLE_P.
13191 For V9 we have to deal with the fact that only the lower 32 floating
13192 point registers are 32-bit addressable. */
13194 static bool
13195 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13197 enum mode_class mclass1, mclass2;
13198 unsigned short size1, size2;
13200 if (mode1 == mode2)
13201 return true;
13203 mclass1 = GET_MODE_CLASS (mode1);
13204 mclass2 = GET_MODE_CLASS (mode2);
13205 if (mclass1 != mclass2)
13206 return false;
13208 if (! TARGET_V9)
13209 return true;
13211 /* Classes are the same and we are V9 so we have to deal with upper
13212 vs. lower floating point registers. If one of the modes is a
13213 4-byte mode, and the other is not, we have to mark them as not
13214 tieable because only the lower 32 floating point register are
13215 addressable 32-bits at a time.
13217 We can't just test explicitly for SFmode, otherwise we won't
13218 cover the vector mode cases properly. */
13220 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13221 return true;
13223 size1 = GET_MODE_SIZE (mode1);
13224 size2 = GET_MODE_SIZE (mode2);
13225 if ((size1 > 4 && size2 == 4)
13226 || (size2 > 4 && size1 == 4))
13227 return false;
13229 return true;
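/* For instance, on V9 this makes SFmode (4 bytes) and DFmode
   (8 bytes) non-tieable, since only the lower FP registers can be
   accessed 32 bits at a time, whereas SImode and HImode, both
   MODE_INT, remain tieable.  */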
13232 /* Implement TARGET_CSTORE_MODE. */
13234 static scalar_int_mode
13235 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13237 return (TARGET_ARCH64 ? DImode : SImode);
13240 /* Return the compound expression made of T1 and T2. */
13242 static inline tree
13243 compound_expr (tree t1, tree t2)
13245 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13248 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13250 static void
13251 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13253 if (!TARGET_FPU)
13254 return;
13256 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13257 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13259 /* We generate the equivalent of feholdexcept (&fenv_var):
13261 unsigned int fenv_var;
13262 __builtin_store_fsr (&fenv_var);
13264 unsigned int tmp1_var;
13265 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13267 __builtin_load_fsr (&tmp1_var); */
13269 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13270 TREE_ADDRESSABLE (fenv_var) = 1;
13271 tree fenv_addr = build_fold_addr_expr (fenv_var);
13272 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13273 tree hold_stfsr
13274 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13275 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13277 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13278 TREE_ADDRESSABLE (tmp1_var) = 1;
13279 tree masked_fenv_var
13280 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13281 build_int_cst (unsigned_type_node,
13282 ~(accrued_exception_mask | trap_enable_mask)));
13283 tree hold_mask
13284 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13285 NULL_TREE, NULL_TREE);
13287 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13288 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13289 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13291 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13293 /* We reload the value of tmp1_var to clear the exceptions:
13295 __builtin_load_fsr (&tmp1_var); */
13297 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13299 /* We generate the equivalent of feupdateenv (&fenv_var):
13301 unsigned int tmp2_var;
13302 __builtin_store_fsr (&tmp2_var);
13304 __builtin_load_fsr (&fenv_var);
13306 if (SPARC_LOW_FE_EXCEPT_VALUES)
13307 tmp2_var >>= 5;
13308 __atomic_feraiseexcept ((int) tmp2_var); */
13310 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13311 TREE_ADDRESSABLE (tmp2_var) = 1;
13312 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13313 tree update_stfsr
13314 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13315 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13317 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13319 tree atomic_feraiseexcept
13320 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13321 tree update_call
13322 = build_call_expr (atomic_feraiseexcept, 1,
13323 fold_convert (integer_type_node, tmp2_var));
13325 if (SPARC_LOW_FE_EXCEPT_VALUES)
13327 tree shifted_tmp2_var
13328 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13329 build_int_cst (unsigned_type_node, 5));
13330 tree update_shift
13331 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13332 update_call = compound_expr (update_shift, update_call);
13335 *update
13336 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13339 #include "gt-sparc.h"